In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix

In [3]:
# Load the dataset from "Cleaned_Data.csv" (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="Cleaned_Data.csv")

In [5]:
# Features are the text documents; labels are the 'target' column.
# pandas reads an empty CSV field back as NaN, and TfidfVectorizer raises
# on NaN documents, so missing texts are replaced with an empty string.
# When the column has no missing values this leaves X unchanged.
X = df['Clean_text'].fillna('')
y = df['target']

In [7]:
# Two-step text classifier: TF-IDF features fed into a support vector classifier.
# The step names ('vectorizer', 'classifier') are the prefixes used when
# addressing each step's parameters, e.g. 'vectorizer__max_features'.
pipeline = Pipeline(
    steps=[
        ('vectorizer', TfidfVectorizer()),
        ('classifier', SVC()),
    ]
)

In [9]:
# Hyper-parameter search space, keyed as "<pipeline step>__<parameter>".
# One dict with three 4-value lists: 4 * 4 * 4 = 64 candidate combinations.
param_grid = [
    dict(
        vectorizer__max_features=[10, 100, 1000, 2000],
        classifier__kernel=['linear', 'rbf', 'poly', 'sigmoid'],
        classifier__C=[0.1, 1, 10, 100],
    )
]

In [11]:
# Exhaustive search over param_grid with 5-fold cross-validation,
# scoring each candidate by F1 and running on all processors (n_jobs=-1).
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='f1',
    cv=5,
    n_jobs=-1,
)

In [13]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [15]:
# Run the cross-validated search using the training split only, so the
# held-out test rows play no part in model selection.
grid.fit(X=X_train, y=y_train)

In [16]:
# Show the parameter combination that scored best in the cross-validated search.
grid.best_params_

{'classifier__C': 1,
 'classifier__kernel': 'linear',
 'vectorizer__max_features': 2000}

In [17]:
# Predict labels for the held-out test documents with the fitted search object.
y_pred = grid.predict(X=X_test)

In [18]:
# Fraction of test samples whose predicted label equals the true label.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(test_accuracy)

0.7889182058047494


In [19]:
# Text summary of per-class precision, recall, F1 and support on the test split.
report = classification_report(y_true=y_test, y_pred=y_pred)

# Heading and report are emitted on separate lines, each newline-terminated.
print("Classification Report:", report, sep="\n")

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.86      0.82       876
           1       0.78      0.69      0.74       640

    accuracy                           0.79      1516
   macro avg       0.79      0.78      0.78      1516
weighted avg       0.79      0.79      0.79      1516

