In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV,train_test_split,cross_val_score
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline

In [2]:
# Load the dataset stored in Cleaned_Data.csv (path is relative to the kernel's working directory).
df = pd.read_csv(filepath_or_buffer="Cleaned_Data.csv")

In [3]:
# Model input is the "Clean_text" column; the label to predict is the "target" column.
X, y = df["Clean_text"], df["target"]

In [4]:
# Hold out 10% of the samples for the final evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=10,
)

In [9]:
# Text-classification pipeline: bag-of-words counts feeding a Naive Bayes model.
# CountVectorizer emits a scipy sparse matrix, which GaussianNB refuses to fit
# (it requires dense input), so the placeholder classifier is MultinomialNB --
# a variant that accepts sparse count features directly.
# The grid search swaps out the 'classifier' step for each candidate, so the
# step names ('vectorizer', 'classifier') must stay exactly as they are.
pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('classifier', MultinomialNB()),
])

In [11]:
# Search space for the pipeline's 'vectorizer' and 'classifier' steps.
# GaussianNB is intentionally not a candidate: the vectorizer step produces a
# sparse matrix and GaussianNB only fits dense arrays, so every one of its
# candidates would fail to fit and be scored as NaN (GridSearchCV's default
# error_score) -- a failure that the notebook's blanket warnings filter hides.
param_grid = [
    {
        'classifier': [BernoulliNB(), MultinomialNB()],
        # None keeps the full vocabulary; integers cap it at the most frequent terms.
        'vectorizer__max_features': [None, 10, 100, 1000, 2000],
        # Additive smoothing strength shared by both candidate classifiers.
        'classifier__alpha': [0.1, 0.5, 1, 10],
    },
]

In [13]:
# 5-fold cross-validated search over param_grid, ranked by F1, using all available CPU cores.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='f1',
    cv=5,
    n_jobs=-1,
)

In [15]:
# Run the search on the training split only; the held-out split is not touched here.
grid.fit(X=X_train, y=y_train)

In [16]:
# Display the parameter setting that achieved the best cross-validated score in the search.
grid.best_params_

{'classifier': MultinomialNB(alpha=1),
 'classifier__alpha': 1,
 'vectorizer__max_features': 2000}

In [17]:
# Predict labels for the held-out split using the fitted search object.
y_pred = grid.predict(X=X_test)

In [18]:
# Fraction of held-out samples whose predicted label matches the true label.
print(
    accuracy_score(y_true=y_test, y_pred=y_pred)
)

0.8126649076517151


In [19]:
# Per-class precision / recall / F1 on the held-out split, as a formatted text table.
report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
)
print(report)

              precision    recall  f1-score   support

           0       0.82      0.86      0.84       427
           1       0.80      0.76      0.78       331

    accuracy                           0.81       758
   macro avg       0.81      0.81      0.81       758
weighted avg       0.81      0.81      0.81       758

