### Library Importing ###

In [1]:
import pandas as pd 
import numpy as np
import matplotlib as plt
import csv
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score


### Loading dataset ###

In [2]:
df = pd.read_csv('CSV/preprocessed_dataset.csv', index_col=0)

In [3]:
# df.isnull().sum()
df.dropna(subset='clean_text', inplace=True)

In [4]:
df.isnull().sum()


clean_text    0
category      0
dtype: int64

In [5]:
X = df['clean_text']
y = df['category']

### TRAIN TEST SPLIT ###

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state=42 )

In [7]:
#Bag of word
vectorizer = CountVectorizer()
X_train_vect = vectorizer.fit_transform(X_train)
X_test_vect = vectorizer.transform(X_test)


In [8]:
#naive bayes
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train_vect, y_train)

In [9]:
y_pred = nb_classifier.predict(X_test_vect)

In [10]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7245148612134611


In [11]:
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

        -1.0       0.72      0.59      0.65      6982
         0.0       0.86      0.59      0.70     11120
         1.0       0.67      0.90      0.77     14466

    accuracy                           0.72     32568
   macro avg       0.75      0.69      0.70     32568
weighted avg       0.75      0.72      0.72     32568



Cross-validation

In [12]:
cv_scores = cross_val_score(nb_classifier, X_train_vect, y_train, cv = 5)

In [13]:
cv_scores

array([0.71754817, 0.71309588, 0.71486144, 0.71569493, 0.71853529])

In [14]:
cv_scores.mean()

0.7159471423215094

### Saving the trained model ###

In [15]:
import os
folder_path = 'Model'
os.makedirs(folder_path, exist_ok=True)


In [16]:
import joblib

joblib.dump(nb_classifier, 'Model/sentiment_model.pkl')

['Model/sentiment_model.pkl']

In [21]:
import pickle

#--RE WRITE MODEL.pkl --#
with open("sentiment_model.pkl", "wb") as f:
    pickle.dump(nb_classifier, f)
