In [1]:
#Importing all Required Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv("Data/rev.csv")
df.head()

Unnamed: 0,Rating,Review
0,9,fan walk dead ever since first episode first s...
1,9,fan walk dead ever since first episode first s...
2,9,would put series top time despite last series ...
3,9,first thing say normally zombie movie series f...
4,9,everyone know walk dead good use early season ...


In [3]:
X = df["Review"].values
y = df["Rating"].values

In [4]:
#Spliting of Dataset into tarining and testing datasets
review_train, review_test, label_train, label_test = train_test_split(X, y, test_size=0.3, random_state=101)
new = df[['Review','Rating']].copy()

In [5]:
#Applying bag of words Text Feature Extraction with Pipeline using countvectorizer, and classifer - LogisticRegression
pipeline1 = Pipeline([
    ('bag_of_words', CountVectorizer()),
    ('classifier', LogisticRegression(solver='newton-cg', multi_class='multinomial'))
])

pipeline1.fit(review_train, label_train)
pip_pred1 = pipeline1.predict(review_test)
print(metrics.classification_report(label_test,pip_pred1))

filename = 'model_lr.pk'
with open('Models/'+filename, 'wb') as file:
    pickle.dump(pipeline1, file) 

              precision    recall  f1-score   support

           1       0.00      0.00      0.00       973
           2       0.00      0.00      0.00       496
           3       0.00      0.00      0.00       631
           4       0.00      0.00      0.00       709
           5       0.00      0.00      0.00       908
           6       0.00      0.00      0.00      1214
           7       0.00      0.00      0.00      2028
           8       0.23      0.24      0.24      2923
           9       0.00      0.00      0.00      2440
          10       0.22      0.83      0.34      3303

    accuracy                           0.22     15625
   macro avg       0.04      0.11      0.06     15625
weighted avg       0.09      0.22      0.12     15625



In [6]:
#Applying bag of words Text Feature Extraction with Pipeline using countvectorizer, and classifer - MultinomialNB
pipeline2 = Pipeline([
    ('bag_of_words', CountVectorizer()),
    ('classifier', MultinomialNB())
])
pipeline2.fit(review_train, label_train)
pip_pred2 = pipeline2.predict(review_test)
print(metrics.classification_report(label_test, pip_pred2))

filename = 'model_mnb.pk'
with open('Models/'+filename, 'wb') as file:
    pickle.dump(pipeline2, file) 

              precision    recall  f1-score   support

           1       0.10      0.38      0.16       973
           2       0.03      0.04      0.04       496
           3       0.04      0.11      0.06       631
           4       0.05      0.05      0.05       709
           5       0.07      0.05      0.06       908
           6       0.00      0.00      0.00      1214
           7       0.16      0.10      0.12      2028
           8       0.22      0.23      0.23      2923
           9       0.18      0.04      0.07      2440
          10       0.22      0.21      0.22      3303

    accuracy                           0.14     15625
   macro avg       0.11      0.12      0.10     15625
weighted avg       0.15      0.14      0.13     15625



In [7]:
#Applying bag of words Text Feature Extraction with Pipeline using countvectorizer, and classifer - DecisionTreeClassifier
pipeline3 = Pipeline([
    ('bag_of_words', CountVectorizer()),
    ('classifier', DecisionTreeClassifier())
])

pipeline3.fit(review_train, label_train)
pip_pred3 = pipeline3.predict(review_test)
print(metrics.classification_report(label_test, pip_pred3))

filename = 'model_dt.pk'
with open('Models/'+filename, 'wb') as file:
    pickle.dump(pipeline3, file) 

              precision    recall  f1-score   support

           1       0.00      0.00      0.00       973
           2       0.00      0.00      0.00       496
           3       0.00      0.00      0.00       631
           4       0.00      0.00      0.00       709
           5       0.00      0.00      0.00       908
           6       0.00      0.00      0.00      1214
           7       0.00      0.00      0.00      2028
           8       0.23      0.24      0.24      2923
           9       0.00      0.00      0.00      2440
          10       0.22      0.83      0.34      3303

    accuracy                           0.22     15625
   macro avg       0.04      0.11      0.06     15625
weighted avg       0.09      0.22      0.12     15625



In [8]:
#Applying bag of words Text Feature Extraction with Pipeline using countvectorizer, and classifer - RandomForestClassifier
pipeline4 = Pipeline([
    ('bag_of_words', CountVectorizer()),
    ('classifier', RandomForestClassifier())
])

pipeline4.fit(review_train, label_train)
pip_pred4 = pipeline4.predict(review_test)
print(metrics.classification_report(label_test, pip_pred4))

filename = 'model_rf.pk'
with open('Models/'+filename, 'wb') as file:
    pickle.dump(pipeline4, file) 

              precision    recall  f1-score   support

           1       0.00      0.00      0.00       973
           2       0.00      0.00      0.00       496
           3       0.00      0.00      0.00       631
           4       0.00      0.00      0.00       709
           5       0.00      0.00      0.00       908
           6       0.00      0.00      0.00      1214
           7       0.00      0.00      0.00      2028
           8       0.23      0.24      0.24      2923
           9       0.00      0.00      0.00      2440
          10       0.22      0.83      0.34      3303

    accuracy                           0.22     15625
   macro avg       0.04      0.11      0.06     15625
weighted avg       0.09      0.22      0.12     15625

