In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score,precision_score,f1_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline

In [2]:
# Load the tab-separated movie review file; it provides a `label` and a
# `review` column.
df = pd.read_csv(
    'moviereviews2.tsv',
    sep='\t',
)

In [3]:
# Count the missing values in each column before any cleaning is done.
missing_per_column = df.isna().sum()
missing_per_column

label      0
review    20
dtype: int64

In [4]:
# Number of rows in the frame as loaded, before any rows are removed.
df.shape[0]

6000

In [5]:
# Discard every row that contains a missing value (the rows whose review
# text is NaN) and keep working with the cleaned frame under the same name.
df = df.dropna()

In [6]:
# Number of rows left once the rows with missing values have been dropped.
df.shape[0]

5980

In [7]:
# Collect the index labels of reviews that consist solely of whitespace.
# A vectorised string test on the `review` column replaces the row loop, so
# the check no longer relies on unpacking exactly (index, label, review)
# from each row and keeps working if further columns are added.
# Rows with a missing review were already dropped above, so the mask is
# purely boolean.
blanks = df.index[df['review'].str.isspace()].tolist()

In [8]:
# Show the collected indices; an empty list means no whitespace-only
# reviews were found.
blanks

[]

In [9]:
# Preview the first rows of the cleaned frame (label and review text).
df.head()

Unnamed: 0,label,review
0,pos,I loved this movie and will watch it again. Or...
1,pos,"A warm, touching movie that has a fantasy-like..."
2,pos,I was not expecting the powerful filmmaking ex...
3,neg,"This so-called ""documentary"" tries to tell tha..."
4,pos,This show has been my escape from reality for ...


In [10]:
# Features are the raw review texts; targets are the pos/neg labels.
X, y = df['review'], df['label']

# NOTE(review): train_size=0.3 fits the model on 30% of the rows and
# evaluates on the remaining 70% — confirm that test_size=0.3 was not the
# intended argument before relying on the reported scores.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.3,
    random_state=42,  # fixed seed so the split is reproducible
)

In [11]:
# Chain TF-IDF vectorisation with a linear SVM so that raw review text can
# be passed straight to fit() and predict().
text_clf = Pipeline(steps=[
    ('tfid', TfidfVectorizer()),
    ('svm', LinearSVC()),
])

In [12]:
# Fit the vectoriser and the classifier on the training split; the fitted
# pipeline is the cell's displayed value.
text_clf.fit(X_train, y_train)

Pipeline(steps=[('tfid', TfidfVectorizer()), ('svm', LinearSVC())])

In [13]:
# Predict a label for every review in the held-out split.
y_preds = text_clf.predict(X_test)

In [14]:
# Cross-tabulate true labels against predicted labels for the held-out split.
confusion_matrix(y_true=y_test, y_pred=y_preds)

array([[1876,  219],
       [ 162, 1929]], dtype=int64)

In [15]:
# Per-class precision, recall and F1 on the held-out split; printed so the
# text table keeps its line breaks.
report_text = classification_report(y_true=y_test, y_pred=y_preds)
print(report_text)

              precision    recall  f1-score   support

         neg       0.92      0.90      0.91      2095
         pos       0.90      0.92      0.91      2091

    accuracy                           0.91      4186
   macro avg       0.91      0.91      0.91      4186
weighted avg       0.91      0.91      0.91      4186



In [16]:
# Overall fraction of held-out reviews that were labelled correctly.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_preds)
print(test_accuracy)

0.9089823220258003


In [17]:
# Sanity-check the fitted pipeline on a single hand-written review.
sample_review = """Negative is a rare treat. It is a limited budget Indie action film that has the look, feel, and heartfelt acting of a high-quality big budget movie. Katia Winter gives Academy Award worthy performance. Would be terrific to see on the big screen. No spoilers. Just sit back and enjoy as the story unfolds. Would Rate as 12 if possible."""
text_clf.predict([sample_review])

array(['pos'], dtype=object)