In [5]:
import pandas as pd
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import  accuracy_score, f1_score
from sklearn import svm
import pickle

# Load Data

In [6]:
# Read the labeled movie reviews (tab-separated file) into a DataFrame and
# preview the first few rows.
data = pd.read_csv(
    filepath_or_buffer="../Naive_Bayes/labeledTrainData.tsv",
    delimiter="\t",
)
data.head()

Unnamed: 0,id,sentiment,review
0,5814_8,1,With all this stuff going down at the moment w...
1,2381_9,1,"\The Classic War of the Worlds\"" by Timothy Hi..."
2,7759_3,0,The film starts with a manager (Nicholas Bell)...
3,3630_4,0,It must be assumed that those who praised this...
4,9495_8,1,Superbly trashy and wondrously unpretentious 8...


# We split the data into training and testing sets:

In [7]:
# Pull the review text (features) and sentiment labels out of the frame
# as plain arrays.
x_train = data['review'].values
y_train = data['sentiment'].values

# Hold out 25% of the reviews for evaluation. The split is shuffled with a
# fixed seed and stratified on the label.
train_data, test_data, train_labels, test_labels = train_test_split(
    x_train,
    y_train,
    test_size=0.25,
    shuffle=True,
    stratify=y_train,
    random_state=42,
)

In [8]:
# English stop words from NLTK (requires the NLTK "stopwords" corpus to be
# available locally).
en_stopwords = set(stopwords.words("english"))

# TF-IDF features over word tokens.
#   min_df=5          -> drop terms that appear in fewer than 5 documents
#   max_df=0.8        -> drop terms that appear in more than 80% of documents
#   sublinear_tf=True -> use 1 + log(tf) instead of the raw term frequency
vectorizer = TfidfVectorizer(
    min_df=5,
    max_df=0.8,
    sublinear_tf=True,
    use_idf=True,
    analyzer='word',
    lowercase=True,
    # scikit-learn documents `stop_words` as 'english', a list, or None; a raw
    # set is rejected by parameter validation in recent releases. Sorting also
    # keeps the stored parameter (and the pickled vectorizer) deterministic.
    stop_words=sorted(en_stopwords),
)

# Learn the vocabulary and IDF weights on the training split only, then reuse
# them for the test split so no test information leaks into the features.
train_vectors = vectorizer.fit_transform(train_data)
test_vectors = vectorizer.transform(test_data)

# Creating SVM Model

In [10]:
# Train a support vector classifier with a linear kernel on the TF-IDF
# training features (fit returns the fitted estimator itself).
svm_model = svm.SVC(kernel='linear').fit(train_vectors, train_labels)

# Predict sentiment labels for the held-out reviews.
pred = svm_model.predict(test_vectors)

# check accuracy

In [11]:

# Testing phase: score the held-out predictions.
# The results are stored under their own names. Assigning the rounded value
# back to `accuracy_score` would shadow the imported sklearn function and make
# this cell fail with "'float' object is not callable" when it is run again.
accuracy = round(accuracy_score(test_labels, pred), 3)
f1 = round(f1_score(test_labels, pred), 3)
print("Accuracy: ", accuracy)
print("F1: ", f1)


Accuracy:  0.895
F1:  0.896


# test

In [13]:
# Quick smoke test: classify a couple of hand-written snippets.
review = ["love", "used to love"]

# Reuse the fitted vectorizer so the snippets share the training vocabulary.
review_vector = vectorizer.transform(review)
output = svm_model.predict(review_vector)

for idx, label in enumerate(output):
    sentiment = "pos" if label == 1 else "neg"
    print(review[idx], ":", sentiment)

love : pos
used to love : pos


# save model

In [14]:
# Persist the fitted vectorizer and classifier so they can be reloaded
# without retraining. The context managers guarantee each file is flushed
# and closed as soon as the dump finishes, rather than whenever the
# anonymous file object happens to be garbage-collected.

# pickling the vectorizer
with open('vectorizer.sav', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

# pickling the model
with open('model.sav', 'wb') as model_file:
    pickle.dump(svm_model, model_file)

# Load Model

In [15]:
# Reload the persisted classifier and vectorizer and classify a new sentence.
# NOTE: unpickling executes arbitrary code from the file, so only load
# .sav files that were produced by a trusted source (e.g. this notebook).
with open('model.sav', 'rb') as model_file:
    model = pickle.load(model_file)
with open('vectorizer.sav', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

text = 'welcome to nepal'
vectorize_text = vectorizer.transform([text])
out = model.predict(vectorize_text)

# `predict` returns one label per input row; index the single prediction
# explicitly instead of relying on the truth value of a one-element array.
print(text, ":", "pos" if out[0] == 1 else "neg")

welcome to nepal : pos
