In [166]:
import pandas as pd
import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
import xml.etree.ElementTree as ET
from scipy.sparse import hstack
import numpy as np
import warnings

In [167]:
path_train = r'C:\Users\gaurav\Desktop\sentiment code\Train_English_SB2.xml'
path_test =r'C:\Users\gaurav\Desktop\sentiment code\TEST.xml'

home = r'C:\Users\gaurav\anaconda3\stanford-tagger-4.0.0'
from nltk.tag.stanford import StanfordPOSTagger as POS_Tag
from nltk import word_tokenize
_path_to_model = home + '/models/english-bidirectional-distsim.tagger' 
_path_to_jar = home + '/stanford-postagger.jar'
stanford_tag = POS_Tag(model_filename=_path_to_model, path_to_jar=_path_to_jar)
import os
java_path = "C:\Program Files\Java\jdk-11.0.2\bin\java.exe"
os.environ['JAVAHOME'] = java_path


In [168]:
#xml parser
def get_list(path):
    tree=ET.parse(path)
    root = tree.getroot()
    text_list = []
    opinion_list = []
    for review in root.findall('Review'):
        text_string=""
        opinion_inner_list=[]
        for sent in review.findall('./sentences/sentence'):
            text_string= text_string+ " "+ sent.find('text').text
        text_list.append(text_string)
        for opinion in review.findall('./Opinions/Opinion'):
            opinion_dict = {
                opinion.get('category').replace('#','_'): opinion.get('polarity')
            }
            opinion_inner_list.append(opinion_dict)
        opinion_list.append(opinion_inner_list)
    return text_list,opinion_list

In [169]:
#generate data frame
def get_data_frame(text_list,opinion_list,most_common_aspect):
    data={'Review':text_list}
    df = pd.DataFrame(data)
    if opinion_list:
        for inner_list in opinion_list:
            for _dict in inner_list:
                for key in _dict:
                    if key in most_common_aspect:
                        df.loc[opinion_list.index(inner_list),key]=_dict[key]
    return df

In [170]:
#generate data frame for aspect extraction task
def get_aspect_data_frame(df,most_common_aspect):
    for common_aspect in most_common_aspect:
        df[common_aspect]=df[common_aspect].replace(['positive','negative','neutral','conflict'],[1,1,1,1])
    df = df.fillna(0)
    return df

In [171]:
def posTag(review):
    tagged_text_list=[]
    for text in review:
        tagged_text_list.append(stanford_tag.tag(word_tokenize(text)))
    return tagged_text_list

In [172]:
def get_most_common_aspect(opinion_list):
    import nltk
    opinion= []
    for inner_list in opinion_list:
        for _dict in inner_list:
            for key in _dict:
                opinion.append(key)
    most_common_aspect = [k for k,v in nltk.FreqDist(opinion).most_common(20)]
    return most_common_aspect

In [173]:
def filterTag(tagged_review):
    final_text_list=[]
    for text_list in tagged_review:
        final_text=[]
        for word,tag in text_list:
            if tag in ['NN','NNS','NNP','NNPS','RB','RBR','RBS','JJ','JJR','JJS','VB','VBD','VBG','VBN','VBP','VBZ']:
                final_text.append(word)
        final_text_list.append(' '.join(final_text))
    return final_text_list

In [174]:
train_text_list,train_opinion_list = get_list(path_train)
most_common_aspect = get_most_common_aspect(train_opinion_list)

In [175]:
tagged_text_list_train=joblib.load('tagged_text_list_train.pkl')
final_train_text_list=filterTag(tagged_text_list_train)
df_train = get_data_frame(final_train_text_list,train_opinion_list,most_common_aspect)
df_train_aspect = get_aspect_data_frame(df_train,most_common_aspect)
print(df_train_aspect)

                                                Review  LAPTOP_GENERAL  \
0    Most everything is fine machine speed capacity...             1.0   
1    love size keyboard functions do n't really hav...             1.0   
2    love product is Toshiba has camera always buy ...             1.0   
3    bought laptop Was worst Laptop 've ever bought...             1.0   
4    So far great product High price tag however St...             1.0   
5    laptop is great price has sleek look Runs smoo...             1.0   
6    HP Pavilion DV9000 Notebook PC first got compu...             1.0   
7    Great price computer bought laptop computer pa...             1.0   
8    have loved took box battery is really long fea...             1.0   
9    Waited getting computer has been great change ...             1.0   
10   had most features power wanted replace desktop...             1.0   
11   MacBook is outstanding product great value eas...             1.0   
12   image is great soud is excelent l

In [176]:
df_train_aspect = df_train_aspect.reindex(sorted(df_train_aspect.columns), axis=1)

In [177]:
#Similar for test list
test_text_list,test_opinion_list = get_list(path_test)
tagged_text_list_test=joblib.load('tagged_text_list_test.pkl')
final_test_text_list=filterTag(tagged_text_list_test)

In [178]:
df_test = get_data_frame(final_test_text_list,test_opinion_list,most_common_aspect)
df_test_aspect = get_aspect_data_frame(df_test,most_common_aspect)
df_test_aspect = df_test_aspect.reindex(sorted(df_test_aspect.columns), axis=1)

In [179]:
#Sort the data frame according to aspect's name and separate data(X) and target(y)
df_train_aspect = df_train_aspect.sample(frac=1).reset_index(drop=True)
X_train= df_train_aspect.Review
y_train = df_train_aspect.drop('Review',1)
df_test_aspect = df_test_aspect.sample(frac=1).reset_index(drop=True) 
X_test = df_test_aspect.Review
y_test = df_test_aspect.drop('Review',1)
final_most_common_aspect = list(y_train)

In [180]:
print(y_train)

     BATTERY_OPERATION_PERFORMANCE  COMPANY_GENERAL  DISPLAY_DESIGN_FEATURES  \
0                              1.0              0.0                      1.0   
1                              1.0              1.0                      0.0   
2                              1.0              0.0                      0.0   
3                              1.0              0.0                      0.0   
4                              0.0              0.0                      0.0   
5                              1.0              0.0                      1.0   
6                              0.0              0.0                      0.0   
7                              0.0              0.0                      0.0   
8                              0.0              0.0                      0.0   
9                              0.0              0.0                      0.0   
10                             0.0              0.0                      0.0   
11                             0.0      

In [181]:
print(y_test)


   BATTERY_OPERATION_PERFORMANCE COMPANY_GENERAL DISPLAY_DESIGN_FEATURES  \
0                              0               0                           
1                                                                      0   
2                              0                                       0   
3                              0               0                       0   
4                                              0                       0   
5                              0               0                       0   
6                              0                                       0   
7                              0               0                       0   
8                                              0                       0   
9                              0               0                       0   
10                             0               0                       0   
11                                             0                       0   
12          

In [182]:
#y_test.columns = [''] * len(y_test.columns)

In [183]:
#Change y_train to numpy array
import numpy as np
y_train = np.asarray(y_train, dtype=np.int64)
y_test  = np.asarray(y_test)

In [184]:
print(y_train)
print(np.size(y_train))
print(np.size(y_train,1))
print(np.size(y_train,0))

[[1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0]
 [1 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0 0 1]
 [1 0 0 0 1 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0]
 [1 0 1 0 0 0 0 1 1 1 1 1 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0

In [185]:
print(y_test)
print(np.size(y_test))
print(np.size(y_test,1))
print(np.size(y_test,0))

[[0 0 '' 0 0 0 0 '' '' 0 '' 0 '' 0 0 0 0 '' '' 0]
 ['' '' 0 0 '' '' 0 '' '' 0 '' 0 '' 0 0 0 0 0 0 0]
 [0 '' 0 0 0 0 0 0 '' 0 '' 0 0 '' 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 '' '' 0 '' 0 '' 0 '' 0 0 0 0 0]
 ['' 0 0 0 0 0 0 '' '' '' 0 0 '' 0 0 0 0 0 0 0]
 [0 0 0 0 '' 0 0 '' '' 0 '' 0 0 0 0 0 0 '' 0 0]
 [0 '' 0 0 0 0 0 '' '' 0 '' 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 '' 0 0 '' 0 0 0 0 '' 0 0 0 0 0 0]
 ['' 0 0 0 0 0 0 0 '' 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 '' 0 '' 0 '' '' '' 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 '' '' '' 0 0 0 '' 0 0 0 0 0]
 ['' 0 0 0 0 0 0 '' '' 0 0 0 '' 0 0 0 0 0 0 0]
 [0 '' 0 0 0 0 0 0 '' '' '' 0 0 '' 0 0 0 0 0 '']
 ['' 0 '' 0 '' 0 0 '' '' '' '' 0 '' '' '' 0 0 '' 0 '']
 [0 0 0 0 0 0 0 '' '' '' '' 0 '' 0 '' 0 0 0 0 0]
 ['' 0 0 0 0 0 0 0 '' 0 0 0 0 0 0 0 0 0 0 '']
 [0 0 0 0 0 0 0 0 '' 0 '' 0 0 0 '' 0 0 0 '' 0]
 [0 0 0 0 '' 0 0 '' '' 0 '' 0 0 0 '' 0 0 0 0 0]
 [0 0 0 0 '' 0 0 0 '' 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 '' 0 0 0 '' '' '' 0 0 0 '' '' 0 0 0 0]
 ['' 0 0 0 0 0 0 '' '' '' '' '' '' 0 0 0

In [186]:
import random
#y_test[isinstance(y_test,str)] = random.choice([0,1])
#y_test[isinstance(y_test,str)] = 0

In [187]:
for i in range(0,80):
    for j in range(0,20):
        #print(y_test[i,j])
        if  isinstance(y_test[i,j],str):
            if i%2== 0 :
                y_test[i,j] = int(0)
            else :
                y_test[i,j] = int(1)
            #y_test[i,j] = random.choice([int(0),int(1)])


In [188]:
print(y_test)

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 1 0 0 1 1 0 1 1 0 1 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 1 0 1 0 0 1 1 1 1 0 1 1 1 0 0 1 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 1 1 0 1 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1

In [189]:
count = 0
print(np.size(y_test))
print(np.size(y_test,1))
print(np.size(y_test,0))
for i in range(0,80):
    for j in range(0,20):
        print(type(y_test[i,j]))
        count = count+1
print(count)


1600
20
80
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'i

<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class

In [190]:
y_test = y_test.astype(np.int32)

In [191]:
count = 0
print(np.size(y_test))
print(np.size(y_test,1))
print(np.size(y_test,0))
for i in range(0,80):
    for j in range(0,20):
        print(type(y_test[i,j]))
        count = count+1
print(count)

1600
20
80
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>

<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'nu

In [192]:
from sklearn.feature_extraction.text import CountVectorizer
from nltk import word_tokenize          
from nltk.stem import WordNetLemmatizer 
vect = CountVectorizer(max_df=1.0,stop_words='english')  
X_train_dtm = vect.fit_transform(X_train)
X_test_dtm = vect.transform(X_test)

In [193]:
#Create various models.
nb_classif = OneVsRestClassifier(MultinomialNB()).fit(X_train_dtm, y_train)
C = 1.0
svc = OneVsRestClassifier(svm.SVC(kernel='linear', C=C)).fit(X_train_dtm, y_train)
lin_svc = OneVsRestClassifier(svm.LinearSVC(C=C)).fit(X_train_dtm, y_train)
sgd = OneVsRestClassifier(SGDClassifier()).fit(X_train_dtm,y_train)

In [194]:
#Predict 
y_pred_class = nb_classif.predict(X_test_dtm)
y_pred_class_svc = svc.predict(X_test_dtm)
y_pred_class_lin_svc = lin_svc.predict(X_test_dtm)
y_pred_class_sgd = sgd.predict(X_test_dtm)

In [195]:
#print(y_pred_class)


In [196]:
count = 0
print(np.size(y_pred_class))
print(np.size(y_pred_class,1))
print(np.size(y_pred_class,0))
for i in range(0,80):
    for j in range(0,20):
        print(type(y_pred_class[i,j]))
        count = count+1
print(count)


1600
20
80
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.int32'>

In [197]:
#Following code to test metrics of all aspect extraction classifiers
from sklearn import metrics

In [198]:
#import pandas as pd
#pd.set_option('display.max_rows', None, 'display.max_columns', None)
#pd.set_option("display.max_rows", None, "display.max_columns", None)
import sys
import numpy
numpy.set_printoptions(threshold=sys.maxsize)

#print(y_test)

In [199]:
#print(y_pred_class)

In [153]:
print("printing accuracy score of different models:-")
print("multinomial naive bayes classifier:-",metrics.accuracy_score(y_test,y_pred_class))
print("support vector classifier:-" ,metrics.accuracy_score(y_test,y_pred_class_svc))
print("linear support vector classifier",metrics.accuracy_score(y_test,y_pred_class_lin_svc))
print("sgd classifier",metrics.accuracy_score(y_test,y_pred_class_sgd))

printing accuracy score of different models:-
multinomial naive bayes classifier:- 0.025
support vector classifier:- 0.0625
linear support vector classifier 0.0625
sgd classifier 0.0125


In [154]:
print("printing precision score of different models:-")
print("multinomial naive bayes classifier:-",metrics.precision_score(y_test,y_pred_class,average='micro'))
print("support vector classifier:-",metrics.precision_score(y_test,y_pred_class_svc,average='micro'))
print("linear support vector classifier:-",metrics.precision_score(y_test,y_pred_class_lin_svc,average='micro'))
print("sgd classifier:-",metrics.precision_score(y_test,y_pred_class_sgd,average='micro'))

printing precision score of different models:-
multinomial naive bayes classifier:- 0.376984126984127
support vector classifier:- 0.35561497326203206
linear support vector classifier:- 0.3646723646723647
sgd classifier:- 0.3282208588957055


In [155]:
print("printing recall score of different models:-")
print("multinomial naive bayes classifier:-",metrics.recall_score(y_test,y_pred_class,average='micro'))
print("support vector classifier:-",metrics.recall_score(y_test,y_pred_class_svc,average='micro'))
print("linear support vector classifier:-",metrics.recall_score(y_test,y_pred_class_lin_svc,average='micro'))
print("sgd classifier:-",metrics.recall_score(y_test,y_pred_class_sgd,average='micro'))

printing recall score of different models:-
multinomial naive bayes classifier:- 0.4947916666666667
support vector classifier:- 0.6927083333333334
linear support vector classifier:- 0.6666666666666666
sgd classifier:- 0.5572916666666666


In [156]:
print("printing F1 score of different models:-")
print("multinomial naive bayes classifier:-",metrics.f1_score(y_test,y_pred_class,average='micro'))
print("support vector classifier:-",metrics.f1_score(y_test,y_pred_class_svc,average='micro'))
print("linear support vector classifier:-",metrics.f1_score(y_test,y_pred_class_lin_svc,average='micro'))
print("sgd classifier:-",metrics.f1_score(y_test,y_pred_class_sgd,average='micro'))

printing F1 score of different models:-
multinomial naive bayes classifier:- 0.4279279279279279
support vector classifier:- 0.46996466431095407
linear support vector classifier:- 0.4714548802946593
sgd classifier:- 0.4131274131274131


In [157]:
#printing classification report of different models
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    print(metrics.classification_report(y_test, y_pred_class))
    print(metrics.classification_report(y_test, y_pred_class_svc))
    print(metrics.classification_report(y_test, y_pred_class_lin_svc))
    print(metrics.classification_report(y_test, y_pred_class_sgd))

              precision    recall  f1-score   support

           0       0.67      0.29      0.40         7
           1       0.47      0.53      0.50        15
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00        14
           5       0.00      0.00      0.00         5
           6       0.00      0.00      0.00         3
           7       0.27      0.56      0.37        16
           8       0.50      1.00      0.67        40
           9       0.33      0.33      0.33         9
          10       0.33      0.77      0.46        22
          11       0.00      0.00      0.00         2
          12       0.14      0.17      0.15        12
          13       0.26      0.50      0.34        12
          14       0.46      0.50      0.48        12
          15       0.00      0.00      0.00         1
          16       0.00      0.00      0.00         2
          17       0.00    