# Applying Sentiment Analysis to people's feedback to identify whether they liked the session or not

In [1]:
import os

import pandas as pd

# Location of the reviews CSV. The default is the original local path; set the
# REVIEWS_CSV environment variable to run the notebook on another machine
# without editing this cell.
REVIEWS_CSV = os.environ.get("REVIEWS_CSV", r"C:\Users\AMITESH\Downloads\Reviews.csv")

# Load the reviews and preview the first rows.
data = pd.read_csv(REVIEWS_CSV)
data.head()

Unnamed: 0,Review,feedback
0,Wow... Loved this place.,like
1,Crust is not good.,dislike
2,Not tasty and the texture was just nasty.,dislike
3,Stopped by during the late May bank holiday of...,like
4,The selection on the menu was great and so wer...,like


In [2]:
# Count the missing values in each column.
data.isna().sum()

Review      0
feedback    0
dtype: int64

In [3]:
# Number of reviews per feedback label, to check class balance.
label_counts = data['feedback'].value_counts()
label_counts

like       501
dislike    499
Name: feedback, dtype: int64

# Removing Stopwords 

In [4]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS as stopwords

# Work on a private copy of spaCy's stop-word set with 'not' taken out, so that
# negations such as "not good" survive the filtering. Using a copy (instead of
# calling stopwords.remove('not') on the shared set) keeps this cell
# re-runnable: removing an already-removed element raises KeyError.
review_stopwords = set(stopwords) - {'not'}

# The lookup set is built once above rather than once per token.
# NOTE(review): matching is case-sensitive, so capitalised tokens such as
# "The" are kept — confirm whether that is intended.
data['Review'] = data['Review'].apply(
    lambda x: ' '.join(t for t in x.split() if t not in review_stopwords)
)
data

Unnamed: 0,Review,feedback
0,Wow... Loved place.,like
1,Crust not good.,dislike
2,Not tasty texture nasty.,dislike
3,Stopped late May bank holiday Rick Steve recom...,like
4,The selection menu great prices.,like
...,...,...
995,I think food flavor texture lacking.,dislike
996,Appetite instantly gone.,dislike
997,Overall I not impressed not back.,dislike
998,"The experience underwhelming, I think we'll Ni...",dislike


# Removing special characters and punctuation

In [5]:
import re

# Matches any single character that is not a word character.
non_word = re.compile(r'[^\w]')


def _blank_out_non_word(text):
    """Return `text` with every non-word character replaced by a space."""
    return non_word.sub(" ", text)


data['Review'] = data['Review'].apply(_blank_out_non_word)
data

Unnamed: 0,Review,feedback
0,Wow Loved place,like
1,Crust not good,dislike
2,Not tasty texture nasty,dislike
3,Stopped late May bank holiday Rick Steve recom...,like
4,The selection menu great prices,like
...,...,...
995,I think food flavor texture lacking,dislike
996,Appetite instantly gone,dislike
997,Overall I not impressed not back,dislike
998,The experience underwhelming I think we ll Ni...,dislike


In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
reviews = data['Review']
labels = data['feedback']
X_train, X_test, Y_train, Y_test = train_test_split(
    reviews,
    labels,
    test_size=0.2,
    random_state=0,
)


In [15]:
# Display the training portion of the review text.
X_train

687     The decor nice  piano music soundtrack pleasant 
500    I taste Mom s multi grain pumpkin pancakes pec...
332            We fantastic service  pleased atmosphere 
979           I kept looking time soon 35 minutes  food 
817          The seasonal fruit fresh white peach puree 
                             ...                        
835      I paid bill not tip I felt server terrible job 
192                   When order arrived  gyros missing 
629    The staff super friendly helpful  especially c...
559                                     I  heart  place 
684                                     Damn good steak 
Name: Review, Length: 800, dtype: object

# Bag of Words (TF-IDF weighting)

In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a standalone TF-IDF vectorizer on the training text.
# The matrix is stored under its own name rather than overwriting X_train:
# the pipeline fitted later in this notebook contains its own TfidfVectorizer
# and must receive the raw review strings, not an already-vectorized matrix.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)

# Support Vector Machine and pipeline 

In [16]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Chain TF-IDF feature extraction with an SVM classifier so that raw review
# text can be passed straight to fit() and predict().
steps = [
    ('tfidf', TfidfVectorizer()),
    ('clf', SVC(C=1000, gamma='auto')),
]
clf = Pipeline(steps)
clf.fit(X_train, Y_train)

Pipeline(memory=None,
         steps=[('tfidf',
                 TfidfVectorizer(analyzer='word', binary=False,
                                 decode_error='strict',
                                 dtype=<class 'numpy.float64'>,
                                 encoding='utf-8', input='content',
                                 lowercase=True, max_df=1.0, max_features=None,
                                 min_df=1, ngram_range=(1, 1), norm='l2',
                                 preprocessor=None, smooth_idf=True,
                                 stop_words=None, strip_accents=None,
                                 sublinear_tf=False,
                                 token_pattern='(?u)\\b\\w\\w+\\b',
                                 tokenizer=None, use_idf=True,
                                 vocabulary=None)),
                ('clf',
                 SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='aut

In [17]:
# Predict a feedback label for every held-out review.
Y_predict = clf.predict(X=X_test)

In [18]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Confusion matrix of true vs. predicted labels on the held-out set.
confusion_matrix(y_true=Y_test, y_pred=Y_predict)

array([[82, 15],
       [20, 83]], dtype=int64)

In [19]:
# Per-class precision, recall and F1 on the held-out set.
report = classification_report(Y_test, Y_predict)
print(report)

              precision    recall  f1-score   support

     dislike       0.80      0.85      0.82        97
        like       0.85      0.81      0.83       103

    accuracy                           0.82       200
   macro avg       0.83      0.83      0.82       200
weighted avg       0.83      0.82      0.83       200



In [20]:
# Overall accuracy on the held-out set, expressed as a percentage.
accuracy_pct = accuracy_score(Y_test, Y_predict) * 100
print(" The accuracy level is =", accuracy_pct, "%")

 The accuracy level is = 82.5 %


# Validation on real-time messages

In [21]:
# Spot-check with a clearly positive message.
# NOTE(review): this text is passed to the model as-is, without the stop-word
# and punctuation cleaning applied to the training reviews — confirm that is acceptable.
message = 'I loved the session conducted'
clf.predict([message])

array(['like'], dtype=object)

In [22]:
# Spot-check with a clearly negative message.
message = 'I Hate the session conducted'
clf.predict([message])

array(['dislike'], dtype=object)

In [25]:
# Spot-check with a negation written as a contraction without an apostrophe.
message = 'The class wasnt more clear'
clf.predict([message])

array(['dislike'], dtype=object)

In [26]:
# Spot-check with an explicit "not" negation.
message = 'The professor was not having knowledge'
clf.predict([message])

array(['dislike'], dtype=object)