# Sentiment Analysis ML approach

### Import dependencies

In [1]:
import os
import time
from __future__ import print_function
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

#%config IPCompleter.greedy=True

### Build training dataset and testing dataset

In [2]:
data_dir = "./data"
classes = ['positiveReviews', 'negativeReviews']

# Read every review under data_dir/<class>/ and split it into train/test sets.
# The class directory name doubles as the label for each review.
train_data = []
train_labels = []
test_data = []
test_labels = []
for curr_class in classes:
    dirname = os.path.join(data_dir, curr_class)
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # position of each review stable, so index-based look-ups in later cells
    # (e.g. test_data[10]) show the same review on every run and machine.
    for fname in sorted(os.listdir(dirname)):
        # NOTE(review): open() uses the platform default encoding here;
        # consider passing encoding=... once the dataset encoding is confirmed.
        with open(os.path.join(dirname, fname), 'r') as f:
            content = f.read()
        # Files whose name starts with '12' are held out as the test set;
        # everything else is used for training.
        if fname.startswith('12'):
            test_data.append(content)
            test_labels.append(curr_class)
        else:
            train_data.append(content)
            train_labels.append(curr_class)
                


### Generate feature vectors using TfidfVectorizer

In [3]:
# Turn the raw review text into TF-IDF feature vectors.
vectorizer = TfidfVectorizer(
    min_df=5,            # drop terms seen in fewer than 5 documents
    max_df=0.8,          # drop terms seen in more than 80% of documents
    sublinear_tf=True,   # use sublinear (logarithmic) term-frequency scaling
    use_idf=True,        # weight terms by inverse document frequency
)

# The vocabulary and IDF weights are learned from the training reviews only;
# the test reviews are then projected with that already-fitted vectorizer.
train_vectors = vectorizer.fit_transform(train_data)
test_vectors = vectorizer.transform(test_data)


### Perform training using different ML algorithms

In [4]:
# Train four classifiers on the same TF-IDF training vectors and keep each
# model's predictions for the test vectors under its own name, so the report
# cells below can evaluate them independently.

# Perform classification with MultinomialNB
clf = MultinomialNB()
clf.fit(train_vectors, train_labels)
prediction = clf.predict(test_vectors)

# Perform classification with SVM, kernel=rbf (SVC called with its defaults)
classifier_rbf = svm.SVC()
classifier_rbf.fit(train_vectors, train_labels)
prediction_rbf = classifier_rbf.predict(test_vectors)

# Perform classification with SVM, kernel=linear
classifier_linear = svm.SVC(kernel='linear')
classifier_linear.fit(train_vectors, train_labels)
prediction_linear = classifier_linear.predict(test_vectors)

# Perform classification with LinearSVC (liblinear-based linear SVM).
# LinearSVC's solver uses a random number generator while fitting, so the
# seed is pinned to make the reported scores reproducible between runs.
classifier_liblinear = svm.LinearSVC(random_state=0)
classifier_liblinear.fit(train_vectors, train_labels)
prediction_liblinear = classifier_liblinear.predict(test_vectors)

### Test the result of MultinomialNB

In [9]:
# Report how MultinomialNB performed on the held-out reviews:
# per-class precision/recall/F1 first, then the overall accuracy.
print("\nResults for NaiveBayes (MultinomialNB) ")
print(classification_report(test_labels, prediction))
print("Accuracy of Naive Bayes Algorithm:", accuracy_score(test_labels, prediction), sep="")

# Show one concrete example: the label predicted for the test review at
# index 10, followed by the text of that review.
print("\n\n\n")
print("Reviews Prediction")
print("\nPredicted label is------> ", prediction[10], sep="")
print("\nMovie Review is ------> \n", test_data[10], sep="")


Results for NaiveBayes (MultinomialNB) 
                 precision    recall  f1-score   support

negativeReviews       0.79      0.87      0.82       611
positiveReviews       0.85      0.77      0.81       611

    avg / total       0.82      0.82      0.82      1222

Accuracy of Naive Bayes Algorithm:0.8158756137479541




Reviews Prediction

Predicted label is------> positiveReviews

Movie Review is ------> 
In a lot of his films (Citizen Kane, Confidential Report, Touch of evil) Orson Welles gave him the role of an exuberant men. In "The Lady from Shanghai" it's the only time I see him holding the role of the victim. The role of the culprit, he gave it to Rita Hayworth, I guess it's because he was in love with her. Therefore, it's an interesting film. But I find the story excellent too. The direction is genius, as usual with Welles : two scenes are particularly brilliant: the one in the aquarium and the final one with the mirrors. This film is brilliant.(10/10)


### Test the result of SVM with rbf kernel

In [11]:
# Print results in a nice table for SVM algorithm with rbf kernel:
# per-class precision/recall/F1 first, then the overall accuracy.
print("\nResults for SVM algorithm with rbf kernel")
print(classification_report(test_labels, prediction_rbf))
print ("Accuracy of SVM algorithm with rbf kernel:" + str(accuracy_score(test_labels, prediction_rbf)))

# Show one concrete example. The label must come from prediction_rbf (this
# model's output), not from the Naive Bayes `prediction` array.
print("\n\n\n")
print ("Reviews Prediction")
print ("\nPredicted label is------> "+prediction_rbf[10] )
print ("\nMovie Review is ------> \n"+test_data[10])


Results for SVM algorithm with rbf kernel
                 precision    recall  f1-score   support

negativeReviews       0.98      0.31      0.48       611
positiveReviews       0.59      1.00      0.74       611

    avg / total       0.79      0.65      0.61      1222

Accuracy of SVM algorithm with rbf kernel:0.6546644844517185




Reviews Prediction

Predicted label is------> positiveReviews

Movie Review is ------> 
In a lot of his films (Citizen Kane, Confidential Report, Touch of evil) Orson Welles gave him the role of an exuberant men. In "The Lady from Shanghai" it's the only time I see him holding the role of the victim. The role of the culprit, he gave it to Rita Hayworth, I guess it's because he was in love with her. Therefore, it's an interesting film. But I find the story excellent too. The direction is genius, as usual with Welles : two scenes are particularly brilliant: the one in the aquarium and the final one with the mirrors. This film is brilliant.(10/10)


### Test the result of SVM with linear kernel

In [12]:
# Print results in a nice table for SVM algorithm with linear kernel:
# per-class precision/recall/F1 first, then the overall accuracy.
print("\nResults for SVM algorithm with linear kernel")
print(classification_report(test_labels, prediction_linear))
# The label names the linear kernel, matching the model actually being scored.
print ("Accuracy of SVM algorithm with linear kernel:" + str(accuracy_score(test_labels, prediction_linear)))

# Show one concrete example. The label must come from prediction_linear (this
# model's output), not from the Naive Bayes `prediction` array.
print("\n\n\n")
print ("Reviews Prediction")
print ("\nPredicted label is------> "+prediction_linear[10] )
print ("\nMovie Review is ------> \n"+test_data[10])


Results for SVM algorithm with linear kernel
                 precision    recall  f1-score   support

negativeReviews       0.82      0.86      0.84       611
positiveReviews       0.85      0.81      0.83       611

    avg / total       0.84      0.84      0.84      1222

Accuracy of SVM algorithm with rbf kernel:0.8363338788870703




Reviews Prediction

Predicted label is------> positiveReviews

Movie Review is ------> 
In a lot of his films (Citizen Kane, Confidential Report, Touch of evil) Orson Welles gave him the role of an exuberant men. In "The Lady from Shanghai" it's the only time I see him holding the role of the victim. The role of the culprit, he gave it to Rita Hayworth, I guess it's because he was in love with her. Therefore, it's an interesting film. But I find the story excellent too. The direction is genius, as usual with Welles : two scenes are particularly brilliant: the one in the aquarium and the final one with the mirrors. This film is brilliant.(10/10)


### Test the result of SVM with LinearSVC

In [13]:
# Print results in a nice table for the LinearSVC (liblinear) model:
# per-class precision/recall/F1 first, then the overall accuracy.
print("\nResults for SVM algorithm with liblinear kernel")
print(classification_report(test_labels, prediction_liblinear))
# The label matches the header above instead of the copy-pasted "rbf kernel".
print ("Accuracy of SVM algorithm with liblinear kernel:" + str(accuracy_score(test_labels, prediction_liblinear)))

# Show one concrete example. The label must come from prediction_liblinear
# (this model's output), not from the Naive Bayes `prediction` array.
print("\n\n\n")
print ("Reviews Prediction")
print ("\nPredicted label is------> "+prediction_liblinear[10] )
print ("\nMovie Review is ------> \n"+test_data[10])


Results for SVM algorithm with liblinear kernel
                 precision    recall  f1-score   support

negativeReviews       0.82      0.86      0.84       611
positiveReviews       0.85      0.81      0.83       611

    avg / total       0.84      0.84      0.84      1222

Accuracy of SVM algorithm with rbf kernel:0.8363338788870703




Reviews Prediction

Predicted label is------> positiveReviews

Movie Review is ------> 
In a lot of his films (Citizen Kane, Confidential Report, Touch of evil) Orson Welles gave him the role of an exuberant men. In "The Lady from Shanghai" it's the only time I see him holding the role of the victim. The role of the culprit, he gave it to Rita Hayworth, I guess it's because he was in love with her. Therefore, it's an interesting film. But I find the story excellent too. The direction is genius, as usual with Welles : two scenes are particularly brilliant: the one in the aquarium and the final one with the mirrors. This film is brilliant.(10/10)
