In [1]:
import nltk
#training dataset is imported from NLTK
nltk.download('movie_reviews')

import re 
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews

[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\dell\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


In [2]:
# Turn a sequence of tokens into the feature dictionary the classifier expects.
def extract_features(word_list):
    """Return a bag-of-words feature dict for ``word_list``.

    Every distinct item of ``word_list`` becomes a key whose value is ``True``;
    repeated items collapse into a single key.

    Args:
        word_list: Iterable of hashable tokens (e.g. the words of one review).

    Returns:
        dict mapping each distinct token to ``True``.
    """
    return dict.fromkeys(word_list, True)

# File ids of the reviews, selected by the corpus' own 'pos' / 'neg' categories.
positive_fileids = movie_reviews.fileids('pos')
negative_fileids = movie_reviews.fileids('neg')

# One (feature dict, label) pair per review file.
features_positive = [
    (extract_features(movie_reviews.words(fileids=[fileid])), 'Positive')
    for fileid in positive_fileids
]
features_negative = [
    (extract_features(movie_reviews.words(fileids=[fileid])), 'Negative')
    for fileid in negative_fileids
]

# Fraction of each class that goes into the training set (the rest is held out for testing).
threshold_factor = 0.8
# Index at which each class' list of reviews is cut into train / test parts.
threshold_positive = int(threshold_factor * len(features_positive))
threshold_negative = int(threshold_factor * len(features_negative))

# Training set: the leading 80% of the positive reviews followed by the leading 80% of the negative ones.
features_train = (
    features_positive[:threshold_positive]
    + features_negative[:threshold_negative]
)
# Test set: the remaining 20% of each class, in the same positive-then-negative order.
features_test = (
    features_positive[threshold_positive:]
    + features_negative[threshold_negative:]
)

print("\nNumber of training datapoints:", len(features_train))
print("Number of test datapoints:", len(features_test))

# Fit a Naive Bayes classifier on the training pairs and score it on the held-out pairs.
classifier = NaiveBayesClassifier.train(features_train)
test_accuracy = nltk.classify.util.accuracy(classifier, features_test)
print("\nAccuracy of the classifier:", test_accuracy)

# Score the tweets collected from Twitter with the classifier trained above.
# NOTE(review): the file is read line by line, not parsed as CSV — confirm that
# every tweet sits on exactly one line of 'tweets.csv'.
# NOTE(review): tokens are passed to the classifier exactly as written (no
# lower-casing or punctuation handling) — confirm this matches the tokenisation
# of the training corpus.

# Maximum number of tweets to score, counted from the start of the file.
max_tweets = 10
#for counting number of positive reviews.
p = 0
#for counting number of negative reviews.
n = 0
# Number of tweets actually classified so far.
scored = 0
with open('tweets.csv', encoding="utf-8") as f:
    # Iterating over the file ends at end-of-file, so a file with fewer than
    # `max_tweets` lines is not padded with empty strings the way repeated
    # readline() calls would pad it.
    for line in f:
        if scored >= max_tweets:
            break
        review = line.rstrip("\n")
        tokens = review.split()
        if not tokens:
            # A blank line has no features; skip it instead of counting a
            # prediction that rests on nothing but the class prior.
            continue
        print("\nReview:", review)
        #calculating probability distribution
        probdist = classifier.prob_classify(extract_features(tokens))
        pred_sentiment = probdist.max()
        print("Predicted sentiment:", pred_sentiment)
        print("Probability:", round(probdist.prob(pred_sentiment), 2))
        if pred_sentiment == "Positive":
            p += 1
        elif pred_sentiment == "Negative":
            n += 1
        scored += 1

# Share of the scored tweets that were predicted 'Positive'.
if p + n:
    print("Overall response average:", (p/(p+n)))
else:
    # Nothing was scored (empty or all-blank file): avoid dividing by zero.
    print("Overall response average: n/a (no tweets were scored)")


Number of training datapoints: 1600
Number of test datapoints: 400

Accuracy of the classifier: 0.735

Review: Congrats 

Predicted sentiment: Positive
Probability: 0.5

Review: M3 on the lift this morning finishing up with rear sub frame replacement #bmw #bmwm3 #m3 #wedoitall #wemakeitbetter #nolimits #azevedomotorsports #lovemyjob #lindenj #newjersey #portuguese @Azevedo Motorsports https://www.instagram.com/p/B9O1hnxn6uQ/?igshid=16k8llqvzxl6c

Predicted sentiment: Positive
Probability: 0.53

Review: Happy Monday ... Here is the completed M coupe we delivered last week... Car came in for some minor insurance work and ended up getting a full restoration .. #Bmw #bmwmcoupe #restoration #wemakeitnewagain #nolimits… https://www.instagram.com/p/B9OwfM0nwwF/?igshid=10mpbw02l3lyi

Predicted sentiment: Positive
Probability: 0.88

Review: ///M Power Donuts mlit2.0 yousef.zarrouk @ffreshboii96 Tag us #bmw_world_ua @New York, New York https://www.instagram.com/p/B9OoPkwFviN/?igshid=1xdkfx69zxv

In [7]:
# Sample input reviews
input_reviews = [
    "i dont like the upholstry in this car",
]

# Classify each sample review and report the most probable label together
# with the probability the classifier assigns to it.
for review in input_reviews:
    print("\nReview:", review)
    # Features are the whitespace-separated tokens of the review.
    probdist = classifier.prob_classify(extract_features(review.split()))
    pred_sentiment = probdist.max()

    print("Predicted sentiment:", pred_sentiment)
    print("Probability:", round(probdist.prob(pred_sentiment), 2))



Review: i dont like the upholstry in this car
Predicted sentiment: Negative
Probability: 0.84


In [4]:
# Sample input reviews
input_reviews = [
    "This car is amazing",
]

# Classify each sample review and report the most probable label together
# with the probability the classifier assigns to it.
for review in input_reviews:
    print("\nReview:", review)
    # Features are the whitespace-separated tokens of the review.
    probdist = classifier.prob_classify(extract_features(review.split()))
    pred_sentiment = probdist.max()

    print("Predicted sentiment:", pred_sentiment)
    print("Probability:", round(probdist.prob(pred_sentiment), 2))



Review: This car is amazing
Predicted sentiment: Positive
Probability: 0.52


In [5]:
# Sample input reviews
input_reviews = [
    "the car has a gay color",
]

# Classify each sample review and report the most probable label together
# with the probability the classifier assigns to it.
for review in input_reviews:
    print("\nReview:", review)
    # Features are the whitespace-separated tokens of the review.
    probdist = classifier.prob_classify(extract_features(review.split()))
    pred_sentiment = probdist.max()

    print("Predicted sentiment:", pred_sentiment)
    print("Probability:", round(probdist.prob(pred_sentiment), 2))



Review: the car has a gay color
Predicted sentiment: Positive
Probability: 0.69


In [6]:
# Sample input reviews
input_reviews = [
    "This car is not good",
]

# Classify each sample review and report the most probable label together
# with the probability the classifier assigns to it.
for review in input_reviews:
    print("\nReview:", review)
    # Features are the whitespace-separated tokens of the review.
    probdist = classifier.prob_classify(extract_features(review.split()))
    pred_sentiment = probdist.max()

    print("Predicted sentiment:", pred_sentiment)
    print("Probability:", round(probdist.prob(pred_sentiment), 2))



Review: This car is not good
Predicted sentiment: Negative
Probability: 0.61
