In [1]:
import numpy as np
import pandas as pd
import re
import os

In [2]:
rev = pd.read_csv('merged.csv', index_col=False)
rev

Unnamed: 0,Review_Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating
0,on 10/13/05 15:30 PM (PDT),roadking,2002 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Great delivery vehicle,It's been a great delivery vehicle for my caf...,4.625
1,on 07/17/05 21:59 PM (PDT),Mark,2002 Dodge Ram Cargo Van 3500 3dr Ext Van (5.2...,Disappointmnet,Bought this car as a commuter vehicle for a v...,2.125
2,on 07/16/02 00:00 AM (PDT),Tom Sheer,2002 Dodge Ram Cargo Van 3500 Maxi 3dr Ext Van...,Sweet van,"This van rocks its the best, lots of \nroom. ...",5.000
3,on 12/29/07 21:57 PM (PST),Keven Smith,2001 Dodge Ram Cargo Van 2500 Maxi 3dr Ext Van...,Keven Smith,Great work vehicle. Drives nice. has lots of ...,4.500
4,on 02/09/05 18:52 PM (PST),VanMan,2001 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Not what Dodge used to be,Good solid frame and suspension. Well equipp...,2.875
...,...,...,...,...,...,...
241378,on 11/25/02 00:00 AM (PST),radcliff,2003 Chrysler 300M Sedan 4dr Sedan (3.5L 6cyl 4A),BAD EXPERIENCE,"WE LIKED HOW THE CAR HANDLES AND \nDRIVES, BU...",3.250
241379,on 11/20/02 00:00 AM (PST),rocketman,2003 Chrysler 300M Sedan Special 4dr Sedan (3....,Nice Design Poor Build,My 300M is a nicely designed car: good \nlook...,3.000
241380,on 10/03/02 00:00 AM (PDT),sjb,2003 Chrysler 300M Sedan Special 4dr Sedan (3....,2003 Special,I took advantage of the 0% finance after\nloo...,4.875
241381,on 09/21/02 00:00 AM (PDT),dhansen4,2003 Chrysler 300M Sedan 4dr Sedan (3.5L 6cyl 4A),Appraisal,Great car for the money. Easy to,


#### Combining Review_Title and Review columns for the text corpus

In [3]:
rev['Full Review'] = rev['Review_Title'] + rev['Review']
rev.head()

Unnamed: 0,Review_Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,Full Review
0,on 10/13/05 15:30 PM (PDT),roadking,2002 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Great delivery vehicle,It's been a great delivery vehicle for my caf...,4.625,Great delivery vehicle It's been a great deliv...
1,on 07/17/05 21:59 PM (PDT),Mark,2002 Dodge Ram Cargo Van 3500 3dr Ext Van (5.2...,Disappointmnet,Bought this car as a commuter vehicle for a v...,2.125,Disappointmnet Bought this car as a commuter v...
2,on 07/16/02 00:00 AM (PDT),Tom Sheer,2002 Dodge Ram Cargo Van 3500 Maxi 3dr Ext Van...,Sweet van,"This van rocks its the best, lots of \nroom. ...",5.0,"Sweet van This van rocks its the best, lots of..."
3,on 12/29/07 21:57 PM (PST),Keven Smith,2001 Dodge Ram Cargo Van 2500 Maxi 3dr Ext Van...,Keven Smith,Great work vehicle. Drives nice. has lots of ...,4.5,Keven Smith Great work vehicle. Drives nice. h...
4,on 02/09/05 18:52 PM (PST),VanMan,2001 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Not what Dodge used to be,Good solid frame and suspension. Well equipp...,2.875,Not what Dodge used to be Good solid frame and...


#### Using spaCy for Depedency parsing, for making the core part of Aspect Extraction

Dependency parsing is a natural language processing (NLP) technique that analyzes the grammatical structure of a sentence by identifying the relationships between words. The goal of dependency parsing is to identify the head word of a sentence (the main noun or verb that governs the sentence's grammatical structure) and then to determine how other words in the sentence are related to this head word. This is done by assigning a label to each word in the sentence that indicates its relationship to the head word.

In [6]:
import spacy
from tqdm import tqdm

nlp = spacy.load('en_core_web_sm')

In [8]:
# To show the dependency

text = 'Great car and had a long range'
doc = nlp(text)
spacy.displacy.render(doc, style = 'dep', jupyter = True)

In [9]:
# Isolating the compound words

models = ['porsche,' 'mercede','comfortsport', 'mercedes','mercedes-benz', 'honda','toyota','audi', 'benz','bentley','lexus',
                  'nissan','volvo','drive','nt','like','vehicle','infiniti','good','miles','corvette','come','edmund','lotus','diego','snake',
                 'porsche', 'cayman','bought','year','minute','chicago','car','home', 'work','think','suv','people','edmunds',
                  'cabriolet','lexuss','japan','husband','baby','range', 'rover','cadillac','cadillacs','michelin','texas','second',
                   'awsome','one','now', 'take', 'give', 'new','levinson','road','love','sedan','wife','sport','bang','tank',
                   'truck','lemon','imho','pathfinder','infinity','convertible','allroad','conv','bike','ski','grocery','mclass'
                  ,'hardtop','club','hubby','child','zoom','test','etc','brain','ashamed','carmax','alpina','rocketship','great','germany',
                  'autobahn','mercedez', 'dodge', 'ferrari', 'fiat', 'ford', 'genesis', 'hummer', 'hyundai', 'infiniti', 'isuzu', 'jaguar', 'jeep', 'kia',
                  'lamborghini', 'land-rover', 'lexus', 'lincoln', 'lotus', 'maserati', 'maybach', 'mazda', 'mclaren', 'mercedes-benz', 'mercury', 'mini', 
                  'mitsubishi', 'nissan', 'pontiac', 'ram', 'rolls-royce', 'subaru', 'suzuki', 'tesla', 'volvo', 'Bugatti', 'Buick', 'Cadillac', 
                  'Chevrolet', 'chrysler', 'Acura', 'AlfaRomeo', 'AMGeneral', 'AstonMartin', 'Audi', 'Bentley', 'BMW', 'GMC', 'Honda', 'Toyota', 'VolksWagen']

In [12]:
aspect_terms = []
comp_terms = []
easpect_terms = []
ecomp_terms = []
enemy = []
for x in tqdm(range(len(rev['Full Review']))):
    amod_pairs = []   # pairs of words to describe relationship between adjective and noun
    advmod_pairs = []
    compound_pairs = []   # storing compound phrases
    xcomp_pairs = []
    neg_pairs = []
    eamod_pairs = []
    eadvmod_pairs = []
    ecompound_pairs = []
    eneg_pairs = []
    excomp_pairs = []
    enemlist = []

    # dep_ is an attribute that represents the syntactic dependency label between a token and its head or parent word in a sentence.

    # Replacing all the common punctuation marks with blank spaces
    if len(str(rev['Full Review'][x])) != 0:
        lines = str(rev['Full Review'][x]).replace('*',' ').replace('-',' ').replace('so ',' ').replace('be ',' ').replace('are ',' ').replace('just ',' ').replace('get ','').replace('were ',' ').replace('When ','').replace('when ','').replace('again ',' ').replace('where ','').replace('how ',' ').replace('has ',' ').replace('Here ',' ').replace('here ',' ').replace('now ',' ').replace('see ',' ').replace('why ',' ').split('.')       
        for line in lines:
            enem_list = []
            for eny in models:
                enem = re.search(eny,line)
                if enem is not None:
                    enem_list.append(enem.group())
            if len(enem_list)==0:
                doc = nlp(line)
                str1=''
                str2=''
                for token in doc: 
                    if token.pos_ is 'NOUN':
                        for j in token.lefts:   # returning the words that precede the token
                            if j.dep_ == 'compound':   #  a syntactic dependency label that is used to represent a compound word or phrase. A compound is a linguistic construct that consists of two or more words that are combined to create a single concept.
                                compound_pairs.append((j.text+' '+token.text,token.text))
                            if j.dep_ is 'amod' and j.pos_ is 'ADJ': #primary condition   # 'amod' a syntactic dependency label that is used to describe the relationship between an adjective and the noun it modifies in a sentence.
                                str1 = j.text+' '+token.text
                                amod_pairs.append(j.text+' '+token.text)
                                for k in j.lefts:
                                    if k.dep_ is 'advmod': #secondary condition to get adjective of adjectives/adverb
                                        str2 = k.text+' '+j.text+' '+token.text
                                        amod_pairs.append(k.text+' '+j.text+' '+token.text)
                                mtch = re.search(re.escape(str1),re.escape(str2))
                                if mtch is not None:
                                    amod_pairs.remove(str1)
                    if token.pos_ is 'VERB':
                        for j in token.lefts:
                            if j.dep_ is 'advmod' and j.pos_ is 'ADV':
                                advmod_pairs.append(j.text+' '+token.text)
                            if j.dep_ is 'neg' and j.pos_ is 'ADV':
                                neg_pairs.append(j.text+' '+token.text)
                        for j in token.rights:
                            if j.dep_ is 'advmod'and j.pos_ is 'ADV':
                                advmod_pairs.append(token.text+' '+j.text)
                    if token.pos_ is 'ADJ':
                        for j,h in zip(token.rights,token.lefts):
                            if j.dep_ is 'xcomp' and h.dep_ is not 'neg':   # the second word is a non-finite verb that serves as the predicate of a clause that is missing its subject.
                                for k in j.lefts:
                                    if k.dep_ is 'aux':
                                        xcomp_pairs.append(token.text+' '+k.text+' '+j.text)
                            elif j.dep_ is 'xcomp' and h.dep_ is 'neg':
                                if k.dep_ is 'aux':   # the second word is an auxiliary verb that helps to form a verb tense, mood, or voice with the first word.
                                        neg_pairs.append(h.text +' '+token.text+' '+k.text+' '+j.text)
            
            else:
                enemlist.append(enem_list)
                doc = nlp(line)
                str1=''
                str2=''
                for token in doc:
                    if token.pos_ is 'NOUN':
                        for j in token.lefts:
                            if j.dep_ == 'compound':
                                ecompound_pairs.append((j.text+' '+token.text,token.text))
                            if j.dep_ is 'amod' and j.pos_ is 'ADJ': #primary condition
                                str1 = j.text+' '+token.text
                                eamod_pairs.append(j.text+' '+token.text)
                                for k in j.lefts:
                                    if k.dep_ is 'advmod': #secondary condition to get adjective of adjectives
                                        str2 = k.text+' '+j.text+' '+token.text
                                        eamod_pairs.append(k.text+' '+j.text+' '+token.text)
                                mtch = re.search(re.escape(str1),re.escape(str2))
                                if mtch is not None:
                                    eamod_pairs.remove(str1)
                    if token.pos_ is 'VERB':
                        for j in token.lefts:
                            if j.dep_ is 'advmod' and j.pos_ is 'ADV':
                                eadvmod_pairs.append(j.text+' '+token.text)
                            if j.dep_ is 'neg' and j.pos_ is 'ADV':
                                eneg_pairs.append(j.text+' '+token.text)
                        for j in token.rights:
                            if j.dep_ is 'advmod'and j.pos_ is 'ADV':
                                eadvmod_pairs.append(token.text+' '+j.text)
                    if token.pos_ is 'ADJ':
                        for j in token.rights:
                            if j.dep_ is 'xcomp':
                                for k in j.lefts:
                                    if k.dep_ is 'aux':
                                        excomp_pairs.append(token.text+' '+k.text+' '+j.text)
        pairs = list(set(amod_pairs+advmod_pairs+neg_pairs+xcomp_pairs))
        epairs = list(set(eamod_pairs+eadvmod_pairs+eneg_pairs+excomp_pairs))
        for i in range(len(pairs)):
            if len(compound_pairs)!=0:
                for comp in compound_pairs:
                    mtch = re.search(re.escape(comp[1]),re.escape(pairs[i]))
                    if mtch is not None:
                        pairs[i] = pairs[i].replace(mtch.group(),comp[0])
        for i in range(len(epairs)):
            if len(ecompound_pairs)!=0:
                for comp in ecompound_pairs:
                    mtch = re.search(re.escape(comp[1]),re.escape(epairs[i]))
                    if mtch is not None:
                        epairs[i] = epairs[i].replace(mtch.group(),comp[0])
            
    aspect_terms.append(pairs)
    comp_terms.append(compound_pairs)
    easpect_terms.append(epairs)
    ecomp_terms.append(ecompound_pairs)
    enemy.append(enemlist)
rev['compound_nouns'] = comp_terms
rev['aspect_keywords'] = aspect_terms
rev['competition'] = enemy
rev['competition_comp_nouns'] = ecomp_terms
rev['competition_aspects'] = easpect_terms
rev.head()

  if token.pos_ is 'NOUN':
  if j.dep_ is 'amod' and j.pos_ is 'ADJ': #primary condition
  if j.dep_ is 'amod' and j.pos_ is 'ADJ': #primary condition
  if k.dep_ is 'advmod': #secondary condition to get adjective of adjectives
  if token.pos_ is 'VERB':
  if j.dep_ is 'advmod' and j.pos_ is 'ADV':
  if j.dep_ is 'advmod' and j.pos_ is 'ADV':
  if j.dep_ is 'neg' and j.pos_ is 'ADV':
  if j.dep_ is 'neg' and j.pos_ is 'ADV':
  if j.dep_ is 'advmod'and j.pos_ is 'ADV':
  if j.dep_ is 'advmod'and j.pos_ is 'ADV':
  if token.pos_ is 'ADJ':
  if j.dep_ is 'xcomp' and h.dep_ is not 'neg':
  if j.dep_ is 'xcomp' and h.dep_ is not 'neg':
  if k.dep_ is 'aux':
  elif j.dep_ is 'xcomp' and h.dep_ is 'neg':
  elif j.dep_ is 'xcomp' and h.dep_ is 'neg':
  if k.dep_ is 'aux':
  if token.pos_ is 'NOUN':
  if j.dep_ is 'amod' and j.pos_ is 'ADJ': #primary condition
  if j.dep_ is 'amod' and j.pos_ is 'ADJ': #primary condition
  if k.dep_ is 'advmod': #secondary condition to get adjective of adjectiv

Unnamed: 0,Review_Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,Full Review,compound_nouns,aspect_keywords,competition,competition_comp_nouns,competition_aspects
0,on 10/13/05 15:30 PM (PDT),roadking,2002 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Great delivery vehicle,It's been a great delivery vehicle for my caf...,4.625,Great delivery vehicle It's been a great deliv...,"[(LX series, series)]",[],"[[vehicle, good, take, great], [nt], [second]]","[(delivery vehicle, vehicle), (delivery vehicl...","[good power, normal maintenance items, great d..."
1,on 07/17/05 21:59 PM (PDT),Mark,2002 Dodge Ram Cargo Van 3500 3dr Ext Van (5.2...,Disappointmnet,Bought this car as a commuter vehicle for a v...,2.125,Disappointmnet Bought this car as a commuter v...,[],"[rough idle, difficult to maneuver]","[[nt, vehicle, car], [nt, vehicle], [nt], [dri...","[(commuter vehicle, vehicle), (van pool, pool)...","[very large turning radius, had previously, la..."
2,on 07/16/02 00:00 AM (PDT),Tom Sheer,2002 Dodge Ram Cargo Van 3500 Maxi 3dr Ext Van...,Sweet van,"This van rocks its the best, lots of \nroom. ...",5.0,"Sweet van This van rocks its the best, lots of...",[],[],"[[car, work, great]]",[],[tow too]
3,on 12/29/07 21:57 PM (PST),Keven Smith,2001 Dodge Ram Cargo Van 2500 Maxi 3dr Ext Van...,Keven Smith,Great work vehicle. Drives nice. has lots of ...,4.5,Keven Smith Great work vehicle. Drives nice. h...,"[(bucket seats, seats), (seats windows, windows)]","[electric seats windows, Easy to handle]","[[vehicle, work]]","[(Smith vehicle, vehicle), (work vehicle, vehi...",[]
4,on 02/09/05 18:52 PM (PST),VanMan,2001 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Not what Dodge used to be,Good solid frame and suspension. Well equipp...,2.875,Not what Dodge used to be Good solid frame and...,"[(leg room, room), (passenger side, side), (sp...","[Well equipped power, whole passenger side, eq...","[[ram], [nt], [drive], [nt], [nt], [ram], [nt,...","[(Sheet metal, metal), (Paint chips, chips), (...","[scratches easily, mechanical problems, Good f..."


In [13]:
rev.shape

(241383, 12)

### Using vaderSentiment for Sentiment Analysis

VADER( Valence Aware Dictionary for Sentiment Reasoning) is an NLTK module that provides sentiment scores based on the words used.

In [16]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [17]:
analyser = SentimentIntensityAnalyzer()

In [18]:
import operator
sentiment = []
for i in range(len(rev)):
    score_dict = {'pos' : 0, 'neg' : 0, 'neu' : 0}
    if len(rev['aspect_keywords'][i]) != 0:
        for aspects in rev['aspect_keywords'][i]:
            sent = analyser.polarity_scores(aspects)
            score_dict['neg'] += sent['neg']
            score_dict['pos'] += sent['pos']   
        
        sentiment.append(max(score_dict.items(), key = operator.itemgetter(1))[0])
    
    else:
        sentiment.append('NaN')
rev['sentiment'] = sentiment
rev.head()

Unnamed: 0,Review_Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,Full Review,compound_nouns,aspect_keywords,competition,competition_comp_nouns,competition_aspects,sentiment
0,on 10/13/05 15:30 PM (PDT),roadking,2002 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Great delivery vehicle,It's been a great delivery vehicle for my caf...,4.625,Great delivery vehicle It's been a great deliv...,"[(LX series, series)]",[],"[[vehicle, good, take, great], [nt], [second]]","[(delivery vehicle, vehicle), (delivery vehicl...","[good power, normal maintenance items, great d...",
1,on 07/17/05 21:59 PM (PDT),Mark,2002 Dodge Ram Cargo Van 3500 3dr Ext Van (5.2...,Disappointmnet,Bought this car as a commuter vehicle for a v...,2.125,Disappointmnet Bought this car as a commuter v...,[],"[rough idle, difficult to maneuver]","[[nt, vehicle, car], [nt, vehicle], [nt], [dri...","[(commuter vehicle, vehicle), (van pool, pool)...","[very large turning radius, had previously, la...",neg
2,on 07/16/02 00:00 AM (PDT),Tom Sheer,2002 Dodge Ram Cargo Van 3500 Maxi 3dr Ext Van...,Sweet van,"This van rocks its the best, lots of \nroom. ...",5.0,"Sweet van This van rocks its the best, lots of...",[],[],"[[car, work, great]]",[],[tow too],
3,on 12/29/07 21:57 PM (PST),Keven Smith,2001 Dodge Ram Cargo Van 2500 Maxi 3dr Ext Van...,Keven Smith,Great work vehicle. Drives nice. has lots of ...,4.5,Keven Smith Great work vehicle. Drives nice. h...,"[(bucket seats, seats), (seats windows, windows)]","[electric seats windows, Easy to handle]","[[vehicle, work]]","[(Smith vehicle, vehicle), (work vehicle, vehi...",[],pos
4,on 02/09/05 18:52 PM (PST),VanMan,2001 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Not what Dodge used to be,Good solid frame and suspension. Well equipp...,2.875,Not what Dodge used to be Good solid frame and...,"[(leg room, room), (passenger side, side), (sp...","[Well equipped power, whole passenger side, eq...","[[ram], [nt], [drive], [nt], [nt], [ram], [nt,...","[(Sheet metal, metal), (Paint chips, chips), (...","[scratches easily, mechanical problems, Good f...",neg


In [20]:
# Assigning 1 for positive review, 0 for negative review and NaN for NaN review

score = []
for sent in rev['sentiment']:
    if sent is 'NaN':
        score.append('NaN')
    elif sent is 'pos':
        score.append('1')
    else:
        score.append('0')

rev['score'] = score
rev.head()

  if sent is 'NaN':
  elif sent is 'pos':


Unnamed: 0,Review_Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,Full Review,compound_nouns,aspect_keywords,competition,competition_comp_nouns,competition_aspects,sentiment,score
0,on 10/13/05 15:30 PM (PDT),roadking,2002 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Great delivery vehicle,It's been a great delivery vehicle for my caf...,4.625,Great delivery vehicle It's been a great deliv...,"[(LX series, series)]",[],"[[vehicle, good, take, great], [nt], [second]]","[(delivery vehicle, vehicle), (delivery vehicl...","[good power, normal maintenance items, great d...",,
1,on 07/17/05 21:59 PM (PDT),Mark,2002 Dodge Ram Cargo Van 3500 3dr Ext Van (5.2...,Disappointmnet,Bought this car as a commuter vehicle for a v...,2.125,Disappointmnet Bought this car as a commuter v...,[],"[rough idle, difficult to maneuver]","[[nt, vehicle, car], [nt, vehicle], [nt], [dri...","[(commuter vehicle, vehicle), (van pool, pool)...","[very large turning radius, had previously, la...",neg,0.0
2,on 07/16/02 00:00 AM (PDT),Tom Sheer,2002 Dodge Ram Cargo Van 3500 Maxi 3dr Ext Van...,Sweet van,"This van rocks its the best, lots of \nroom. ...",5.0,"Sweet van This van rocks its the best, lots of...",[],[],"[[car, work, great]]",[],[tow too],,
3,on 12/29/07 21:57 PM (PST),Keven Smith,2001 Dodge Ram Cargo Van 2500 Maxi 3dr Ext Van...,Keven Smith,Great work vehicle. Drives nice. has lots of ...,4.5,Keven Smith Great work vehicle. Drives nice. h...,"[(bucket seats, seats), (seats windows, windows)]","[electric seats windows, Easy to handle]","[[vehicle, work]]","[(Smith vehicle, vehicle), (work vehicle, vehi...",[],pos,1.0
4,on 02/09/05 18:52 PM (PST),VanMan,2001 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Not what Dodge used to be,Good solid frame and suspension. Well equipp...,2.875,Not what Dodge used to be Good solid frame and...,"[(leg room, room), (passenger side, side), (sp...","[Well equipped power, whole passenger side, eq...","[[ram], [nt], [drive], [nt], [nt], [ram], [nt,...","[(Sheet metal, metal), (Paint chips, chips), (...","[scratches easily, mechanical problems, Good f...",neg,0.0


In [21]:
# Dividing reviews into positive and negative based on setting a threshold in the ratings given to each, according to given Ratings

import math
pos = []
for i in range(len(rev)):
    if not math.isnan(rev['Rating'][i]):
        if int(rev['Rating'][i]) > 3:   # Setting the threshold as 3 from looking at the overall Ratings given
            pos.append('1')
        else:
            pos.append('0')
    else:
        pos.append('0')

rev['Positive Reviews'] = pos
rev.head()

Unnamed: 0,Review_Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,Full Review,compound_nouns,aspect_keywords,competition,competition_comp_nouns,competition_aspects,sentiment,score,Positive Reviews
0,on 10/13/05 15:30 PM (PDT),roadking,2002 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Great delivery vehicle,It's been a great delivery vehicle for my caf...,4.625,Great delivery vehicle It's been a great deliv...,"[(LX series, series)]",[],"[[vehicle, good, take, great], [nt], [second]]","[(delivery vehicle, vehicle), (delivery vehicl...","[good power, normal maintenance items, great d...",,,1
1,on 07/17/05 21:59 PM (PDT),Mark,2002 Dodge Ram Cargo Van 3500 3dr Ext Van (5.2...,Disappointmnet,Bought this car as a commuter vehicle for a v...,2.125,Disappointmnet Bought this car as a commuter v...,[],"[rough idle, difficult to maneuver]","[[nt, vehicle, car], [nt, vehicle], [nt], [dri...","[(commuter vehicle, vehicle), (van pool, pool)...","[very large turning radius, had previously, la...",neg,0.0,0
2,on 07/16/02 00:00 AM (PDT),Tom Sheer,2002 Dodge Ram Cargo Van 3500 Maxi 3dr Ext Van...,Sweet van,"This van rocks its the best, lots of \nroom. ...",5.0,"Sweet van This van rocks its the best, lots of...",[],[],"[[car, work, great]]",[],[tow too],,,1
3,on 12/29/07 21:57 PM (PST),Keven Smith,2001 Dodge Ram Cargo Van 2500 Maxi 3dr Ext Van...,Keven Smith,Great work vehicle. Drives nice. has lots of ...,4.5,Keven Smith Great work vehicle. Drives nice. h...,"[(bucket seats, seats), (seats windows, windows)]","[electric seats windows, Easy to handle]","[[vehicle, work]]","[(Smith vehicle, vehicle), (work vehicle, vehi...",[],pos,1.0,1
4,on 02/09/05 18:52 PM (PST),VanMan,2001 Dodge Ram Cargo Van 1500 3dr Van (3.9L 6c...,Not what Dodge used to be,Good solid frame and suspension. Well equipp...,2.875,Not what Dodge used to be Good solid frame and...,"[(leg room, room), (passenger side, side), (sp...","[Well equipped power, whole passenger side, eq...","[[ram], [nt], [drive], [nt], [nt], [ram], [nt,...","[(Sheet metal, metal), (Paint chips, chips), (...","[scratches easily, mechanical problems, Good f...",neg,0.0,0


In [22]:
d = {'sent' : rev['Positive Reviews'], 'sent_pred' : rev['score']}

comparison_df = pd.DataFrame(data = d)
comparison_df

Unnamed: 0,sent,sent_pred
0,1,
1,0,0
2,1,
3,1,1
4,0,0
...,...,...
241378,0,0
241379,0,
241380,1,
241381,0,


In [23]:
len(comparison_df)

241383

In [24]:
# Removing the unwanted entry(NaN) from the dataframe

comparison_df = comparison_df[comparison_df.sent_pred != 'NaN']

In [29]:
comparison_df


Unnamed: 0,sent,sent_pred
1,0,0
3,1,1
4,0,0
7,1,1
8,0,1
...,...,...
241364,1,1
241365,1,1
241369,1,1
241377,0,1


### Checking the Accuracy of the Predicted Sentiments

In [26]:
from sklearn.metrics import accuracy_score, auc, f1_score, recall_score, precision_score

In [28]:
print('Accuracy is: ')
print(accuracy_score(comparison_df.sent, comparison_df.sent_pred))
print('f1 Score: ')
print(f1_score(comparison_df.sent, comparison_df.sent_pred, pos_label='1'))
print('Recall: ')
print(recall_score(comparison_df.sent, comparison_df.sent_pred, pos_label='1'))
print('Precision: ')
print(precision_score(comparison_df.sent, comparison_df.sent_pred, pos_label='1'))

Accuracy is: 
0.7128734207903457
f1 Score: 
0.8150774052891205
Recall: 
0.8618530476470013
Precision: 
0.7731177251434401
