# Sentiment Analysis
Analyses the sentiment of all comments of a listing, gives each listing a sentiment score based on the average of all comments, appends it onto the data set

In [28]:
import numpy as np
import seaborn as sb
import pandas as pd
import nltk
import matplotlib.pyplot as plt
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer 

reviewData = pd.read_csv("reviews.csv")
reviewDataFrame = pd.DataFrame(reviewData)
reviewData.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,7202016,38917982,2015-07-19,28943674,Bianca,Cute and cozy place. Perfect location to every...
1,7202016,39087409,2015-07-20,32440555,Frank,Kelly has a great room in a very central locat...
2,7202016,39820030,2015-07-26,37722850,Ian,"Very spacious apartment, and in a great neighb..."
3,7202016,40813543,2015-08-02,33671805,George,Close to Seattle Center and all it has to offe...
4,7202016,41986501,2015-08-10,34959538,Ming,Kelly was a great host and very accommodating ...


In [29]:
# Check for null values
reviewData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84849 entries, 0 to 84848
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   listing_id     84849 non-null  int64 
 1   id             84849 non-null  int64 
 2   date           84849 non-null  object
 3   reviewer_id    84849 non-null  int64 
 4   reviewer_name  84849 non-null  object
 5   comments       84831 non-null  object
dtypes: int64(3), object(3)
memory usage: 3.9+ MB


In [33]:
# Replacing null values
reviewData['comments'].fillna("No comment", inplace = True)
reviewData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84849 entries, 0 to 84848
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   listing_id     84849 non-null  int64 
 1   id             84849 non-null  int64 
 2   date           84849 non-null  object
 3   reviewer_id    84849 non-null  int64 
 4   reviewer_name  84849 non-null  object
 5   comments       84849 non-null  object
dtypes: int64(3), object(3)
memory usage: 3.9+ MB


In [39]:
# Creating a Sentiment Intensity Analyzer object. 
sent_obj = SentimentIntensityAnalyzer() 
listing_sentiment = {}

for i in range(len(reviewDataFrame)):
    current_listing_id = reviewDataFrame['listing_id'][i]
    
    # Create an ID entry in the dictionary. [total_sentiment_score, count]
    if current_listing_id not in listing_sentiment:
        listing_sentiment[current_listing_id] = [0,0]
    
    # Using polarity_scores method to determine sentiment
    sentiment_dict = sid_obj.polarity_scores(reviewDataFrame['comments'][i])
    listing_sentiment[current_listing_id][0] += sentiment_dict['compound']
    listing_sentiment[current_listing_id][1] += 1


In [41]:
# Replace the [sentiment_score, count] list of each entry with the average sentiment score
for key in listing_sentiment:
    listing_sentiment[key] = listing_sentiment[key][0]/listing_sentiment[key][1]

print(listing_sentiment)

{7202016: 0.8770937500000001, 3946674: 0.9558484848484847, 7833113: 0.9553, 8308353: 0.9591428571428571, 4277026: 0.8186285714285715, 7735100: 0.55424, 4701141: 0.9694, 7934963: 0.8956277777777777, 2934389: 0.9411272727272727, 6888107: 0.9691846153846153, 7075357: 0.9542, 4494948: 0.9480909090909091, 3242605: 0.8923688073394499, 7763878: 0.9473, 8536441: 0.9690333333333334, 7550234: 0.7815, 2926776: 0.9070714285714286, 7327623: 0.9458238095238094, 1205666: 0.8790479452054794, 136480: 0.9037529411764705, 4681885: 0.8728054054054053, 2420536: 0.8931328947368421, 6002165: 0.9758, 72743: 0.9031021052631579, 7931386: 0.765075, 9061868: 0.9819, 8106277: 0.9306, 1122236: 0.8349844444444444, 6939472: 0.8562045454545454, 6567683: 0.8712684210526317, 1707064: 0.9230184523809528, 7219838: 0.8889222222222224, 5319948: 0.9683272727272727, 6558980: 0.9882, 6958436: 0.8864000000000001, 931758: 0.8593041666666666, 1742425: 0.8847, 8100190: 0.9502714285714287, 6847052: 0.9551000000000001, 2277640: 0.93

In [51]:
# Write the updated data to a new csv file
from csv import writer, reader

listingData = pd.read_csv("listings.csv")
default_text = 'Some Text'
with open('listings.csv', 'r', encoding="utf8") as read_obj, open('listings_sentiment.csv', 'w', newline='', encoding="utf8") as write_obj:
    csv_reader = reader(read_obj)
    csv_writer = writer(write_obj)

    # Create header for new column
    header = next(csv_reader)
    header.append("sentiment_score")
    csv_writer.writerow(header)
    
    # Read each row of the input csv file as list
    for row in csv_reader:
        try:
            row.append(listing_sentiment[int(row[0])])
        except KeyError:
            row.append(None)
            
        csv_writer.writerow(row)