In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import boto3

In [2]:
# Load the Boston Airbnb reviews data (one row per guest review).
# The path is relative to the notebook's working directory.
boston_reviews = pd.read_csv("boston_airbnb_data/reviews.csv")

In [14]:
# Create a boto3 client for the Amazon Comprehend service.
# No region or credentials are passed here, so they have to be supplied by
# the boto3 configuration available to the kernel (environment / config files).
client = boto3.client('comprehend')

In [15]:
def sentiment_comments_todf(df, client, output_path="mean_sentiment_comments.csv"):
    '''
    Score every review comment with AWS Comprehend and average the scores
    per listing.

    INPUT
    df - Boston reviews dataframe. The listing id is read from the first
         column (position 0) and the comment text from the sixth column
         (position 5)
    client - Boto3 comprehend client (any object exposing a compatible
             detect_sentiment(Text=..., LanguageCode=...) method)
    output_path - Path of the csv file the result is saved to. Pass None to
                  skip saving. Defaults to "mean_sentiment_comments.csv"

    OUTPUT
    mean_df - A dataframe indexed by listing_id with the mean sentiment score
              per property (score * 100, rounded to 2 decimals) in the columns
              mean_positive, mean_negative, mean_neutral and mean_mixed, plus
              number_reviews with the count of successfully scored comments.
              Listings without any successfully scored comment are absent.
    '''
    sentiments = ("Positive", "Negative", "Neutral", "Mixed")
    result_dict = {}
    size = len(df)
    # Counters start at zero and are updated *before* the progress line is
    # printed, so Correct% + Fail% always adds up to 100% of processed rows.
    general_count, correct_count, fail_count = 0, 0, 0

    # Positional access through .iloc: plain row[0] / row[5] on a Series with
    # string labels relies on a positional fallback deprecated in pandas 2.x.
    for listing_id, comment in zip(df.iloc[:, 0], df.iloc[:, 5]):
        if isinstance(comment, str) and comment:
            try:
                # Use aws comprehend to detect the sentiment of the comment
                response = client.detect_sentiment(Text=comment, LanguageCode='en')
                # Read all four scores before storing any of them so the
                # per-listing lists can never end up with different lengths.
                scores = [response["SentimentScore"][name] for name in sentiments]
            except Exception:
                # Best effort: a failed call is counted and the row skipped.
                # KeyboardInterrupt / SystemExit are deliberately NOT caught so
                # this long-running loop can still be interrupted.
                fail_count += 1
            else:
                per_listing = result_dict.setdefault(
                    listing_id, {name: [] for name in sentiments})
                for name, score in zip(sentiments, scores):
                    per_listing[name].append(score)
                correct_count += 1
        else:
            # Missing (NaN) or empty comments cannot be scored; count them as
            # failures without spending a remote call on them.
            fail_count += 1

        general_count += 1
        general_percentage = round(general_count * 100 / size, 2)
        correct_percentage = round(correct_count * 100 / general_count, 2)
        fail_percentage = round(fail_count * 100 / general_count, 2)
        print("Progress:{}%, Correct:{}%, Fail:{}% ............".format(general_percentage, correct_percentage,
                                                                        fail_percentage), end='\r')

    resume_dict = {}
    # Obtain the mean of each sentiment per property
    for listing_id, per_listing in result_dict.items():
        summary = {
            "mean_" + name.lower(): round(sum(per_listing[name]) * 100 / len(per_listing[name]), 2)
            for name in sentiments
        }
        summary["number_reviews"] = len(per_listing["Positive"])
        resume_dict[listing_id] = summary

    # Transform the dictionary with sentiment by property to dataframe.
    # reindex guarantees the expected columns even when nothing was scored.
    columns = ["mean_positive", "mean_negative", "mean_neutral", "mean_mixed", "number_reviews"]
    mean_df = pd.DataFrame.from_dict(resume_dict, orient='index').reindex(columns=columns)
    mean_df.index.name = 'listing_id'
    # Save the dataframe as a csv file
    if output_path is not None:
        mean_df.to_csv(output_path)

    return mean_df

In [16]:
# Score the reviews and aggregate them per listing. The function calls the
# Comprehend client row by row, so expect a long run on the full dataset; it
# prints a progress line while running and also saves the result to
# mean_sentiment_comments.csv.
mean_df = sentiment_comments_todf(boston_reviews,client)

Progress:100.0%, Correct:99.92%, Fail:0.08% ............