In [24]:
#Dependencies
import ast
import io

import pandas as pd
import torch
from google.colab import files
from transformers import pipeline

In [25]:
#Set sentiment pipeline
task = "text-classification"
model = "nlptown/bert-base-multilingual-uncased-sentiment"
# Place the model on the first CUDA device when one is available; -1 selects
# the CPU. Without an explicit device the placement is left to the library
# default, which may not use the GPU.
pipeline_device = 0 if torch.cuda.is_available() else -1
sentiment_pipeline = pipeline(task, model=model, device=pipeline_device)

In [26]:
def analyze_sentiments(df, sentiment_pipeline, progress_every=44):
    """
    Label every review of every row with a coarse sentiment.

    Each review is truncated to its first 512 characters and passed to
    ``sentiment_pipeline``. The 'label' of the first returned prediction is
    then mapped as follows:

        '1 star', '2 stars'  -> 'negative'
        '3 stars'            -> 'neutral'
        '4 stars', '5 stars' -> 'positive'

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'reviews' column whose cells are iterables of strings.
        The frame is modified in place.
    sentiment_pipeline : callable
        Called as ``sentiment_pipeline(text)``; must return a sequence whose
        first element is a mapping with a 'label' key.
    progress_every : int, optional
        Print a progress line every this many rows (default 44). Pass 0 or
        None to silence progress output.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object, with an extra column 'sentiments' holding,
        for each row, a list of sentiment strings in the same order as the
        row's reviews.

    Raises
    ------
    KeyError
        If 'reviews' is missing, or the pipeline returns a label that is not
        one of the five star labels above.
    """
    # Output replacements
    star_to_sentiment = {
        '1 star': 'negative',
        '2 stars': 'negative',
        '3 stars': 'neutral',
        '4 stars': 'positive',
        '5 stars': 'positive'}

    num_rows = len(df)
    sentiments_per_row = []

    # Count rows by position, not by index label: the label is only a valid
    # row counter for a default RangeIndex, and is not even numeric in general.
    for position, reviews in enumerate(df['reviews']):
        if progress_every and position % progress_every == 0:
            print(f'progress: {round((position+1)/num_rows*100,3)}%')
        stars = [sentiment_pipeline(review[:512])[0]['label'] for review in reviews]
        sentiments_per_row.append([star_to_sentiment[star] for star in stars])

    df['sentiments'] = sentiments_per_row

    return df

In [27]:
# Upload input file
# `files` comes from google.colab; the value returned by the upload call is
# kept in `uploaded` for later cells.
uploaded = files.upload()

Saving labeled_inspections_with_reviews.csv to labeled_inspections_with_reviews (3).csv


In [28]:
# NOTE(review): no cell visible in this notebook consumes `device` — confirm it is still needed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
filename = 'labeled_inspections_with_reviews.csv'

# Load data
# Prefer the bytes returned by the upload step. When a file with the same name
# already exists, Colab saves the new upload under a suffixed name
# (e.g. "... (3).csv"), so reading `filename` from disk can silently load a
# stale earlier copy. Fall back to the on-disk file when the upload mapping
# has no entry under this name.
if filename in uploaded:
    data = pd.read_csv(io.BytesIO(uploaded[filename]))
else:
    data = pd.read_csv(filename)

# Clean review data
# These columns arrive from the CSV as strings holding Python list literals;
# parse them back into real lists.
for column in ['reviews', 'ratings']:
    data[column] = data[column].apply(ast.literal_eval)

data.head()

Unnamed: 0.1,Unnamed: 0,Organization Name,Active Indicator,Public Facility Name,Program Group Type,Address,City,County Name,Zip Code,State,...,is_open,attributes,categories,hours,prev_date,prev_date_with_nulls,reviews,ratings,n_reviews,avg_rating
0,10735,Chester County Health Department,Yes,"158 LICENSE, LLC",ChesterCountyFood,158 W GAY ST,West Chester,Chester,19380.0,PA,...,0.0,"{'RestaurantsPriceRange2': '2', 'RestaurantsAt...","Restaurants, American (New)","{'Monday': '11:0-2:0', 'Tuesday': '11:0-2:0', ...",,2018-03-25 00:00:00,[Our first visit here and probably won't be ba...,"[1.0, 3.0, 1.0, 5.0, 1.0, 5.0, 1.0, 2.0, 5.0, ...",11,2.454545
1,56848,PENNSYLVANIA DEPARTMENT OF AGRICULTURE,Yes,3 B'S LOUNGE,Food,309 S MAIN ST,ZELIENOPLE,Butler,16063.0,PA,...,1.0,"{'RestaurantsGoodForGroups': 'True', 'Business...","American (Traditional), Bars, Nightlife, Sandw...","{'Monday': '11:30-0:0', 'Tuesday': '11:30-0:0'...",,2017-05-21 00:00:00,"[Stopped in for happy hour last night, first t...","[4.0, 5.0]",2,4.5
2,72731,PENNSYLVANIA DEPARTMENT OF AGRICULTURE,Yes,3 B'S LOUNGE,Food,309 S MAIN ST,ZELIENOPLE,Butler,16063.0,PA,...,1.0,"{'RestaurantsGoodForGroups': 'True', 'Business...","American (Traditional), Bars, Nightlife, Sandw...","{'Monday': '11:30-0:0', 'Tuesday': '11:30-0:0'...",2018-05-16 00:00:00,2018-05-16 00:00:00,[Six of us went here for an early Saturday lun...,"[5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 4.0]",7,3.571429
3,289,Chester County Health Department,Yes,30 MAIN,ChesterCountyFood,660 LANCASTER AVE,Berwyn,Chester,19312.0,PA,...,1.0,"{'RestaurantsPriceRange2': '2', 'GoodForMeal':...","American (New), Bars, Venues & Event Spaces, N...","{'Tuesday': '16:0-22:0', 'Wednesday': '16:0-22...",,2017-05-08 00:00:00,[So I read all the reviews and was having seco...,"[2.0, 5.0, 5.0, 3.0, 4.0, 2.0, 1.0, 1.0, 5.0, ...",11,3.363636
4,15080,Chester County Health Department,Yes,30 MAIN,ChesterCountyFood,660 LANCASTER AVE,Berwyn,Chester,19312.0,PA,...,1.0,"{'RestaurantsPriceRange2': '2', 'GoodForMeal':...","American (New), Bars, Venues & Event Spaces, N...","{'Tuesday': '16:0-22:0', 'Wednesday': '16:0-22...",2018-05-03 00:00:00,2018-05-03 00:00:00,"[Ordered through Grub Hub, and the delivery ca...","[1.0, 2.0, 5.0, 2.0, 1.0]",5,2.2


In [29]:
#Find review sentiments
# analyze_sentiments adds a 'sentiments' column to the frame it is given and
# returns that same object, printing progress lines as it goes.
data = analyze_sentiments(data, sentiment_pipeline)

progress: 0.046%
progress: 2.079%
progress: 4.111%
progress: 6.143%
progress: 8.176%
progress: 10.208%
progress: 12.24%
progress: 14.273%
progress: 16.305%
progress: 18.337%
progress: 20.37%
progress: 22.402%
progress: 24.434%
progress: 26.467%
progress: 28.499%
progress: 30.531%
progress: 32.564%
progress: 34.596%
progress: 36.628%
progress: 38.661%
progress: 40.693%
progress: 42.725%
progress: 44.758%
progress: 46.79%
progress: 48.822%
progress: 50.855%
progress: 52.887%
progress: 54.919%
progress: 56.952%
progress: 58.984%
progress: 61.016%
progress: 63.048%
progress: 65.081%
progress: 67.113%
progress: 69.145%
progress: 71.178%
progress: 73.21%
progress: 75.242%
progress: 77.275%
progress: 79.307%
progress: 81.339%
progress: 83.372%
progress: 85.404%
progress: 87.436%
progress: 89.469%
progress: 91.501%
progress: 93.533%
progress: 95.566%
progress: 97.598%
progress: 99.63%


In [31]:
# Preview the first rows to confirm the 'sentiments' column was added
data.head()

Unnamed: 0.1,Unnamed: 0,Organization Name,Active Indicator,Public Facility Name,Program Group Type,Address,City,County Name,Zip Code,State,...,attributes,categories,hours,prev_date,prev_date_with_nulls,reviews,ratings,n_reviews,avg_rating,sentiments
0,10735,Chester County Health Department,Yes,"158 LICENSE, LLC",ChesterCountyFood,158 W GAY ST,West Chester,Chester,19380.0,PA,...,"{'RestaurantsPriceRange2': '2', 'RestaurantsAt...","Restaurants, American (New)","{'Monday': '11:0-2:0', 'Tuesday': '11:0-2:0', ...",,2018-03-25 00:00:00,[Our first visit here and probably won't be ba...,"[1.0, 3.0, 1.0, 5.0, 1.0, 5.0, 1.0, 2.0, 5.0, ...",11,2.454545,"[negative, neutral, negative, negative, negati..."
1,56848,PENNSYLVANIA DEPARTMENT OF AGRICULTURE,Yes,3 B'S LOUNGE,Food,309 S MAIN ST,ZELIENOPLE,Butler,16063.0,PA,...,"{'RestaurantsGoodForGroups': 'True', 'Business...","American (Traditional), Bars, Nightlife, Sandw...","{'Monday': '11:30-0:0', 'Tuesday': '11:30-0:0'...",,2017-05-21 00:00:00,"[Stopped in for happy hour last night, first t...","[4.0, 5.0]",2,4.5,"[positive, positive]"
2,72731,PENNSYLVANIA DEPARTMENT OF AGRICULTURE,Yes,3 B'S LOUNGE,Food,309 S MAIN ST,ZELIENOPLE,Butler,16063.0,PA,...,"{'RestaurantsGoodForGroups': 'True', 'Business...","American (Traditional), Bars, Nightlife, Sandw...","{'Monday': '11:30-0:0', 'Tuesday': '11:30-0:0'...",2018-05-16 00:00:00,2018-05-16 00:00:00,[Six of us went here for an early Saturday lun...,"[5.0, 1.0, 4.0, 5.0, 1.0, 5.0, 4.0]",7,3.571429,"[positive, negative, positive, positive, negat..."
3,289,Chester County Health Department,Yes,30 MAIN,ChesterCountyFood,660 LANCASTER AVE,Berwyn,Chester,19312.0,PA,...,"{'RestaurantsPriceRange2': '2', 'GoodForMeal':...","American (New), Bars, Venues & Event Spaces, N...","{'Tuesday': '16:0-22:0', 'Wednesday': '16:0-22...",,2017-05-08 00:00:00,[So I read all the reviews and was having seco...,"[2.0, 5.0, 5.0, 3.0, 4.0, 2.0, 1.0, 1.0, 5.0, ...",11,3.363636,"[negative, positive, positive, neutral, positi..."
4,15080,Chester County Health Department,Yes,30 MAIN,ChesterCountyFood,660 LANCASTER AVE,Berwyn,Chester,19312.0,PA,...,"{'RestaurantsPriceRange2': '2', 'GoodForMeal':...","American (New), Bars, Venues & Event Spaces, N...","{'Tuesday': '16:0-22:0', 'Wednesday': '16:0-22...",2018-05-03 00:00:00,2018-05-03 00:00:00,"[Ordered through Grub Hub, and the delivery ca...","[1.0, 2.0, 5.0, 2.0, 1.0]",5,2.2,"[negative, positive, negative, positive, negat..."


In [32]:
# Export DataFrame to CSV, and download
# Use one path for both steps so the download always targets the file that was
# just written, whatever the current working directory is.
output_filename = 'labeled_inspections_with_reviews_sentiments.csv'
data.to_csv(output_filename, index=False)
files.download(output_filename)