In [13]:
# Import dependencies
import pandas as pd
import datetime as dt
import json

In [3]:
# Load the raw review data from the CSV file (latin-1 encoded) into a DataFrame
file = "DisneyReviews.csv"
reviewsdf = pd.read_csv(filepath_or_buffer=file, encoding="latin-1")
reviewsdf.head(n=5)

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch
0,670801367,5,2019-4,United States,This place has always been and forever will be...,Disneyland_California
1,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,Disneyland_California
2,670565072,4,2019-5,Australia,All and all a great day was had. The crowds ar...,Disneyland_California
3,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,Disneyland_California
4,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",Disneyland_California


In [4]:
# Parse the 'Year_Month' strings into timestamps and store them in a new
# 'YYYY-MM' column; entries that do not match the year-month pattern
# (e.g. 'missing') are coerced to NaT instead of raising
year_month_parsed = pd.to_datetime(
    reviewsdf['Year_Month'],
    errors='coerce',
    format='%Y-%m',
)
reviewsdf['YYYY-MM'] = year_month_parsed
reviewsdf.head(n=5)

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch,YYYY-MM
0,670801367,5,2019-4,United States,This place has always been and forever will be...,Disneyland_California,2019-04-01
1,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,Disneyland_California,2019-04-01
2,670565072,4,2019-5,Australia,All and all a great day was had. The crowds ar...,Disneyland_California,2019-05-01
3,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,Disneyland_California,2019-04-01
4,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",Disneyland_California,2019-04-01


In [5]:
# Remove repeated reviews: rows sharing the same 'Review_Text' are collapsed
# to their first occurrence (original index labels are kept)
reviewsdf = reviewsdf.drop_duplicates(subset=['Review_Text'], keep='first')
reviewsdf

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch,YYYY-MM
0,670801367,5,2019-4,United States,This place has always been and forever will be...,Disneyland_California,2019-04-01
1,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,Disneyland_California,2019-04-01
2,670565072,4,2019-5,Australia,All and all a great day was had. The crowds ar...,Disneyland_California,2019-05-01
3,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,Disneyland_California,2019-04-01
4,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",Disneyland_California,2019-04-01
...,...,...,...,...,...,...,...
19401,1563280,5,missing,United States,I have taken my music groups to Disneyland for...,Disneyland_California,NaT
19402,1540854,5,missing,United States,This is definitely the Happiest Place on Earth...,Disneyland_California,NaT
19403,1534364,1,missing,United States,"never again...what a horrible experience, the ...",Disneyland_California,NaT
19404,1506324,5,missing,United States,We take a long weekend trip to California each...,Disneyland_California,NaT


In [6]:
# Drop the 'Branch' column; per the original note, every row in this file is
# Disneyland_California, so the column carries no information.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the
# cell re-runnable: a second execution no longer raises KeyError once the
# column is already gone.
reviewsdf = reviewsdf.drop(columns='Branch', errors='ignore')
reviewsdf.head()

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,YYYY-MM
0,670801367,5,2019-4,United States,This place has always been and forever will be...,2019-04-01
1,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,2019-04-01
2,670565072,4,2019-5,Australia,All and all a great day was had. The crowds ar...,2019-05-01
3,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,2019-04-01
4,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",2019-04-01


In [7]:
# Write the cleaned review table to 'reviews.json' as a JSON array with one
# object per row
reviewsdf.to_json(path_or_buf='reviews.json', orient='records')

In [7]:
# Next step: create a table of unique countries and their average ratings
# for Disneyland. Preview the cleaned frame first.
reviewsdf.head(n=5)

Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,YYYY-MM
0,670801367,5,2019-4,United States,This place has always been and forever will be...,2019-04-01
1,670760708,5,2019-4,United States,A great day of simple fun and thrills. Bring c...,2019-04-01
2,670565072,4,2019-5,Australia,All and all a great day was had. The crowds ar...,2019-05-01
3,670544335,5,2019-4,United States,Having been to the Florida location numerous t...,2019-04-01
4,670472278,5,2019-4,Canada,"Had the 4 day pass, spent 3 at DL and one at C...",2019-04-01


In [14]:
# Collect the distinct reviewer locations as a plain Python list
countries = reviewsdf['Reviewer_Location'].unique().tolist()
print(countries)

# Export to json. The context manager guarantees the file handle is flushed
# and closed even if the write raises, which the manual open()/close() pair
# did not.
jsonString = json.dumps(countries)
with open("countries.json", "w") as jsonFile:
    jsonFile.write(jsonString)

['United States', 'Australia', 'Canada', 'Lebanon', 'New Zealand', 'United Kingdom', 'Philippines', 'Guam', 'Bangladesh', 'Egypt', 'Sweden', 'South Africa', 'Brazil', 'China', 'Mexico', 'Tanzania', 'Germany', 'Ecuador', 'Portugal', 'Romania', 'Cambodia', 'Poland', 'Hungary', 'Costa Rica', 'Singapore', 'United Arab Emirates', 'India', 'Turks and Caicos Islands', 'Spain', 'Indonesia', 'Hong Kong', 'France', 'Japan', 'Israel', 'Macau', 'Ireland', 'Norway', 'Italy', 'Chile', 'Malta', 'Sri Lanka', 'Puerto Rico', 'Kuwait', 'Greece', 'Netherlands', 'Bahrain', 'Taiwan', 'Vietnam', 'Russia', 'U.S. Virgin Islands', 'Saudi Arabia', 'Colombia', 'Switzerland', 'Slovakia', 'Botswana', 'Iceland', 'Czechia', 'Peru', 'Malaysia', 'Qatar', 'Aruba', 'Guatemala', 'South Korea', 'Finland', 'Cook Islands', 'Denmark', 'Austria', 'French Polynesia', 'Thailand', 'Rwanda', 'Tunisia', 'Armenia', 'Croatia', 'Ethiopia', 'Uruguay', 'Panama', 'Argentina', 'Vanuatu', 'Namibia', 'Belgium', 'Pakistan', 'Cuba', 'Turkey',

In [25]:
# Average rating per reviewer location, kept as a regular column layout
# (location is a column, not the index) with the mean named 'Avg_Rating'
countriesdf = (
    reviewsdf
    .groupby('Reviewer_Location', as_index=False)['Rating']
    .mean()
    .rename(columns={'Rating': 'Avg_Rating'})
)
countriesdf.head(n=5)

Unnamed: 0,Reviewer_Location,Avg_Rating
0,Argentina,4.444444
1,Armenia,5.0
2,Aruba,5.0
3,Australia,4.538807
4,Austria,4.2


In [30]:
# Number of reviews per reviewer location, most frequent first
location_tally = reviewsdf['Reviewer_Location'].value_counts()
location_tally = location_tally.rename_axis('Reviewer_Location')

# Turn the counts into a two-column frame: location and 'Reviews_Count'
locationcount = location_tally.reset_index(name='Reviews_Count')

locationcount

Unnamed: 0,Reviewer_Location,Reviews_Count
0,United States,12333
1,Australia,2448
2,Canada,1841
3,United Kingdom,1019
4,New Zealand,527
...,...,...
106,The Bahamas,1
107,Tunisia,1
108,Nicaragua,1
109,Macau,1


In [31]:
# Combine average rating and review count into one row per location
# (inner join on the shared 'Reviewer_Location' column)
reviewsbycountry = countriesdf.merge(
    locationcount,
    how='inner',
    on='Reviewer_Location',
)

reviewsbycountry

Unnamed: 0,Reviewer_Location,Avg_Rating,Reviews_Count
0,Argentina,4.444444,9
1,Armenia,5.000000,1
2,Aruba,5.000000,2
3,Australia,4.538807,2448
4,Austria,4.200000,5
...,...,...,...
106,United Kingdom,4.290481,1019
107,United States,4.393821,12333
108,Uruguay,3.750000,4
109,Vanuatu,4.000000,1


In [32]:
# Write the per-location summary (average rating and review count) to
# 'summary.json' as a JSON array with one object per row
reviewsbycountry.to_json(path_or_buf='summary.json', orient='records')