This IPython notebook combines all of the corrections into a new pandas DataFrame, which is then written to a CSV file. The CSV file is meant to be uploaded to Google Drive, where it provides the basis for the Google Fusion Table.

In [None]:
import dill as pickle
import pandas as pd
import numpy as np

In [None]:
# Read only the columns that end up in the final table; `usecols` makes
# pandas skip every other column of the Yelp business CSV.
good_cols = [
    'business_id', 'name', 'full_address', 'city', 'state',
    'latitude', 'longitude', 'stars', 'review_count', 'categories',
]
business_datafile = '~/capstone/data/yelp_academic_dataset_business.csv'
business = pd.read_csv(filepath_or_buffer=business_datafile, usecols=good_cols)
# Last expression of the cell: display the final rows as a quick sanity check.
business.tail()

In [None]:
# Load the pickled date-adjusted ratings (merged on 'business_id' below).
# Pickle data is binary, so the file is opened in 'rb' mode: with text mode
# ('r') pickle.load raises a TypeError on Python 3, and on platforms that
# translate newlines the data can be corrupted.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
filename = '/home/vagrant/capstone/data/date_adjusted_ratings.pkl'
with open(filename, 'rb') as f:
    date_adjusted_ratings = pickle.load(f)

In [None]:
# Attach the date-adjusted ratings to the business records, matching rows on
# 'business_id' with pandas' default join type. pd.merge returns a new
# DataFrame, so `business` itself is left untouched and no explicit copy is
# needed.
final_ratings = pd.merge(business, date_adjusted_ratings, on='business_id')
# 'stars_x' / 'stars_y' are the suffixed names pandas gives a 'stars' column
# that exists on both sides of the merge (left = business, right =
# date_adjusted_ratings -- presumably both carry one; confirm).
final_ratings.rename(columns={'stars_x': 'yelp_stars_round', 'stars_y': 'yelp_stars_calc'}, inplace=True)
# The stored date correction is negated so that it can be *added* to the
# rating later on. NOTE(review): presumably the pickle stores it with the
# opposite sign convention -- confirm against the notebook that produced it.
final_ratings['date_correction'] = -1.*final_ratings['date_correction']
# Last expression of the cell: preview the merged frame.
final_ratings.head()

In [None]:
# Load the pickled anchor-adjusted ratings.
# Pickle data is binary, so the file is opened in 'rb' mode: with text mode
# ('r') pickle.load raises a TypeError on Python 3.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
anchor_datafile = '/home/vagrant/capstone/data/anchor_adjusted_ratings.pkl'
with open(anchor_datafile, 'rb') as f:
    anchor_df = pickle.load(f)

# The anchor correction is the difference between the anchor-adjusted average
# and the raw average, i.e. the amount to add to the raw value.
anchor_df['anchor_correction'] = anchor_df['anchor_avg'] - anchor_df['raw_avg']
# Last expression of the cell: preview the frame.
anchor_df.head()

In [None]:
# Bring the anchor columns into the running table (matched on 'business_id')
# and discard 'raw_avg', which is not wanted in the final output.
final_ratings = (
    final_ratings
    .merge(anchor_df, on='business_id')
    .drop(labels=['raw_avg'], axis=1)
)
# Last expression of the cell: preview the result.
final_ratings.head()

In [None]:
# Load the pickled city-adjusted ratings.
# Pickle data is binary, so the file is opened in 'rb' mode: with text mode
# ('r') pickle.load raises a TypeError on Python 3.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
city_datafile = '/home/vagrant/capstone/data/city_adjusted_ratings.pkl'
with open(city_datafile, 'rb') as f:
    city_df = pickle.load(f)

# Drop 'state' and 'stars' before this frame is merged into the main table
# (presumably because the business data already supplies them; keeping them
# would produce suffixed duplicate columns for any name present on both sides).
city_df.drop(['state', 'stars'], axis=1, inplace=True)
# Rename to match the '<source>_correction' / '<source>_avg' naming used for
# the other corrections.
city_df.rename(columns={'adjustment': 'city_correction', 'city_adj_stars': 'city_avg'}, inplace=True)
# Last expression of the cell: preview the frame.
city_df.head()

In [None]:
# Add the city columns to the running table, matched on 'business_id'.
final_ratings = final_ratings.merge(city_df, on='business_id')

# Sum of the three individual corrections (date, anchor, city).
final_ratings['total_correction'] = (
    final_ratings['date_correction']
    + final_ratings['anchor_correction']
    + final_ratings['city_correction']
)

# Calculated Yelp rating with all three corrections applied.
final_ratings['corrected_stars'] = (
    final_ratings['yelp_stars_calc']
    + final_ratings['date_correction']
    + final_ratings['anchor_correction']
    + final_ratings['city_correction']
)

# Date + anchor correction only; the city correction is deliberately left out
# of this column (presumably it drives marker colouring -- confirm).
final_ratings['correction_for_color'] = (
    final_ratings['date_correction'] + final_ratings['anchor_correction']
)

# Last expression of the cell: preview the result.
final_ratings.head()

In [None]:
# Round the listed numeric columns to two decimals; columns not named here
# are passed through unchanged by DataFrame.round.
ratings_rounded = final_ratings.round(
    decimals=dict.fromkeys(
        ('yelp_stars_calc', 'anchor_correction', 'city_correction',
         'date_correction', 'corrected_stars', 'total_correction'),
        2,
    )
)
# Whole-star value derived from the already-rounded corrected rating, using
# Python's built-in round() element by element.
# NOTE(review): built-in round() differs between Python 2 (returns float,
# halves away from zero) and Python 3 (returns int, halves to even).
ratings_rounded['star_for_checkbox'] = list(map(round, ratings_rounded['corrected_stars']))
# Last expression of the cell: preview the result.
ratings_rounded.head()

In [None]:
# Write the rounded table as a UTF-8 CSV without the index column; this is
# the file intended for upload to Google Drive.
# Disabled alternative: pickle the frame to
# /home/vagrant/capstone/data/final_adjusted_ratings.pkl instead
# (a pickle file would have to be opened in binary 'wb' mode).
filename = '/home/vagrant/capstone/data/final_adjusted_ratings.csv'
ratings_rounded.to_csv(path_or_buf=filename, encoding='utf-8', index=False)