In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import json
from pprint import pprint

# Load the five-cities restaurant extract (path is relative to the notebook's working directory).
restaurants_csv_path = "output_data/restaurants_category_10_five_cities.csv"
five_cities = pd.read_csv(restaurants_csv_path)

In [2]:
 # Select the columns used by the rest of the notebook into a new frame.
# Per the original note, this leaves out the unnamed index column and the zipcode column.
combined = five_cities[['Restaurant_id', "Name", "Locality", "Address", "City", "Latitude", "Longitude", "Price Range",
                        "Average Cost for two", "User Rating", "Rating Text", "Votes", "all_reviews_count", "Cuisines"]]

# Fields that must be populated for a row to be kept.
critical_fields = ['Price Range', 'Average Cost for two', 'User Rating', 'Rating Text', 'Votes', 'all_reviews_count']

# Remove duplicate listings: rows sharing both Restaurant_id and Address, keeping the first occurrence.
deduplicated = combined.drop_duplicates(subset=['Restaurant_id', 'Address'], keep="first")

# Drop rows with a missing value in any critical field.
# DataFrame.dropna returns a new frame rather than modifying in place, so the result is assigned.
final_clean = deduplicated.dropna(subset=critical_fields, how='any')

final_clean.head()

Unnamed: 0,Restaurant_id,Name,Locality,Address,City,Latitude,Longitude,Price Range,Average Cost for two,User Rating,Rating Text,Votes,all_reviews_count,Cuisines
0,16585905,Tipo 00,CBD,"361 Little Bourke Street, CBD, Melbourne",Melbourne,-37.813528,144.961973,4,150,4.9,Excellent,1927,716,Italian
1,16586014,Minamishima,Richmond,"4 Lord Street, Richmond, Melbourne, VIC",Melbourne,-37.819831,145.005194,4,450,4.9,Excellent,747,290,"Japanese, Sushi"
2,16572612,Vue de monde,CBD,"Level 55, Rialto, 525 Collins Street, CBD, Mel...",Melbourne,-37.818954,144.957934,4,600,4.9,Excellent,3225,986,"Australian, Contemporary"
3,17881527,Dexter,Preston,"456 High Street, Preston, Melbourne",Melbourne,-37.736196,145.004456,4,110,4.9,Excellent,1473,682,"American, BBQ"
4,16574138,Suzuran,Camberwell,"1025 Burke Road, Camberwell, Melbourne",Melbourne,-37.821765,145.058465,2,35,4.9,Excellent,803,194,"Japanese, Sushi"


In [3]:
# Keep only rows whose "Average Cost for two" is neither 0 nor 25000017.
# NOTE(review): both values look like placeholder/erroneous costs — confirm against the source data.
cost_for_two = final_clean["Average Cost for two"]
cost_not_zero = cost_for_two != 0
cost_not_25000017 = cost_for_two != 25000017
cleaned_df = final_clean.loc[cost_not_zero & cost_not_25000017]
cleaned_df.head()

Unnamed: 0,Restaurant_id,Name,Locality,Address,City,Latitude,Longitude,Price Range,Average Cost for two,User Rating,Rating Text,Votes,all_reviews_count,Cuisines
0,16585905,Tipo 00,CBD,"361 Little Bourke Street, CBD, Melbourne",Melbourne,-37.813528,144.961973,4,150,4.9,Excellent,1927,716,Italian
1,16586014,Minamishima,Richmond,"4 Lord Street, Richmond, Melbourne, VIC",Melbourne,-37.819831,145.005194,4,450,4.9,Excellent,747,290,"Japanese, Sushi"
2,16572612,Vue de monde,CBD,"Level 55, Rialto, 525 Collins Street, CBD, Mel...",Melbourne,-37.818954,144.957934,4,600,4.9,Excellent,3225,986,"Australian, Contemporary"
3,17881527,Dexter,Preston,"456 High Street, Preston, Melbourne",Melbourne,-37.736196,145.004456,4,110,4.9,Excellent,1473,682,"American, BBQ"
4,16574138,Suzuran,Camberwell,"1025 Burke Road, Camberwell, Melbourne",Melbourne,-37.821765,145.058465,2,35,4.9,Excellent,803,194,"Japanese, Sushi"


In [6]:
 # Report how many rows were lost during the cleaning process.

# Number of restaurants before any cleaning.
total_restaurants = len(combined)

# Rows left after de-duplication. The same key (Restaurant_id + Address) as the
# cleaning step is used so the reported duplicate count matches the rows actually removed.
dup_result = len(combined.drop_duplicates(subset=['Restaurant_id', 'Address'], keep="first"))
dup_amount = total_restaurants - dup_result
new_rest_count1 = dup_result

# Rows left once records with a missing value in any critical field are excluded.
dropna_count = len(final_clean.dropna(subset=['Price Range', 'Average Cost for two', 'User Rating', 'Rating Text', 'Votes',
                                              'all_reviews_count'], how='any'))
items_dropped = new_rest_count1 - dropna_count
new_rest_count2 = dropna_count

# Rows removed by the "Average Cost for two" filter that produced cleaned_df.
# NOTE: this intermediate figure is only exact if cleaned_df has no rows with missing
# critical fields; the invalid_count total below equals dup_result - len(cleaned_df) either way.
drop_invalid = new_rest_count2 - len(cleaned_df)

# All rows removed for reasons other than duplication.
invalid_count = drop_invalid + items_dropped

# Total rows removed by the whole cleaning process.
rows_removed = dup_amount + invalid_count

# Show how many rows were removed during the cleaning process.
print(f'Following the cleaning process, {rows_removed} rows were removed as there was {dup_amount} duplicates and {invalid_count} invalid values')


Following the cleaning process, 103 rows were removed as there was 100 duplicates and 3 invalid values


In [11]:
# Build the top-100 table across all cities in the data.
# Rank by user rating, breaking ties on number of votes (both descending).
ranked_df = cleaned_df.sort_values(by=["User Rating", "Votes"], ascending=False)

# Renumber rows 0..n-1 in ranked order, discarding the previous index.
new_index_ranked = ranked_df.reset_index(drop=True)

# With a fresh 0-based index, the first 100 rows are exactly those with index < 100.
top_100 = new_index_ranked.head(100)
top_100

Unnamed: 0,Restaurant_id,Name,Locality,Address,City,Latitude,Longitude,Price Range,Average Cost for two,User Rating,Rating Text,Votes,all_reviews_count,Cuisines
0,16572612,Vue de monde,CBD,"Level 55, Rialto, 525 Collins Street, CBD, Mel...",Melbourne,-37.818954,144.957934,4,600,4.9,Excellent,3225,986,"Australian, Contemporary"
1,16585905,Tipo 00,CBD,"361 Little Bourke Street, CBD, Melbourne",Melbourne,-37.813528,144.961973,4,150,4.9,Excellent,1927,716,Italian
2,17881527,Dexter,Preston,"456 High Street, Preston, Melbourne",Melbourne,-37.736196,145.004456,4,110,4.9,Excellent,1473,682,"American, BBQ"
3,16596036,Ha-Lu,"Oxford Street, Leederville","4/401 Oxford Street, Mount Hawthorn, Perth",Perth,-31.923377,115.841146,3,60,4.9,Excellent,1066,240,"Japanese, Tapas"
4,16598837,Run Amuk,"Orient Street, South Fremantle","386A South Terrace, South Fremantle, Fremantle...",Perth,-32.072277,115.753065,2,50,4.9,Excellent,1038,340,Fast Food
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,16570741,Añada,Fitzroy,"197 Gertrude Street, Fitzroy, Melbourne",Melbourne,-37.806092,144.981169,4,110,4.6,Excellent,1232,374,"Spanish, Tapas"
96,16571353,Flower Drum,CBD,"17 Market Lane, CBD, Melbourne",Melbourne,-37.811927,144.969300,4,200,4.6,Excellent,1190,391,Chinese
97,16571254,Donovans,St Kilda,"40 Jacka Boulevard, St Kilda, Melbourne, VIC",Melbourne,-37.868544,144.975205,4,190,4.6,Excellent,1181,316,Modern Australian
98,16572727,MoVida Next Door,CBD,"164 Flinders Street, CBD, Melbourne",Melbourne,-37.816800,144.969229,3,90,4.6,Excellent,1181,276,"Spanish, Tapas"
