# This is a Sample File based on Export from Search Query

In [2]:
#import modules and dependencies
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gmaps
from pprint import pprint
from config import gkey
from config import api_key
# Hand the key imported from config to gmaps before any figure is created.
gmaps.configure(api_key=gkey)

# Read the exported search results from disk.
table = pd.read_csv('data/export.csv')

# Remove the two listed columns, then discard rows that are exact repeats
# of an earlier row.
master = (
    table
    .drop(['Unnamed: 0', 'Business ID'], axis=1)
    .drop_duplicates()
)

In [3]:
# Order the frame from the most-reviewed business down to the least-reviewed.
# (Duplicates were already removed when `master` was built; this only sorts.)
master2 = master.sort_values('Reviews', ascending=False)

In [4]:
# Show the review-sorted frame as this cell's output.
master2

Unnamed: 0,Business,City,State,Category,Latitude,Longitude,Price,Reviews,Rating
44,Katz's Delicatessen,New York,NY,Delis,40.722237,-73.987430,2,11713,4.0
25,Ippudo NY,New York,NY,Ramen,40.730920,-73.990150,2,9849,4.0
3449,The Halal Guys,New York,NY,Food Stands,40.761756,-73.979230,1,9250,4.0
5,Joe's Shanghai,New York,NY,Shanghainese,40.714669,-73.997760,2,5944,4.0
4449,Peter Luger,Brooklyn,NY,Steakhouses,40.709945,-73.962478,4,5535,4.0
...,...,...,...,...,...,...,...,...,...
45,Baodega,New York,NY,Shanghainese,40.740140,-73.991780,0,62,4.0
2749,Jintana Thai Farmhouse,Brooklyn,NY,Thai,40.666597,-73.982110,0,59,4.5
4899,Sugar Momma,New York,NY,Cocktail Bars,40.708310,-74.005670,2,48,4.5
4349,shabushabu Mayumon,New York,NY,Japanese,40.714340,-73.992050,0,8,5.0


# Change the Groupby to Category to Get Breakdown of Restaurants in Given Area

In [5]:
# Figures taken over every row of master2 (not used in the table built below).
category_counts = master2['Category'].nunique()  # number of distinct categories
average_reviews = master2['Reviews'].mean()
average_rating = master2['Rating'].mean()
average_price = master2['Price'].mean()

# One group per category value.
grouped_east_df = master2.groupby(by='Category')

# Rows per category, followed by the per-category means.
total_category = grouped_east_df['Category'].count()
price_avg = grouped_east_df['Price'].mean()
review_counts = grouped_east_df['Reviews'].mean()  # a mean, despite the name
rating_counts = grouped_east_df['Rating'].mean()   # a mean, despite the name

# Assemble the per-category table, rank it by how many restaurants each
# category holds, and keep the ten largest. Adjust head(10) to change the scope.
category_stats_df = (
    pd.DataFrame({
        "Number of Restaurants": total_category,
        "Average Price of Category": price_avg.round(2),
        # astype(int) drops the fractional part of the mean review count.
        "Average # of Reviews per Restaurant": review_counts.astype(int),
        "Average Rating by Category": rating_counts.round(2),
    })
    .sort_values('Number of Restaurants', ascending=False)
    .head(10)
)

# Populate a DataFrame Ranked by Number of Restaurants per Category in the Sample

In [6]:
# Show at most the first ten rows of the per-category table.
category_stats_df.iloc[:10]

Unnamed: 0_level_0,Number of Restaurants,Average Price of Category,Average # of Reviews per Restaurant,Average Rating by Category
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chinese,18,1.5,790,4.25
American (New),15,2.4,1022,4.13
Japanese,11,1.91,656,4.36
Italian,9,2.11,1544,4.17
Korean,8,1.88,1120,4.44
French,6,2.17,1010,4.25
Seafood,6,2.0,1675,4.25
Cajun/Creole,5,2.0,1224,4.2
Thai,4,1.5,2071,4.5
American (Traditional),4,2.5,626,4.12


In [8]:
# Ten most-reviewed businesses in the sample.
# master2 is already `master` sorted by 'Reviews' in descending order, so its
# first ten rows are the answer; sorting `master` a second time is unnecessary.
# A bare `master2` expression in the middle of a cell displays nothing, so it
# has been dropped.
# .copy() makes top_ten an independent frame rather than a slice of master2.
top_ten = master2.head(10).copy()

# Create a Table of Top 10 Places in Sample

In [9]:
# Show the ten most-reviewed businesses as this cell's output.
top_ten

Unnamed: 0,Business,City,State,Category,Latitude,Longitude,Price,Reviews,Rating
44,Katz's Delicatessen,New York,NY,Delis,40.722237,-73.98743,2,11713,4.0
25,Ippudo NY,New York,NY,Ramen,40.73092,-73.99015,2,9849,4.0
3449,The Halal Guys,New York,NY,Food Stands,40.761756,-73.97923,1,9250,4.0
5,Joe's Shanghai,New York,NY,Shanghainese,40.714669,-73.99776,2,5944,4.0
4449,Peter Luger,Brooklyn,NY,Steakhouses,40.709945,-73.962478,4,5535,4.0
6,Burger & Lobster,New York,NY,Seafood,40.74007,-73.99344,2,5274,4.0
15,Clinton Street Baking Company,New York,NY,Bakeries,40.721128,-73.983933,2,4852,4.0
2,LoveMama,New York,NY,Thai,40.730386,-73.986061,2,4579,4.5
12,Jacob's Pickles,New York,NY,Comfort Food,40.78665,-73.975528,2,3894,4.0
4649,Carmine's Italian Restaurant - Times Square,New York,NY,Italian,40.757496,-73.986684,2,3593,4.0


# Create a Heatmap of the Full Sample Using # of Reviews as Weight

In [10]:
# Heatmap of every business in master2, each point weighted by its review count.
coordinates = master2.loc[:, ["Latitude", "Longitude"]]
weights = master2.loc[:, "Reviews"].astype(float)

fig = gmaps.figure()
# NOTE(review): max_intensity=100 is far below the largest review counts in the
# sample (several thousand) — confirm that this setting is intended.
heatmap_layer = gmaps.heatmap_layer(
    coordinates,
    weights=weights,
    max_intensity=100,
    point_radius=10,
)
fig.add_layer(heatmap_layer)
fig

Figure(layout=FigureLayout(height='420px'))

# Create Different Dataframes/Pivot Tables Based on different Groupings with Categories.

In [18]:
# Choose ONE grouping for the summary cell that follows. Only the uncommented
# line runs; swap which line is commented out to change the breakdown.

# Alternative: by state only (yields per-state averages).
# grouped = master2.groupby(['State'])

# Active: by state, then by rating.
grouped = master2.groupby(by=['State', 'Rating'])

# Alternative: by state, price and rating.
# grouped = master2.groupby(['State', 'Price', 'Rating'])

# Alternative: by state, category and price (overlaps with the per-category
# table built earlier).
# grouped = master2.groupby(['State', 'Category', 'Price'])

# Alternative: by category, then by rating.
# grouped = master2.groupby(['Category', 'Rating'])

# Alternative: by category only; sort the resulting table afterwards to get the
# most popular categories, e.g. df.sort_values(by='col1', ascending=False).
# grouped = master2.groupby(['Category'])

In [20]:
# Summary table for whichever grouping `grouped` currently holds.

# size() counts rows per group, so a business with a missing Category still
# counts toward the total; count() on the Category column would skip it.
total_grouped_categories = grouped.size()
grouped_price_avg = grouped['Price'].mean()
grouped_reviews_count = grouped['Reviews'].mean()
# Non-null review values per group; not shown in the table below.
total_grouped_review_counts = grouped['Reviews'].count()
average_grouped_ratings = grouped['Rating'].mean()

# NOTE(review): the column labels mention "Category", but the rows are keyed by
# whatever `grouped` was built from — confirm the labels still fit.
grouped_category_stats_df = pd.DataFrame({
    "Total Restaurants by Sample": total_grouped_categories,
    "Average Price By Category": grouped_price_avg.round(2),
    # astype(int) drops the fractional part of the mean review count.
    "Average # of Reviews per Restaurant": grouped_reviews_count.astype(int),
    "Average Ratings by Category": average_grouped_ratings.round(2)
})
grouped_category_stats_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Restaurants by Sample,Average Price By Category,Average # of Reviews per Restaurant,Average Ratings by Category
State,Rating,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NY,3.5,2,2.0,399,3.5
NY,4.0,79,1.96,1776,4.0
NY,4.5,61,2.0,956,4.5
NY,5.0,7,1.43,202,5.0
