# New York Foodies 

In [1]:
# Import Dependencies 
import os 
import csv 
import pandas as pd 
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import random
import json
import requests

In [2]:
# Read CSV File 
food_data_df = pd.read_csv('food_order.csv')
food_data_df


Unnamed: 0,order_id,customer_id,restaurant_name,cuisine_type,cost_of_the_order,day_of_the_week,rating,food_preparation_time,delivery_time
0,1477147,337525,Hangawi,Korean,30.75,Weekend,Not given,25,20
1,1477685,358141,Blue Ribbon Sushi Izakaya,Japanese,12.08,Weekend,Not given,25,23
2,1477070,66393,Cafe Habana,Mexican,12.23,Weekday,5,23,28
3,1477334,106968,Blue Ribbon Fried Chicken,American,29.20,Weekend,3,25,15
4,1478249,76942,Dirty Bird to Go,American,11.59,Weekday,4,25,24
...,...,...,...,...,...,...,...,...,...
1893,1476701,292602,Chipotle Mexican Grill $1.99 Delivery,Mexican,22.31,Weekend,5,31,17
1894,1477421,397537,The Smile,American,12.18,Weekend,5,31,19
1895,1477819,35309,Blue Ribbon Sushi,Japanese,25.22,Weekday,Not given,31,24
1896,1477513,64151,Jack's Wife Freda,Mediterranean,12.18,Weekday,5,23,31


In [3]:
# Count values to determine if there are inconsistencies on the numbers to verify what needs to be cleaned
food_data_df.count()

order_id                 1898
customer_id              1898
restaurant_name          1898
cuisine_type             1898
cost_of_the_order        1898
day_of_the_week          1898
rating                   1898
food_preparation_time    1898
delivery_time            1898
dtype: int64

In [4]:
# Describe the Data Set 
food_data_df.describe()

Unnamed: 0,order_id,customer_id,cost_of_the_order,food_preparation_time,delivery_time
count,1898.0,1898.0,1898.0,1898.0,1898.0
mean,1477496.0,171168.478398,16.498851,27.37197,24.161749
std,548.0497,113698.139743,7.483812,4.632481,4.972637
min,1476547.0,1311.0,4.47,20.0,15.0
25%,1477021.0,77787.75,12.08,23.0,20.0
50%,1477496.0,128600.0,14.14,27.0,25.0
75%,1477970.0,270525.0,22.2975,31.0,28.0
max,1478444.0,405334.0,35.41,35.0,33.0


In [5]:
columns = ['order_id', 'customer_id']

In [6]:
food_data_df.drop(columns=columns,inplace=True)

In [7]:
Clean_data=food_data_df.loc[food_data_df['rating']!='Not given']

In [8]:
Clean_data.head()

Unnamed: 0,restaurant_name,cuisine_type,cost_of_the_order,day_of_the_week,rating,food_preparation_time,delivery_time
2,Cafe Habana,Mexican,12.23,Weekday,5,23,28
3,Blue Ribbon Fried Chicken,American,29.2,Weekend,3,25,15
4,Dirty Bird to Go,American,11.59,Weekday,4,25,24
5,Tamarind TriBeCa,Indian,25.22,Weekday,3,20,24
7,Barbounia,Mediterranean,5.97,Weekday,3,33,30


In [10]:
Clean_data.dtypes

restaurant_name           object
cuisine_type              object
cost_of_the_order        float64
day_of_the_week           object
rating                    object
food_preparation_time      int64
delivery_time              int64
dtype: object

In [9]:
# Below are the restaurants with the symbols at the end, my purpose is to try and remove the symbol at the end 
#(this is optional if needed it can be removed)

food_data_df[food_data_df.restaurant_name.isin(["Joe's Shanghai \x8e_À\x8eü£¾÷´",'Big Wong Restaurant \x8c_¤¾Ñ¼'])]

Unnamed: 0,restaurant_name,cuisine_type,cost_of_the_order,day_of_the_week,rating,food_preparation_time,delivery_time
10,Big Wong Restaurant _¤¾Ñ¼,Chinese,5.92,Weekday,Not given,34,28
107,Joe's Shanghai _Àü£¾÷´,Chinese,15.81,Weekend,5,20,26
810,Joe's Shanghai _Àü£¾÷´,Chinese,16.1,Weekend,5,31,22
1377,Joe's Shanghai _Àü£¾÷´,Chinese,16.15,Weekend,3,33,20
1600,Joe's Shanghai _Àü£¾÷´,Chinese,22.31,Weekday,3,27,31


In [10]:
Clean_data['rating'] = Clean_data['rating'].astype("int")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Clean_data['rating'] = Clean_data['rating'].astype("int")


In [13]:
Clean_data.dtypes

restaurant_name           object
cuisine_type              object
cost_of_the_order        float64
day_of_the_week           object
rating                     int32
food_preparation_time      int64
delivery_time              int64
dtype: object

In [11]:
#group by cuisine type

cuisine_df = Clean_data.groupby(['cuisine_type'])


In [23]:
cuisine_df.mean()

  cuisine_df.mean()


Unnamed: 0_level_0,cost_of_the_order,rating,food_preparation_time,delivery_time
cuisine_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
American,16.812989,4.298913,27.388587,24.396739
Chinese,16.186541,4.338346,27.977444,23.496241
French,20.087,4.3,26.6,25.1
Indian,16.6634,4.54,26.28,23.3
Italian,17.13593,4.360465,27.627907,25.209302
Japanese,16.345641,4.373626,27.652015,23.74359
Korean,13.142222,4.111111,25.333333,21.111111
Mediterranean,15.8925,4.21875,27.875,23.1875
Mexican,16.319375,4.416667,25.854167,24.770833
Middle Eastern,20.075882,4.235294,26.676471,24.617647


In [24]:
rating_df = Clean_data.groupby(['rating'])

In [25]:
rating_df.describe()

Unnamed: 0_level_0,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,food_preparation_time,food_preparation_time,food_preparation_time,food_preparation_time,food_preparation_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
rating,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
3,188.0,16.223351,7.840886,4.75,12.0425,14.12,21.83,33.37,188.0,27.430851,...,32.0,35.0,188.0,24.558511,4.652845,15.0,21.0,25.0,28.0,33.0
4,386.0,16.710337,7.632996,4.75,12.13,14.16,22.7875,35.41,386.0,27.396373,...,31.75,35.0,386.0,23.867876,4.923377,15.0,20.0,24.0,28.0,33.0
5,588.0,16.965697,7.448696,4.47,12.13,15.205,22.8,33.03,588.0,27.355442,...,32.0,35.0,588.0,24.212585,5.018607,15.0,20.0,25.0,28.0,33.0


In [18]:
cuisine_df.describe()

Unnamed: 0_level_0,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,rating,rating,...,food_preparation_time,food_preparation_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
cuisine_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
American,368.0,16.812989,7.567816,4.75,12.13,15.305,22.85,33.03,368.0,4.298913,...,31.0,35.0,368.0,24.396739,4.697404,15.0,21.0,25.0,28.0,33.0
Chinese,133.0,16.186541,7.342326,4.75,12.13,14.16,20.86,33.03,133.0,4.338346,...,32.0,35.0,133.0,23.496241,5.061177,15.0,19.0,24.0,28.0,33.0
French,10.0,20.087,8.170813,11.98,12.1925,17.825,29.1375,29.25,10.0,4.3,...,30.0,35.0,10.0,25.1,3.842742,17.0,23.25,25.5,28.0,29.0
Indian,50.0,16.6634,7.937862,5.34,9.855,15.96,23.8375,31.43,50.0,4.54,...,29.75,35.0,50.0,23.3,4.953869,15.0,18.25,24.0,26.0,32.0
Italian,172.0,17.13593,7.348053,5.72,12.13,14.77,22.8,33.03,172.0,4.360465,...,32.0,35.0,172.0,25.209302,4.904029,15.0,22.0,26.0,29.0,33.0
Japanese,273.0,16.345641,7.572002,4.47,11.83,14.07,22.26,33.37,273.0,4.373626,...,32.0,35.0,273.0,23.74359,5.033729,15.0,20.0,24.0,28.0,33.0
Korean,9.0,13.142222,6.552291,5.77,11.84,12.18,12.61,29.25,9.0,4.111111,...,26.0,33.0,9.0,21.111111,4.884784,16.0,16.0,21.0,26.0,26.0
Mediterranean,32.0,15.8925,7.485357,5.67,12.1675,13.995,16.345,35.41,32.0,4.21875,...,33.0,35.0,32.0,23.1875,5.257483,15.0,19.0,23.5,28.25,33.0
Mexican,48.0,16.319375,8.078039,4.85,11.3475,13.58,20.55,33.32,48.0,4.416667,...,28.25,35.0,48.0,24.770833,4.572069,17.0,21.0,25.0,28.25,33.0
Middle Eastern,34.0,20.075882,8.139889,5.77,13.6925,17.97,28.105,32.93,34.0,4.235294,...,29.75,34.0,34.0,24.617647,5.979838,15.0,18.5,25.5,29.0,33.0


In [26]:
food_data_df.head()

Unnamed: 0,restaurant_name,cuisine_type,cost_of_the_order,day_of_the_week,rating,food_preparation_time,delivery_time
0,Hangawi,Korean,30.75,Weekend,Not given,25,20
1,Blue Ribbon Sushi Izakaya,Japanese,12.08,Weekend,Not given,25,23
2,Cafe Habana,Mexican,12.23,Weekday,5,23,28
3,Blue Ribbon Fried Chicken,American,29.2,Weekend,3,25,15
4,Dirty Bird to Go,American,11.59,Weekday,4,25,24


In [20]:
res_name = Clean_data.groupby(['restaurant_name'])

In [21]:
res_name.describe()

Unnamed: 0_level_0,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,cost_of_the_order,rating,rating,...,food_preparation_time,food_preparation_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time,delivery_time
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
restaurant_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
'wichcraft,1.0,8.100000,,8.10,8.1000,8.100,8.1000,8.10,1.0,5.000000,...,28.00,28.0,1.0,28.0,,28.0,28.00,28.0,28.00,28.0
12 Chairs,2.0,22.675000,9.015611,16.30,19.4875,22.675,25.8625,29.05,2.0,4.500000,...,27.75,28.0,2.0,29.0,4.242641,26.0,27.50,29.0,30.50,32.0
5 Napkin Burger,2.0,27.135000,2.708219,25.22,26.1775,27.135,28.0925,29.05,2.0,4.000000,...,26.75,27.0,2.0,21.0,0.000000,21.0,21.00,21.0,21.00,21.0
67 Burger,1.0,29.050000,,29.05,29.0500,29.050,29.0500,29.05,1.0,5.000000,...,20.00,20.0,1.0,28.0,,28.0,28.00,28.0,28.00,28.0
Amma,2.0,20.155000,15.945258,8.88,14.5175,20.155,25.7925,31.43,2.0,4.500000,...,25.25,27.0,2.0,25.5,2.121320,24.0,24.75,25.5,26.25,27.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zero Otto Nove,1.0,12.180000,,12.18,12.1800,12.180,12.1800,12.18,1.0,4.000000,...,29.00,29.0,1.0,25.0,,25.0,25.00,25.0,25.00,25.0
brgr,1.0,5.720000,,5.72,5.7200,5.720,5.7200,5.72,1.0,3.000000,...,24.00,24.0,1.0,26.0,,26.0,26.00,26.0,26.00,26.0
da Umberto,1.0,12.270000,,12.27,12.2700,12.270,12.2700,12.27,1.0,5.000000,...,20.00,20.0,1.0,30.0,,30.0,30.00,30.0,30.00,30.0
ilili Restaurant,13.0,21.573077,8.378503,9.46,13.7300,24.200,29.2500,32.93,13.0,4.153846,...,29.00,34.0,13.0,25.0,6.244998,15.0,21.00,25.0,29.00,33.0


In [27]:
Clean_data.describe()

Unnamed: 0,cost_of_the_order,rating,food_preparation_time,delivery_time
count,1162.0,1162.0,1162.0,1162.0
mean,16.760766,4.344234,27.381239,24.154045
std,7.572578,0.741478,4.677922,4.930999
min,4.47,3.0,20.0,15.0
25%,12.13,4.0,23.0,20.0
50%,14.6,5.0,27.0,25.0
75%,22.75,5.0,32.0,28.0
max,35.41,5.0,35.0,33.0
