In [2]:
# Import dependencies and load in CSV files
import pandas as pd
from pathlib import Path 

# Load the two raw datasets; no index column is specified, so pandas
# assigns its default integer index to each frame.
nyc_inspection_df = pd.read_csv(
    filepath_or_buffer="Resources/nyc_restaurants/DOHMH_New_York_City_Restaurant_Inspection_Results.csv"
)
nyc_income_df = pd.read_csv(
    filepath_or_buffer="Resources/nyc_restaurants/Pre_restaurants.csv"
)

In [3]:
# Preview the first five rows of the raw NYC restaurant inspection data
nyc_inspection_df.head(n=5)

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,RECORD DATE,INSPECTION TYPE
0,40511702,NOTARO RESTAURANT,MANHATTAN,635,SECOND AVENUE,10016.0,2126863400,Italian,06/15/2015,Violations were cited in the following area(s).,02B,Hot food item not held at or above 140Âº F.,Critical,30.0,,,08/28/2017,Cycle Inspection / Initial Inspection
1,40511702,NOTARO RESTAURANT,MANHATTAN,635,SECOND AVENUE,10016.0,2126863400,Italian,11/25/2014,Violations were cited in the following area(s).,20F,Current letter grade card not posted.,Not Critical,,,,08/28/2017,Administrative Miscellaneous / Initial Inspection
2,50046354,VITE BAR,QUEENS,2507,BROADWAY,11106.0,3478134702,Italian,10/03/2016,Violations were cited in the following area(s).,10F,Non-food contact surface improperly constructe...,Not Critical,2.0,,,08/28/2017,Pre-permit (Operational) / Initial Inspection
3,50061389,TACK'S CHINESE TAKE OUT,STATEN ISLAND,11C,HOLDEN BLVD,10314.0,7189839854,Chinese,05/17/2017,Violations were cited in the following area(s).,02G,Cold food item held above 41Âº F (smoked fish ...,Critical,46.0,,,08/28/2017,Pre-permit (Operational) / Initial Inspection
4,41516263,NO QUARTER,BROOKLYN,8015,5 AVENUE,11209.0,7187019180,American,03/30/2017,Violations were cited in the following area(s).,04M,Live roaches present in facility's food and/or...,Critical,18.0,,,08/28/2017,Cycle Inspection / Initial Inspection


In [4]:
# Preview the first five rows of the income-level data for the areas around the restaurants
nyc_income_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,New York,Borough,income_num_returns(households),avg income per return(x1000s),income level,DBA,STREET,GRADE DATE,LATITUDE,LONGITUDE
0,0,10007,Manhattan,3640,693.379945,high income,MARIACHI'S RESTAURANT,CHAMBERS STREET,1/13/2020,40.716432,-74.010472
1,1,10007,Manhattan,3640,693.379945,high income,SPOTIFY - BARISTA BAR,GREENWICH STREET,1/22/2020,40.715553,-74.011021
2,2,10007,Manhattan,3640,693.379945,high income,STARBUCKS COFFEE COMPANY #29854,GREENWICH STREET,1/13/2020,40.715553,-74.011021
3,4,10007,Manhattan,3640,693.379945,high income,CANTEEN,WORLD TRADE CTR,1/23/2020,40.713941,-74.007401
4,5,10007,Manhattan,3640,693.379945,high income,JOE & THE JUICE GREENWICH ST,GREENWICH STREET,12/30/2019,40.715553,-74.011021


In [5]:
# Remove the inspection columns that are not carried into the cleaned frame.
# drop() returns a new DataFrame, so nyc_inspection_df itself is left intact.
clean_inspection_df = nyc_inspection_df.drop(
    columns=[
        'CAMIS',
        'BUILDING',
        'ZIPCODE',
        'PHONE',
        'INSPECTION DATE',
        'ACTION',
        'VIOLATION CODE',
        'VIOLATION DESCRIPTION',
        'CRITICAL FLAG',
        'RECORD DATE',
        'INSPECTION TYPE',
    ]
)

In [6]:
# Preview the first five rows of the trimmed inspection frame
clean_inspection_df.head(n=5)

Unnamed: 0,DBA,BORO,STREET,CUISINE DESCRIPTION,SCORE,GRADE,GRADE DATE
0,NOTARO RESTAURANT,MANHATTAN,SECOND AVENUE,Italian,30.0,,
1,NOTARO RESTAURANT,MANHATTAN,SECOND AVENUE,Italian,,,
2,VITE BAR,QUEENS,BROADWAY,Italian,2.0,,
3,TACK'S CHINESE TAKE OUT,STATEN ISLAND,HOLDEN BLVD,Chinese,46.0,,
4,NO QUARTER,BROOKLYN,5 AVENUE,American,18.0,,


In [7]:
# Align the borough column name with the income dataset ('BORO' -> 'Borough').
# Only the column label changes; the values in the column are not modified.
clean_inspection_df = clean_inspection_df.rename(columns={'BORO': 'Borough'})

In [8]:
# Keep only the rows that have a value in every remaining column
# (a row is removed if any one of its fields is null).
clean_inspection_df2 = clean_inspection_df.dropna(axis=0, how='any')

In [9]:
# Preview the first five rows that survived the null filter
clean_inspection_df2.head(n=5)

Unnamed: 0,DBA,Borough,STREET,CUISINE DESCRIPTION,SCORE,GRADE,GRADE DATE
6,HENRI'S BACKYARD,BROOKLYN,4TH AVE,American,39.0,C,06/22/2017
7,RICHMOND COUNTY COUNTRY CLUB,STATEN ISLAND,TODT HILL ROAD,American,12.0,A,06/14/2017
9,TOMOE SUSHI,MANHATTAN,THOMPSON STREET,Japanese,13.0,A,10/06/2015
12,CRAB SPOT RESTAURANT,BROOKLYN,UNION STREET,Seafood,12.0,A,07/28/2016
13,M NOODLE SHOP,BROOKLYN,METROPOLITAN AVENUE,Chinese,10.0,A,01/19/2017


In [10]:
# Count the remaining nulls per column; every count should be zero
clean_inspection_df2.isna().sum()

DBA                    0
Borough                0
STREET                 0
CUISINE DESCRIPTION    0
SCORE                  0
GRADE                  0
GRADE DATE             0
dtype: int64

In [11]:
# Remove the income columns that are not carried into the cleaned frame.
# drop() returns a new DataFrame, so nyc_income_df itself is left intact.
clean_income_df = nyc_income_df.drop(
    columns=[
        'Unnamed: 0',
        'New York',
        'income_num_returns(households)',
        'avg income per return(x1000s)',
    ]
)

In [12]:
# Upper-case the income level column label ('income level' -> 'INCOME LEVEL')
# so that it is capitalised consistently with the other column names.
clean_income_df = clean_income_df.rename(columns={'income level': 'INCOME LEVEL'})

In [13]:
# Preview the first five rows of the cleaned income frame
clean_income_df.head(n=5)

Unnamed: 0,Borough,INCOME LEVEL,DBA,STREET,GRADE DATE,LATITUDE,LONGITUDE
0,Manhattan,high income,MARIACHI'S RESTAURANT,CHAMBERS STREET,1/13/2020,40.716432,-74.010472
1,Manhattan,high income,SPOTIFY - BARISTA BAR,GREENWICH STREET,1/22/2020,40.715553,-74.011021
2,Manhattan,high income,STARBUCKS COFFEE COMPANY #29854,GREENWICH STREET,1/13/2020,40.715553,-74.011021
3,Manhattan,high income,CANTEEN,WORLD TRADE CTR,1/23/2020,40.713941,-74.007401
4,Manhattan,high income,JOE & THE JUICE GREENWICH ST,GREENWICH STREET,12/30/2019,40.715553,-74.011021


In [14]:
# Count the nulls per column in the cleaned income frame
clean_income_df.isna().sum()

Borough         0
INCOME LEVEL    0
DBA             0
STREET          0
GRADE DATE      0
LATITUDE        0
LONGITUDE       0
dtype: int64

In [15]:
# Write the null-free inspection frame to CSV, leaving out the DataFrame index
clean_inspection_df2.to_csv(
    path_or_buf='Resources/clean_inspection_df2.csv',
    index=False,
)

In [16]:
# Write the cleaned income frame to CSV, leaving out the DataFrame index
clean_income_df.to_csv(
    path_or_buf='Resources/clean_income_df.csv',
    index=False,
)

In [17]:
# Reduce the income frame to 'INCOME LEVEL' and 'STREET' so it can be joined
# cleanly in Postgres.
# NOTE(review): 'STREET' values repeat across rows in the preview of this
# frame - confirm the downstream join expects duplicate street rows.
clean_income_df2 = clean_income_df.drop(
    columns=['Borough', 'DBA', 'GRADE DATE', 'LATITUDE', 'LONGITUDE']
)

In [18]:
# Preview the first five rows of the join-ready income frame
clean_income_df2.head(n=5)

Unnamed: 0,INCOME LEVEL,STREET
0,high income,CHAMBERS STREET
1,high income,GREENWICH STREET
2,high income,GREENWICH STREET
3,high income,WORLD TRADE CTR
4,high income,GREENWICH STREET
