In [38]:
!pip install azure-storage-blob # Microoft Azure
!pip install pyarrow
!pip install psycopg2 sqlalchemy



In [39]:
# import libraries
import pandas as pd
import numpy as np
import json
import requests
from io import StringIO
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from math import ceil
import datetime
import calendar
from sqlalchemy import create_engine

In [40]:
# Azure Blob Storage helper functions
def azure_upload_blob(connect_str, container_name, blob_name, data):
    """Send ``data`` to an Azure blob and print a confirmation line.

    Args:
        connect_str: Connection string given to
            ``BlobServiceClient.from_connection_string``.
        container_name: Container that holds the target blob.
        blob_name: Name of the blob to write.
        data: Payload forwarded unchanged to ``upload_blob``.

    Returns:
        None.
    """
    service = BlobServiceClient.from_connection_string(connect_str)
    target = service.get_blob_client(container=container_name, blob=blob_name)
    # The upload is requested with overwrite=True.
    target.upload_blob(data, overwrite=True)
    print(f"Uploaded to Azure Blob: {blob_name}")

def azure_download_blob(connect_str, container_name, blob_name, encoding='utf-8'):
    """Download a blob from Azure Blob Storage and return its content as text.

    Args:
        connect_str: Connection string given to
            ``BlobServiceClient.from_connection_string``.
        container_name: Container that holds the blob.
        blob_name: Name of the blob to read.
        encoding: Codec used to decode the downloaded bytes. Defaults to
            ``'utf-8'``, which is what this helper always used before the
            parameter existed.

    Returns:
        str: The full blob content decoded with ``encoding``.

    Raises:
        UnicodeDecodeError: If the bytes are not valid for ``encoding``.
    """
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    # readall() yields the whole blob as bytes; decode once at the end.
    return blob_client.download_blob().readall().decode(encoding)

# Google Cloud Storage helper functions
def google_upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload a local file into a bucket through ``storage.Client`` and print a confirmation.

    NOTE(review): ``storage`` is not imported in the visible part of this
    notebook; presumably ``from google.cloud import storage`` is expected.
    Confirm before calling, otherwise this raises ``NameError``.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Name the blob gets inside the bucket.
    """
    destination = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    destination.upload_from_filename(source_file_name)
    print(f"File {source_file_name} uploaded to {destination_blob_name}.")

def google_download_blob(bucket_name, source_blob_name, destination_file_name):
    """Download a blob from a bucket through ``storage.Client`` into a local file.

    NOTE(review): ``storage`` is not imported in the visible part of this
    notebook; presumably ``from google.cloud import storage`` is expected.
    Confirm before calling, otherwise this raises ``NameError``.

    Args:
        bucket_name: Name of the bucket that holds the blob.
        source_blob_name: Name of the blob to fetch.
        destination_file_name: Local path the blob content is written to.
    """
    source = storage.Client().bucket(bucket_name).blob(source_blob_name)
    source.download_to_filename(destination_file_name)
    print(f"Blob {source_blob_name} downloaded to {destination_file_name}.")

# AWS S3 helper functions
def aws_upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket and print a confirmation.

    NOTE(review): ``boto3`` is not imported in the visible part of this
    notebook; confirm it is imported before calling.

    Args:
        file_name: Path of the local file to upload.
        bucket: Name of the destination bucket.
        object_name: Key to store the file under. When ``None`` the base name
            of ``file_name`` is used.
    """
    # Imported here because the notebook's visible import cell does not bring in `os`;
    # without it the default-object-name branch below fails with NameError.
    import os

    if object_name is None:
        object_name = os.path.basename(file_name)
    s3_client = boto3.client('s3')
    s3_client.upload_file(file_name, bucket, object_name)
    print(f"Uploaded {file_name} to S3 bucket {bucket}.")

def aws_download_file(bucket, object_name, file_name):
    """Fetch an object from an S3 bucket into a local file and print a confirmation.

    NOTE(review): ``boto3`` is not imported in the visible part of this
    notebook; confirm it is imported before calling.

    Args:
        bucket: Name of the bucket that holds the object.
        object_name: Key of the object to download.
        file_name: Local path the object is written to.
    """
    boto3.client('s3').download_file(bucket, object_name, file_name)
    print(f"Downloaded {object_name} from S3 bucket {bucket}.")

### Get raw_NYCRest.csv from Azure

In [41]:
# JSON file that holds the storage connection string
config_file_path = 'config.json'

# Parse the configuration file
with open(config_file_path, 'r') as config_file:
    config = json.loads(config_file.read())

# Blob coordinates of the raw NYC restaurant extract
connection_string = config["connectionString"]
container_name, blob_name = "groupproject", "groupdata2_raw_NYCRest.csv"

# Fetch the blob as text, then parse that text as CSV
blob_content = azure_download_blob(
    connect_str=connection_string,
    container_name=container_name,
    blob_name=blob_name,
)

raw_NYCRest_df = pd.read_csv(StringIO(blob_content))
raw_NYCRest_df

Unnamed: 0,camis,dba,boro,building,street,zipcode,phone,inspection_date,critical_flag,record_date,latitude,longitude,cuisine_description,action,score,inspection_type,violation_code,violation_description,grade,grade_date
0,41022989,MULBERRY STREET BAR,Manhattan,176 1/2,MULBERRY STREET,10013.0,2122269345,2021-09-17T00:00:00.000,Not Applicable,2024-04-16T06:00:10.000,40.720320,-73.996894,Italian,Violations were cited in the following area(s).,0.0,Inter-Agency Task Force / Initial Inspection,,,,
1,50061025,SUBWAY,Manhattan,21,MAIDEN LANE,10038.0,2125714417,2022-06-22T00:00:00.000,Not Critical,2024-04-16T06:00:10.000,40.709253,-74.009082,Sandwiches,Violations were cited in the following area(s).,46.0,Cycle Inspection / Initial Inspection,10C,Lighting Inadequate,,
2,50103012,TIFFANY CAFE,Manhattan,6,EAST 57 STREET,10022.0,2126135511,2023-03-29T00:00:00.000,Not Applicable,2024-04-16T06:00:10.000,40.762773,-73.973547,American,No violations were recorded at the time of thi...,0.0,Cycle Inspection / Initial Inspection,,,A,2023-03-29T00:00:00.000
3,50104962,EMPANADAS RD,Bronx,1971,DR MARTIN L KING JR BLVE,,3477264990,2021-10-08T00:00:00.000,Not Critical,2024-04-16T06:00:10.000,0.000000,0.000000,Latin American,Violations were cited in the following area(s).,,Administrative Miscellaneous / Re-inspection,22F,MISBRANDED AND LABELING,,
4,50016699,TU CASA,Queens,10311,QUEENS BLVD,11375.0,7182752421,2022-04-26T00:00:00.000,Critical,2024-04-16T06:00:10.000,40.724745,-73.850128,Spanish,Violations were cited in the following area(s).,11.0,Cycle Inspection / Initial Inspection,02B,Hot food item not held at or above 140º F.,A,2022-04-26T00:00:00.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221974,50084577,FARM TO BURGER,Manhattan,310,WEST 40 STREET,10018.0,6466095122,2023-02-10T00:00:00.000,Not Critical,2024-04-16T06:00:10.000,40.756202,-73.991359,American,Violations were cited in the following area(s).,11.0,Cycle Inspection / Re-inspection,10F,Non-food contact surface or equipment made of ...,A,2023-02-10T00:00:00.000
221975,50051743,ORCHARD BEACH SNACK BAR SECTION 10,Bronx,1,ORCHARD BEACH ROAD,10464.0,6462294471,2017-06-29T00:00:00.000,Critical,2024-04-16T06:00:10.000,40.865153,-73.802838,Coffee/Tea,Violations were cited in the following area(s).,11.0,Cycle Inspection / Initial Inspection,04K,Evidence of rats or live rats present in facil...,A,2017-06-29T00:00:00.000
221976,50088485,VAN LEEUWEN ICE CREAM,Brooklyn,81,BERGEN STREET,11201.0,7187011630,2023-03-27T00:00:00.000,Not Critical,2024-04-16T06:00:10.000,40.686556,-73.990023,Frozen Desserts,Violations were cited in the following area(s).,8.0,Cycle Inspection / Re-inspection,10G,Dishwashing and ware washing: Cleaning and sa...,A,2023-03-27T00:00:00.000
221977,41309636,LONA PIZZA,Bronx,373,EAST 204 STREET,10467.0,7186556600,2022-03-17T00:00:00.000,Critical,2024-04-16T06:00:10.000,40.871567,-73.877236,Pizza,Violations were cited in the following area(s).,33.0,Cycle Inspection / Initial Inspection,02B,Hot food item not held at or above 140º F.,,


In [60]:
# Preview only the beginning of the downloaded text instead of echoing the whole blob.
# `blob_content` is rebound by every download cell, so this shows whichever blob was fetched last.
blob_content[:500]

'dba,boro,building,street,zipcode,phone,cuisine_description,latitude,longitude,yelp_rating,yelp_review_count\nNONIS COFFEE SHOP,Bronx,5921,RIVERDALE AVENUE,10471.0,7186010032,American,40.907328650739,-73.903981191524,3.5,32.0\nRIVERDALE STEAK HOUSE,Bronx,5700,RIVERDALE AVENUE,10471.0,7185499946,Steakhouse,40.906719442359,-73.904123166258,3.6,72.0\nSKYCLUB CAFE,Bronx,5701,ARLINGTON AVENUE,10471.0,2016169579,American,40.906647570777,-73.906843794398,5.0,1.0\nCARLOS AND GABBY\'S,Bronx,5685,RIVERDALE AVENUE,10471.0,6463610119,Jewish/Kosher,40.906480724739,-73.90421033699,3.2,81.0\nTHE PIZZA BLOCK,Bronx,5677,RIVERDALE AVENUE,10471.0,7187087004,Jewish/Kosher,40.906340774536,-73.904246716497,2.1,16.0\nGRUENEBAUM\'S BAKERY & CAFE,Bronx,5663,RIVERDALE AVENUE,10471.0,7184324779,Jewish/Kosher,40.906096549329,-73.904312188114,2.9,34.0\nADDEO\'S RIVERDALE PIZZERIA,Bronx,5654,RIVERDALE AVENUE,10471.0,7185494750,Pizza,40.905558717994,-73.904472142443,3.6,115.0\nRIVERDALE BAGELS,Bronx,5650,RIVERDALE A

In [61]:
# List the column labels of the raw frame before choosing which ones to keep
raw_NYCRest_df.columns

Index(['camis', 'dba', 'boro', 'building', 'street', 'zipcode', 'phone',
       'inspection_date', 'critical_flag', 'record_date', 'latitude',
       'longitude', 'cuisine_description', 'action', 'score',
       'inspection_type', 'violation_code', 'violation_description', 'grade',
       'grade_date'],
      dtype='object')

In [62]:
# Columns carried forward; anything not listed here is left behind
columns_to_keep = [
    'dba', 'boro', 'building', 'street', 'zipcode', 'phone',
    'inspection_date', 'critical_flag', 'cuisine_description', 'action',
    'score', 'inspection_type', 'violation_code', 'violation_description',
    'grade', 'grade_date',
    'latitude', 'longitude',
]

# Column-only selection: every row is kept
raw_NYCRest_df_filtered = raw_NYCRest_df.loc[:, columns_to_keep]

raw_NYCRest_df_filtered

Unnamed: 0,dba,boro,building,street,zipcode,phone,inspection_date,critical_flag,cuisine_description,action,score,inspection_type,violation_code,violation_description,grade,grade_date,latitude,longitude
0,MULBERRY STREET BAR,Manhattan,176 1/2,MULBERRY STREET,10013.0,2122269345,2021-09-17T00:00:00.000,Not Applicable,Italian,Violations were cited in the following area(s).,0.0,Inter-Agency Task Force / Initial Inspection,,,,,40.720320,-73.996894
1,SUBWAY,Manhattan,21,MAIDEN LANE,10038.0,2125714417,2022-06-22T00:00:00.000,Not Critical,Sandwiches,Violations were cited in the following area(s).,46.0,Cycle Inspection / Initial Inspection,10C,Lighting Inadequate,,,40.709253,-74.009082
2,TIFFANY CAFE,Manhattan,6,EAST 57 STREET,10022.0,2126135511,2023-03-29T00:00:00.000,Not Applicable,American,No violations were recorded at the time of thi...,0.0,Cycle Inspection / Initial Inspection,,,A,2023-03-29T00:00:00.000,40.762773,-73.973547
3,EMPANADAS RD,Bronx,1971,DR MARTIN L KING JR BLVE,,3477264990,2021-10-08T00:00:00.000,Not Critical,Latin American,Violations were cited in the following area(s).,,Administrative Miscellaneous / Re-inspection,22F,MISBRANDED AND LABELING,,,0.000000,0.000000
4,TU CASA,Queens,10311,QUEENS BLVD,11375.0,7182752421,2022-04-26T00:00:00.000,Critical,Spanish,Violations were cited in the following area(s).,11.0,Cycle Inspection / Initial Inspection,02B,Hot food item not held at or above 140º F.,A,2022-04-26T00:00:00.000,40.724745,-73.850128
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221974,FARM TO BURGER,Manhattan,310,WEST 40 STREET,10018.0,6466095122,2023-02-10T00:00:00.000,Not Critical,American,Violations were cited in the following area(s).,11.0,Cycle Inspection / Re-inspection,10F,Non-food contact surface or equipment made of ...,A,2023-02-10T00:00:00.000,40.756202,-73.991359
221975,ORCHARD BEACH SNACK BAR SECTION 10,Bronx,1,ORCHARD BEACH ROAD,10464.0,6462294471,2017-06-29T00:00:00.000,Critical,Coffee/Tea,Violations were cited in the following area(s).,11.0,Cycle Inspection / Initial Inspection,04K,Evidence of rats or live rats present in facil...,A,2017-06-29T00:00:00.000,40.865153,-73.802838
221976,VAN LEEUWEN ICE CREAM,Brooklyn,81,BERGEN STREET,11201.0,7187011630,2023-03-27T00:00:00.000,Not Critical,Frozen Desserts,Violations were cited in the following area(s).,8.0,Cycle Inspection / Re-inspection,10G,Dishwashing and ware washing: Cleaning and sa...,A,2023-03-27T00:00:00.000,40.686556,-73.990023
221977,LONA PIZZA,Bronx,373,EAST 204 STREET,10467.0,7186556600,2022-03-17T00:00:00.000,Critical,Pizza,Violations were cited in the following area(s).,33.0,Cycle Inspection / Initial Inspection,02B,Hot food item not held at or above 140º F.,,,40.871567,-73.877236


In [63]:
# One frame per borough, selected by exact match on the 'boro' column
def _rows_for_boro(boro_name):
    """Return the rows of raw_NYCRest_df_filtered whose 'boro' equals boro_name."""
    return raw_NYCRest_df_filtered.loc[raw_NYCRest_df_filtered['boro'].eq(boro_name)]


df_brooklyn = _rows_for_boro('Brooklyn')
df_queens = _rows_for_boro('Queens')
df_manhattan = _rows_for_boro('Manhattan')
df_bronx = _rows_for_boro('Bronx')
df_statenisland = _rows_for_boro('Staten Island')

In [64]:
# Display the Bronx-only subset
df_bronx

Unnamed: 0,dba,boro,building,street,zipcode,phone,inspection_date,critical_flag,cuisine_description,action,score,inspection_type,violation_code,violation_description,grade,grade_date,latitude,longitude
3,EMPANADAS RD,Bronx,1971,DR MARTIN L KING JR BLVE,,3477264990,2021-10-08T00:00:00.000,Not Critical,Latin American,Violations were cited in the following area(s).,,Administrative Miscellaneous / Re-inspection,22F,MISBRANDED AND LABELING,,,0.000000,0.000000
5,YUNG HSIN RESTAURANT,Bronx,147577,BEACH AVENUE,,7184095984,2023-05-17T00:00:00.000,Critical,Chinese,Violations were cited in the following area(s).,13.0,Cycle Inspection / Initial Inspection,02B,Hot TCS food item not held at or above 140 °F.,A,2023-05-17T00:00:00.000,0.000000,0.000000
12,MARTIN'S FOODZ 2 GO,Bronx,3836,DYRE AVENUE,10466.0,7185010400,2022-03-08T00:00:00.000,Critical,Caribbean,Violations were cited in the following area(s).,9.0,Cycle Inspection / Initial Inspection,02B,Hot food item not held at or above 140º F.,A,2022-03-08T00:00:00.000,40.888367,-73.831280
16,CORKY'S DINER,Bronx,2535,GRAND CONCOURSE,10468.0,7189332484,2024-01-24T00:00:00.000,Critical,American,Violations were cited in the following area(s).,18.0,Cycle Inspection / Initial Inspection,02B,Hot TCS food item not held at or above 140 °F.,,,40.863278,-73.896514
29,PAPA JOHN'S (STAND 310),Bronx,1,EAST 161 STREET,10451.0,9172843260,2017-07-25T00:00:00.000,Not Applicable,Pizza,No violations were recorded at the time of thi...,0.0,Cycle Inspection / Initial Inspection,,,A,2017-07-25T00:00:00.000,40.829028,-73.928496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221948,"SAKE II JAPANESE RESTAURANT, SUSHI & HIBACHI",Bronx,690,EAST 187 STREET,10458.0,7182200988,2023-01-11T00:00:00.000,Critical,Japanese,Violations were cited in the following area(s).,30.0,Cycle Inspection / Re-inspection,04H,"Raw, cooked or prepared food is adulterated, c...",C,2023-01-11T00:00:00.000,40.854192,-73.884643
221969,"DUNKIN',' BASKIN ROBBINS",Bronx,2,EAST KINGSBRIDGE ROAD,10468.0,7187334347,2023-06-22T00:00:00.000,Critical,Donuts,Violations were cited in the following area(s).,7.0,Cycle Inspection / Initial Inspection,06A,Personal cleanliness is inadequate. Outer garm...,A,2023-06-22T00:00:00.000,40.867395,-73.897242
221972,FOO-HING KITCHEN,Bronx,2895,SEDGWICK AVENUE,10468.0,7188846267,2021-09-07T00:00:00.000,Critical,Chinese,Violations were cited in the following area(s).,22.0,Cycle Inspection / Initial Inspection,06B,"Tobacco use, eating, or drinking from open con...",,,40.874944,-73.901262
221975,ORCHARD BEACH SNACK BAR SECTION 10,Bronx,1,ORCHARD BEACH ROAD,10464.0,6462294471,2017-06-29T00:00:00.000,Critical,Coffee/Tea,Violations were cited in the following area(s).,11.0,Cycle Inspection / Initial Inspection,04K,Evidence of rats or live rats present in facil...,A,2017-06-29T00:00:00.000,40.865153,-73.802838


In [65]:
# Column labels of the Bronx subset
df_bronx.columns

Index(['dba', 'boro', 'building', 'street', 'zipcode', 'phone',
       'inspection_date', 'critical_flag', 'cuisine_description', 'action',
       'score', 'inspection_type', 'violation_code', 'violation_description',
       'grade', 'grade_date', 'latitude', 'longitude'],
      dtype='object')

### Get Yelp.csv from Azure

In [58]:
# JSON file that holds the storage connection string
config_file_path = 'config.json'

# Parse the configuration file
with open(config_file_path, 'r') as config_file:
    config = json.loads(config_file.read())

# Blob coordinates of the Yelp extract
connection_string = config["connectionString"]
container_name, blob_name = "groupproject", "groupdata3_Yelp.csv"

# Fetch the blob as text, then parse that text as CSV
blob_content = azure_download_blob(
    connect_str=connection_string,
    container_name=container_name,
    blob_name=blob_name,
)

yelp_df = pd.read_csv(StringIO(blob_content))
yelp_df

Unnamed: 0,dba,boro,building,street,zipcode,phone,cuisine_description,latitude,longitude,yelp_rating,yelp_review_count
0,NONIS COFFEE SHOP,Bronx,5921,RIVERDALE AVENUE,10471.0,7186010032,American,40.907329,-73.903981,3.5,32.0
1,RIVERDALE STEAK HOUSE,Bronx,5700,RIVERDALE AVENUE,10471.0,7185499946,Steakhouse,40.906719,-73.904123,3.6,72.0
2,SKYCLUB CAFE,Bronx,5701,ARLINGTON AVENUE,10471.0,2016169579,American,40.906648,-73.906844,5.0,1.0
3,CARLOS AND GABBY'S,Bronx,5685,RIVERDALE AVENUE,10471.0,6463610119,Jewish/Kosher,40.906481,-73.904210,3.2,81.0
4,THE PIZZA BLOCK,Bronx,5677,RIVERDALE AVENUE,10471.0,7187087004,Jewish/Kosher,40.906341,-73.904247,2.1,16.0
...,...,...,...,...,...,...,...,...,...,...,...
1346,LOS HERMANOS SALAZAR RESTAURANT,Bronx,645,COURTLANDT AVENUE,10451.0,9174718320,Mexican,40.818888,-73.918492,2.6,71.0
1347,GLACKEN'S BAR,Bronx,135,EAST 149 STREET,10451.0,7185857082,American,40.818870,-73.928497,3.9,15.0
1348,AMERICAN GRILL,Bronx,2987,3 AVENUE,10455.0,7185856978,American,40.818847,-73.914142,3.0,2.0
1349,BRIDGES BAR,Bronx,4100,EAST TREMONT AVENUE,10465.0,3473989144,American,40.818818,-73.817114,3.0,1.0


### Create the Final DataFrame with all needed attributes

In [66]:
# Left join: every Bronx row is kept, and Yelp columns are attached where both
# 'dba' and 'phone' match. Other columns present in both frames get the
# default '_x' (left) / '_y' (right) suffixes.
merged_df = pd.merge(df_bronx, yelp_df, how='left', on=['dba', 'phone'])
merged_df

Unnamed: 0,dba,boro_x,building_x,street_x,zipcode_x,phone,inspection_date,critical_flag,cuisine_description_x,action,...,longitude_x,boro_y,building_y,street_y,zipcode_y,cuisine_description_y,latitude_y,longitude_y,yelp_rating,yelp_review_count
0,EMPANADAS RD,Bronx,1971,DR MARTIN L KING JR BLVE,,3477264990,2021-10-08T00:00:00.000,Not Critical,Latin American,Violations were cited in the following area(s).,...,0.000000,,,,,,,,,
1,YUNG HSIN RESTAURANT,Bronx,147577,BEACH AVENUE,,7184095984,2023-05-17T00:00:00.000,Critical,Chinese,Violations were cited in the following area(s).,...,0.000000,,,,,,,,,
2,MARTIN'S FOODZ 2 GO,Bronx,3836,DYRE AVENUE,10466.0,7185010400,2022-03-08T00:00:00.000,Critical,Caribbean,Violations were cited in the following area(s).,...,-73.831280,,,,,,,,,
3,CORKY'S DINER,Bronx,2535,GRAND CONCOURSE,10468.0,7189332484,2024-01-24T00:00:00.000,Critical,American,Violations were cited in the following area(s).,...,-73.896514,Bronx,2535,GRAND CONCOURSE,10468.0,American,40.863278,-73.896514,3.0,113.0
4,PAPA JOHN'S (STAND 310),Bronx,1,EAST 161 STREET,10451.0,9172843260,2017-07-25T00:00:00.000,Not Applicable,Pizza,No violations were recorded at the time of thi...,...,-73.928496,Bronx,1,EAST 161 STREET,10451.0,Pizza,40.829028,-73.928496,1.9,24.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19577,"SAKE II JAPANESE RESTAURANT, SUSHI & HIBACHI",Bronx,690,EAST 187 STREET,10458.0,7182200988,2023-01-11T00:00:00.000,Critical,Japanese,Violations were cited in the following area(s).,...,-73.884643,Bronx,690,EAST 187 STREET,10458.0,Japanese,40.854192,-73.884643,2.7,3.0
19578,"DUNKIN',' BASKIN ROBBINS",Bronx,2,EAST KINGSBRIDGE ROAD,10468.0,7187334347,2023-06-22T00:00:00.000,Critical,Donuts,Violations were cited in the following area(s).,...,-73.897242,,,,,,,,,
19579,FOO-HING KITCHEN,Bronx,2895,SEDGWICK AVENUE,10468.0,7188846267,2021-09-07T00:00:00.000,Critical,Chinese,Violations were cited in the following area(s).,...,-73.901262,Bronx,2895,SEDGWICK AVENUE,10468.0,Chinese,40.874944,-73.901262,0.0,0.0
19580,ORCHARD BEACH SNACK BAR SECTION 10,Bronx,1,ORCHARD BEACH ROAD,10464.0,6462294471,2017-06-29T00:00:00.000,Critical,Coffee/Tea,Violations were cited in the following area(s).,...,-73.802838,,,,,,,,,


In [67]:
# Work on a copy so that merged_df is not altered by the in-place steps in later cells
df_bronx_final = merged_df.copy()
# Show the merged column labels, including the '_x' / '_y' suffixed pairs
df_bronx_final.columns

Index(['dba', 'boro_x', 'building_x', 'street_x', 'zipcode_x', 'phone',
       'inspection_date', 'critical_flag', 'cuisine_description_x', 'action',
       'score', 'inspection_type', 'violation_code', 'violation_description',
       'grade', 'grade_date', 'latitude_x', 'longitude_x', 'boro_y',
       'building_y', 'street_y', 'zipcode_y', 'cuisine_description_y',
       'latitude_y', 'longitude_y', 'yelp_rating', 'yelp_review_count'],
      dtype='object')

In [68]:
# Keep the left-hand ('_x') columns plus the two Yelp metrics; the '_y' duplicates are not selected
df_bronx_final = df_bronx_final.loc[:, [
    'dba', 'boro_x', 'building_x', 'street_x', 'zipcode_x', 'phone',
    'inspection_date', 'critical_flag', 'cuisine_description_x', 'action',
    'score', 'inspection_type', 'violation_code', 'violation_description',
    'grade', 'grade_date',
    'latitude_x', 'longitude_x',
    'yelp_rating', 'yelp_review_count',
]]

In [69]:
# Strip the '_x' merge suffix so the columns carry their plain names again
df_bronx_final = df_bronx_final.rename(
    columns={
        'boro_x': 'boro',
        'building_x': 'building',
        'street_x': 'street',
        'zipcode_x': 'zipcode',
        'cuisine_description_x': 'cuisine_description',
        'latitude_x': 'latitude',
        'longitude_x': 'longitude',
    }
)
df_bronx_final

Unnamed: 0,dba,boro,building,street,zipcode,phone,inspection_date,critical_flag,cuisine_description,action,score,inspection_type,violation_code,violation_description,grade,grade_date,latitude,longitude,yelp_rating,yelp_review_count
0,EMPANADAS RD,Bronx,1971,DR MARTIN L KING JR BLVE,,3477264990,2021-10-08T00:00:00.000,Not Critical,Latin American,Violations were cited in the following area(s).,,Administrative Miscellaneous / Re-inspection,22F,MISBRANDED AND LABELING,,,0.000000,0.000000,,
1,YUNG HSIN RESTAURANT,Bronx,147577,BEACH AVENUE,,7184095984,2023-05-17T00:00:00.000,Critical,Chinese,Violations were cited in the following area(s).,13.0,Cycle Inspection / Initial Inspection,02B,Hot TCS food item not held at or above 140 °F.,A,2023-05-17T00:00:00.000,0.000000,0.000000,,
2,MARTIN'S FOODZ 2 GO,Bronx,3836,DYRE AVENUE,10466.0,7185010400,2022-03-08T00:00:00.000,Critical,Caribbean,Violations were cited in the following area(s).,9.0,Cycle Inspection / Initial Inspection,02B,Hot food item not held at or above 140º F.,A,2022-03-08T00:00:00.000,40.888367,-73.831280,,
3,CORKY'S DINER,Bronx,2535,GRAND CONCOURSE,10468.0,7189332484,2024-01-24T00:00:00.000,Critical,American,Violations were cited in the following area(s).,18.0,Cycle Inspection / Initial Inspection,02B,Hot TCS food item not held at or above 140 °F.,,,40.863278,-73.896514,3.0,113.0
4,PAPA JOHN'S (STAND 310),Bronx,1,EAST 161 STREET,10451.0,9172843260,2017-07-25T00:00:00.000,Not Applicable,Pizza,No violations were recorded at the time of thi...,0.0,Cycle Inspection / Initial Inspection,,,A,2017-07-25T00:00:00.000,40.829028,-73.928496,1.9,24.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19577,"SAKE II JAPANESE RESTAURANT, SUSHI & HIBACHI",Bronx,690,EAST 187 STREET,10458.0,7182200988,2023-01-11T00:00:00.000,Critical,Japanese,Violations were cited in the following area(s).,30.0,Cycle Inspection / Re-inspection,04H,"Raw, cooked or prepared food is adulterated, c...",C,2023-01-11T00:00:00.000,40.854192,-73.884643,2.7,3.0
19578,"DUNKIN',' BASKIN ROBBINS",Bronx,2,EAST KINGSBRIDGE ROAD,10468.0,7187334347,2023-06-22T00:00:00.000,Critical,Donuts,Violations were cited in the following area(s).,7.0,Cycle Inspection / Initial Inspection,06A,Personal cleanliness is inadequate. Outer garm...,A,2023-06-22T00:00:00.000,40.867395,-73.897242,,
19579,FOO-HING KITCHEN,Bronx,2895,SEDGWICK AVENUE,10468.0,7188846267,2021-09-07T00:00:00.000,Critical,Chinese,Violations were cited in the following area(s).,22.0,Cycle Inspection / Initial Inspection,06B,"Tobacco use, eating, or drinking from open con...",,,40.874944,-73.901262,0.0,0.0
19580,ORCHARD BEACH SNACK BAR SECTION 10,Bronx,1,ORCHARD BEACH ROAD,10464.0,6462294471,2017-06-29T00:00:00.000,Critical,Coffee/Tea,Violations were cited in the following area(s).,11.0,Cycle Inspection / Initial Inspection,04K,Evidence of rats or live rats present in facil...,A,2017-06-29T00:00:00.000,40.865153,-73.802838,,


In [70]:
# Drop rows without a Yelp rating, then parse both date columns to datetimes
df_bronx_final = (
    df_bronx_final
    .dropna(subset=['yelp_rating'])
    .assign(
        inspection_date=lambda frame: pd.to_datetime(frame['inspection_date']),
        grade_date=lambda frame: pd.to_datetime(frame['grade_date']),
    )
)

df_bronx_final

Unnamed: 0,dba,boro,building,street,zipcode,phone,inspection_date,critical_flag,cuisine_description,action,score,inspection_type,violation_code,violation_description,grade,grade_date,latitude,longitude,yelp_rating,yelp_review_count
3,CORKY'S DINER,Bronx,2535,GRAND CONCOURSE,10468.0,7189332484,2024-01-24,Critical,American,Violations were cited in the following area(s).,18.0,Cycle Inspection / Initial Inspection,02B,Hot TCS food item not held at or above 140 °F.,,NaT,40.863278,-73.896514,3.0,113.0
4,PAPA JOHN'S (STAND 310),Bronx,1,EAST 161 STREET,10451.0,9172843260,2017-07-25,Not Applicable,Pizza,No violations were recorded at the time of thi...,0.0,Cycle Inspection / Initial Inspection,,,A,2017-07-25,40.829028,-73.928496,1.9,24.0
5,JADE PALACE,Bronx,163,EINSTEIN LOOP,10475.0,7183201584,2022-03-23,Critical,Chinese,Violations were cited in the following area(s).,31.0,Cycle Inspection / Initial Inspection,02B,Hot food item not held at or above 140º F.,,NaT,40.864063,-73.822546,2.6,11.0
6,PINE BAR & GRILL,Bronx,1634,EASTCHESTER ROAD,10461.0,7183190900,2017-10-12,Critical,Italian,Violations were cited in the following area(s).,12.0,Cycle Inspection / Re-inspection,06F,Wiping cloths soiled or not stored in sanitizi...,A,2017-10-12,40.845277,-73.845095,3.0,2.0
7,LA ROLA RESTAURANT,Bronx,400,EAST 198 STREET,10458.0,9176881449,2024-03-06,Not Applicable,Spanish,Establishment re-opened by DOHMH.,0.0,Cycle Inspection / Reopening Inspection,,,Z,2024-03-06,40.866021,-73.886021,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19571,XIN HI CHINESE BUFFETT,Bronx,2053,BARTOW AVENUE,10475.0,7183792200,2023-01-20,Not Critical,Chinese,Violations were cited in the following area(s).,5.0,Cycle Inspection / Re-inspection,28-06,Contract with a pest management professional n...,A,2023-01-20,40.869468,-73.828515,4.0,5.0
19572,PROSPECT COFFEE SHOP,Bronx,1309,PROSPECT AVENUE,10459.0,7189911249,2023-03-20,Not Critical,American,Violations were cited in the following area(s).,9.0,Cycle Inspection / Initial Inspection,10F,Non-food contact surface or equipment made of ...,A,2023-03-20,40.828042,-73.898148,0.0,0.0
19577,"SAKE II JAPANESE RESTAURANT, SUSHI & HIBACHI",Bronx,690,EAST 187 STREET,10458.0,7182200988,2023-01-11,Critical,Japanese,Violations were cited in the following area(s).,30.0,Cycle Inspection / Re-inspection,04H,"Raw, cooked or prepared food is adulterated, c...",C,2023-01-11,40.854192,-73.884643,2.7,3.0
19579,FOO-HING KITCHEN,Bronx,2895,SEDGWICK AVENUE,10468.0,7188846267,2021-09-07,Critical,Chinese,Violations were cited in the following area(s).,22.0,Cycle Inspection / Initial Inspection,06B,"Tobacco use, eating, or drinking from open con...",,NaT,40.874944,-73.901262,0.0,0.0


In [71]:
# Look for names ('dba') that occur more than once yet disagree on phone or cuisine
def _occurs_repeatedly(group):
    """True when the group holds more than one row."""
    return len(group) > 1


def _phone_or_cuisine_differs(group):
    """True when the group has more than one distinct phone or cuisine value."""
    return (
        group['phone'].nunique(dropna=False) > 1
        or group['cuisine_description'].nunique(dropna=False) > 1
    )


same_dba = df_bronx_final.groupby('dba').filter(_occurs_repeatedly)
different_phone_cuisine = same_dba.groupby('dba').filter(_phone_or_cuisine_differs)

print(different_phone_cuisine)

           dba   boro building               street  zipcode       phone  \
4528   LA MASA  Bronx     3399  EAST TREMONT AVENUE  10461.0  9177089247   
8110   LA MASA  Bronx     3399  EAST TREMONT AVENUE  10461.0  9177089247   
11479  LA MASA  Bronx     2542  EAST TREMONT AVENUE  10461.0  9177089247   
12197  LA MASA  Bronx     2542  EAST TREMONT AVENUE  10461.0  9177089247   

      inspection_date critical_flag cuisine_description  \
4528       2023-07-19  Not Critical             Spanish   
8110       2023-07-19  Not Critical             Spanish   
11479      2023-08-01      Critical               Other   
12197      2023-08-01  Not Critical               Other   

                                                action  score  \
4528   Violations were cited in the following area(s).    5.0   
8110   Violations were cited in the following area(s).    5.0   
11479  Violations were cited in the following area(s).    7.0   
12197  Violations were cited in the following area(s).    7.0  

In [72]:
# Remove the 'LA MASA' rows whose cuisine is 'Other': a row is kept when it is
# not 'LA MASA' or when its cuisine is something else
df_bronx_final = df_bronx_final.loc[
    df_bronx_final['dba'].ne('LA MASA')
    | df_bronx_final['cuisine_description'].ne('Other')
]
df_bronx_final

Unnamed: 0,dba,boro,building,street,zipcode,phone,inspection_date,critical_flag,cuisine_description,action,score,inspection_type,violation_code,violation_description,grade,grade_date,latitude,longitude,yelp_rating,yelp_review_count
3,CORKY'S DINER,Bronx,2535,GRAND CONCOURSE,10468.0,7189332484,2024-01-24,Critical,American,Violations were cited in the following area(s).,18.0,Cycle Inspection / Initial Inspection,02B,Hot TCS food item not held at or above 140 °F.,,NaT,40.863278,-73.896514,3.0,113.0
4,PAPA JOHN'S (STAND 310),Bronx,1,EAST 161 STREET,10451.0,9172843260,2017-07-25,Not Applicable,Pizza,No violations were recorded at the time of thi...,0.0,Cycle Inspection / Initial Inspection,,,A,2017-07-25,40.829028,-73.928496,1.9,24.0
5,JADE PALACE,Bronx,163,EINSTEIN LOOP,10475.0,7183201584,2022-03-23,Critical,Chinese,Violations were cited in the following area(s).,31.0,Cycle Inspection / Initial Inspection,02B,Hot food item not held at or above 140º F.,,NaT,40.864063,-73.822546,2.6,11.0
6,PINE BAR & GRILL,Bronx,1634,EASTCHESTER ROAD,10461.0,7183190900,2017-10-12,Critical,Italian,Violations were cited in the following area(s).,12.0,Cycle Inspection / Re-inspection,06F,Wiping cloths soiled or not stored in sanitizi...,A,2017-10-12,40.845277,-73.845095,3.0,2.0
7,LA ROLA RESTAURANT,Bronx,400,EAST 198 STREET,10458.0,9176881449,2024-03-06,Not Applicable,Spanish,Establishment re-opened by DOHMH.,0.0,Cycle Inspection / Reopening Inspection,,,Z,2024-03-06,40.866021,-73.886021,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19571,XIN HI CHINESE BUFFETT,Bronx,2053,BARTOW AVENUE,10475.0,7183792200,2023-01-20,Not Critical,Chinese,Violations were cited in the following area(s).,5.0,Cycle Inspection / Re-inspection,28-06,Contract with a pest management professional n...,A,2023-01-20,40.869468,-73.828515,4.0,5.0
19572,PROSPECT COFFEE SHOP,Bronx,1309,PROSPECT AVENUE,10459.0,7189911249,2023-03-20,Not Critical,American,Violations were cited in the following area(s).,9.0,Cycle Inspection / Initial Inspection,10F,Non-food contact surface or equipment made of ...,A,2023-03-20,40.828042,-73.898148,0.0,0.0
19577,"SAKE II JAPANESE RESTAURANT, SUSHI & HIBACHI",Bronx,690,EAST 187 STREET,10458.0,7182200988,2023-01-11,Critical,Japanese,Violations were cited in the following area(s).,30.0,Cycle Inspection / Re-inspection,04H,"Raw, cooked or prepared food is adulterated, c...",C,2023-01-11,40.854192,-73.884643,2.7,3.0
19579,FOO-HING KITCHEN,Bronx,2895,SEDGWICK AVENUE,10468.0,7188846267,2021-09-07,Critical,Chinese,Violations were cited in the following area(s).,22.0,Cycle Inspection / Initial Inspection,06B,"Tobacco use, eating, or drinking from open con...",,NaT,40.874944,-73.901262,0.0,0.0


### Upload Final DataFrame to Azure (Merge_df_Bronx.csv)

In [73]:
# JSON file that holds the storage connection string
config_file_path = 'config.json'

with open(config_file_path, 'r') as config_file:
    config = json.loads(config_file.read())

# Destination of the merged Bronx data set
CONNECTION_STRING_AZURE_STORAGE = config["connectionString"]
CONTAINER_AZURE = "groupproject"
blob_name = "groupdata4_Merge_df_Bronx.csv"

# Serialise the frame to CSV text in memory (index column omitted);
# the buffer is closed when the with-block ends
with StringIO() as output:
    df_bronx_final.to_csv(output, index=False)
    data = output.getvalue()

# Service client -> client for the target blob -> upload with overwrite=True
blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING_AZURE_STORAGE)
blob_client = blob_service_client.get_blob_client(container=CONTAINER_AZURE, blob=blob_name)
blob_client.upload_blob(data, overwrite=True)

print(f"Uploaded {blob_name} to Azure Blob Storage in container {CONTAINER_AZURE}.")

Uploaded groupdata4_Merge_df_Bronx.csv to Azure Blob Storage in container groupproject.
