In [1]:
import pandas as pd
from sodapy import Socrata

# Socrata dataset id on data.cityofnewyork.us that this notebook analyses.
# Judging by the columns it returns (camis, dba, inspection_date,
# violation_code, ...), these are restaurant inspection records.
DATASET_ID = "43nn-pn8j"

# No app token is supplied (second argument is None).
client = Socrata("data.cityofnewyork.us", None)

# Fetch at most `limit` rows. The API returns JSON, which sodapy converts
# to a Python list of dictionaries (one dict per row).
limit = 10000
results = client.get(DATASET_ID, limit=limit)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)



In [2]:
# Tally how often each inspection outcome (the `action` text) occurs.
results_df['action'].value_counts()

action
Violations were cited in the following area(s).                                                                                       8516
Establishment Closed by DOHMH. Violations were cited in the following area(s) and those requiring immediate action were addressed.     307
No violations were recorded at the time of this inspection.                                                                            123
Establishment re-opened by DOHMH.                                                                                                       76
Establishment re-closed by DOHMH.                                                                                                       59
Name: count, dtype: int64

In [3]:
# Peek at the first five rows of the freshly loaded frame.
results_df.head()

Unnamed: 0,camis,dba,boro,building,street,phone,cuisine_description,inspection_date,action,violation_code,...,community_board,council_district,census_tract,bin,nta,location,:@computed_region_f5dn_yrer,:@computed_region_yeji_bk3q,:@computed_region_sbqj_enih,:@computed_region_92fq_4b7q
0,41032752,DONS BOGAM,Manhattan,1517,EAST 32 STREET,2126832200,Korean,2025-02-19T00:00:00.000,Violations were cited in the following area(s).,02B,...,,,,,,,,,,
1,50158848,AURA 57,Manhattan,130,EAST 57 STREET,9173882963,,1900-01-01T00:00:00.000,,,...,105.0,4.0,11203.0,1036890.0,MN19,"{'type': 'Point', 'coordinates': [-73.96948654...",11.0,4.0,10.0,51.0
2,50164505,DUBUHAUS,Manhattan,6,EAST 32 STREET,9178600745,,1900-01-01T00:00:00.000,,,...,105.0,4.0,7400.0,1017003.0,MN17,"{'type': 'Point', 'coordinates': [-73.98505869...",11.0,4.0,8.0,51.0
3,50121723,FRYING PAN WHEELHOUSE,Brooklyn,33,35 STREET,9178802190,,1900-01-01T00:00:00.000,,,...,307.0,38.0,1800.0,3336900.0,BK32,"{'type': 'Point', 'coordinates': [-74.00735241...",9.0,2.0,45.0,7.0
4,50172970,SPIRALS,Manhattan,137,1 AVENUE,2013672187,,1900-01-01T00:00:00.000,,,...,103.0,2.0,3800.0,1077697.0,MN22,"{'type': 'Point', 'coordinates': [-73.98513516...",70.0,4.0,5.0,50.0


In [4]:
# List every column name that came back from the API.
results_df.columns

Index(['camis', 'dba', 'boro', 'building', 'street', 'phone',
       'cuisine_description', 'inspection_date', 'action', 'violation_code',
       'violation_description', 'critical_flag', 'score', 'grade',
       'grade_date', 'record_date', 'inspection_type', 'latitude', 'longitude',
       'bbl', 'zipcode', 'community_board', 'council_district', 'census_tract',
       'bin', 'nta', 'location', ':@computed_region_f5dn_yrer',
       ':@computed_region_yeji_bk3q', ':@computed_region_sbqj_enih',
       ':@computed_region_92fq_4b7q'],
      dtype='object')

In [5]:
# Distinct borough labels present in the data. Besides the five borough
# names, the raw data also contains the placeholder value '0'.
# NOTE(review): `boroughs` is captured before the '0' -> 'Unknown'
# replacement in the next cell, so it still holds '0' afterwards.
boroughs = results_df['boro'].unique()
boroughs


array(['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island', '0'],
      dtype=object)

In [6]:
# The raw data uses the string '0' where no borough is recorded;
# relabel those rows as 'Unknown' and leave every other value untouched.
results_df['boro'] = results_df['boro'].replace({'0': 'Unknown'})

In [7]:
# Row counts per borough, to confirm '0' has been relabelled as 'Unknown'.
results_df['boro'].value_counts()

boro
Manhattan        3747
Brooklyn         2609
Queens           2401
Bronx             900
Staten Island     332
Unknown            11
Name: count, dtype: int64

In [8]:
# Remove the ':@computed_region_*' columns (presumably region lookups that
# Socrata adds automatically - TODO confirm).
# Selecting by prefix and rebinding, rather than dropping a hard-coded list
# in place, keeps this cell idempotent: re-running it after the columns are
# already gone is a no-op instead of a KeyError.
computed_region_cols = [
    col for col in results_df.columns
    if str(col).startswith(':@computed_region_')
]
results_df = results_df.drop(columns=computed_region_cols)

In [9]:
# Re-inspect the first rows to confirm the ':@computed_region_*' columns are gone.
results_df.head()

Unnamed: 0,camis,dba,boro,building,street,phone,cuisine_description,inspection_date,action,violation_code,...,latitude,longitude,bbl,zipcode,community_board,council_district,census_tract,bin,nta,location
0,41032752,DONS BOGAM,Manhattan,1517,EAST 32 STREET,2126832200,Korean,2025-02-19T00:00:00.000,Violations were cited in the following area(s).,02B,...,0.0,0.0,1,,,,,,,
1,50158848,AURA 57,Manhattan,130,EAST 57 STREET,9173882963,,1900-01-01T00:00:00.000,,,...,40.761061935614,-73.96948654606,1013110058,10022.0,105.0,4.0,11203.0,1036890.0,MN19,"{'type': 'Point', 'coordinates': [-73.96948654..."
2,50164505,DUBUHAUS,Manhattan,6,EAST 32 STREET,9178600745,,1900-01-01T00:00:00.000,,,...,40.746951463756,-73.985058696576,1008610072,10016.0,105.0,4.0,7400.0,1017003.0,MN17,"{'type': 'Point', 'coordinates': [-73.98505869..."
3,50121723,FRYING PAN WHEELHOUSE,Brooklyn,33,35 STREET,9178802190,,1900-01-01T00:00:00.000,,,...,40.657011546891,-74.007352417094,3006870001,11232.0,307.0,38.0,1800.0,3336900.0,BK32,"{'type': 'Point', 'coordinates': [-74.00735241..."
4,50172970,SPIRALS,Manhattan,137,1 AVENUE,2013672187,,1900-01-01T00:00:00.000,,,...,40.72781498039,-73.985135163126,1004500033,10003.0,103.0,2.0,3800.0,1077697.0,MN22,"{'type': 'Point', 'coordinates': [-73.98513516..."


In [10]:
# The API delivers inspection_date as ISO-8601 strings
# (e.g. '2025-02-19T00:00:00.000'); parse them into real datetimes.
parsed_inspection_dates = pd.to_datetime(results_df['inspection_date'])
results_df['inspection_date'] = parsed_inspection_dates

# Show how many rows fall on each date. 1900-01-01 is by far the most
# frequent value.
results_df['inspection_date'].value_counts()

inspection_date
1900-01-01    919
2024-12-17     26
2025-01-14     26
2024-06-13     25
2024-01-31     24
             ... 
2024-06-16      1
2023-07-14      1
2023-01-29      1
2023-09-22      1
2021-09-10      1
Name: count, Length: 1169, dtype: int64

In [11]:
# NOTE(review): this repeats the value_counts() expression that already ends
# the previous cell and produces the same output; this cell can likely be removed.
results_df['inspection_date'].value_counts()

inspection_date
1900-01-01    919
2024-12-17     26
2025-01-14     26
2024-06-13     25
2024-01-31     24
             ... 
2024-06-16      1
2023-07-14      1
2023-01-29      1
2023-09-22      1
2021-09-10      1
Name: count, Length: 1169, dtype: int64

In [12]:
# Re-inspect the first rows to confirm inspection_date now holds parsed dates.
results_df.head()

Unnamed: 0,camis,dba,boro,building,street,phone,cuisine_description,inspection_date,action,violation_code,...,latitude,longitude,bbl,zipcode,community_board,council_district,census_tract,bin,nta,location
0,41032752,DONS BOGAM,Manhattan,1517,EAST 32 STREET,2126832200,Korean,2025-02-19,Violations were cited in the following area(s).,02B,...,0.0,0.0,1,,,,,,,
1,50158848,AURA 57,Manhattan,130,EAST 57 STREET,9173882963,,1900-01-01,,,...,40.761061935614,-73.96948654606,1013110058,10022.0,105.0,4.0,11203.0,1036890.0,MN19,"{'type': 'Point', 'coordinates': [-73.96948654..."
2,50164505,DUBUHAUS,Manhattan,6,EAST 32 STREET,9178600745,,1900-01-01,,,...,40.746951463756,-73.985058696576,1008610072,10016.0,105.0,4.0,7400.0,1017003.0,MN17,"{'type': 'Point', 'coordinates': [-73.98505869..."
3,50121723,FRYING PAN WHEELHOUSE,Brooklyn,33,35 STREET,9178802190,,1900-01-01,,,...,40.657011546891,-74.007352417094,3006870001,11232.0,307.0,38.0,1800.0,3336900.0,BK32,"{'type': 'Point', 'coordinates': [-74.00735241..."
4,50172970,SPIRALS,Manhattan,137,1 AVENUE,2013672187,,1900-01-01,,,...,40.72781498039,-73.985135163126,1004500033,10003.0,103.0,2.0,3800.0,1077697.0,MN22,"{'type': 'Point', 'coordinates': [-73.98513516..."


In [13]:
# Rows dated 1900-01-01 are treated here as "not inspected" placeholders
# (presumably the dataset's sentinel for establishments without an
# inspection yet - TODO confirm against the dataset documentation).
NOT_INSPECTED_DATE = pd.Timestamp('1900-01-01')

# Parse explicitly and compare against a Timestamp rather than a bare string.
# If inspection_date is still the raw ISO string (e.g. the conversion cell was
# skipped), a string comparison with '1900-01-01' would silently mark every
# row as inspected. pd.to_datetime is a no-op on an already-converted column.
results_df['Inspected'] = (
    pd.to_datetime(results_df['inspection_date']).ne(NOT_INSPECTED_DATE)
)
results_df['Inspected'].value_counts()

Inspected
True     9081
False     919
Name: count, dtype: int64

In [14]:
# Re-inspect the first rows to confirm the new boolean `Inspected` column.
results_df.head()

Unnamed: 0,camis,dba,boro,building,street,phone,cuisine_description,inspection_date,action,violation_code,...,longitude,bbl,zipcode,community_board,council_district,census_tract,bin,nta,location,Inspected
0,41032752,DONS BOGAM,Manhattan,1517,EAST 32 STREET,2126832200,Korean,2025-02-19,Violations were cited in the following area(s).,02B,...,0.0,1,,,,,,,,True
1,50158848,AURA 57,Manhattan,130,EAST 57 STREET,9173882963,,1900-01-01,,,...,-73.96948654606,1013110058,10022.0,105.0,4.0,11203.0,1036890.0,MN19,"{'type': 'Point', 'coordinates': [-73.96948654...",False
2,50164505,DUBUHAUS,Manhattan,6,EAST 32 STREET,9178600745,,1900-01-01,,,...,-73.985058696576,1008610072,10016.0,105.0,4.0,7400.0,1017003.0,MN17,"{'type': 'Point', 'coordinates': [-73.98505869...",False
3,50121723,FRYING PAN WHEELHOUSE,Brooklyn,33,35 STREET,9178802190,,1900-01-01,,,...,-74.007352417094,3006870001,11232.0,307.0,38.0,1800.0,3336900.0,BK32,"{'type': 'Point', 'coordinates': [-74.00735241...",False
4,50172970,SPIRALS,Manhattan,137,1 AVENUE,2013672187,,1900-01-01,,,...,-73.985135163126,1004500033,10003.0,103.0,2.0,3800.0,1077697.0,MN22,"{'type': 'Point', 'coordinates': [-73.98513516...",False


In [15]:
# Frequency of each violation code. Rows with no code are left out because
# value_counts() drops missing values by default.
results_df['violation_code'].value_counts()

violation_code
10F      1206
08A       838
06C       587
06D       566
10B       562
         ... 
22A         1
03C         1
16L         1
03G         1
16-08       1
Name: count, Length: 102, dtype: int64

In [16]:
# Build a table of the distinct (violation_code, violation_description)
# pairs: de-duplicate, discard pairs with a missing value, and renumber the
# rows from 0.
# NOTE(review): uniqueness is per *pair*, not per code. The table has 162
# rows while violation_code has only 102 distinct values, so some codes
# appear with more than one description. Verify before using
# violation_code as a key.
violation_codes_df = (
    results_df[['violation_code', 'violation_description']]
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)
)
violation_codes_df

Unnamed: 0,violation_code,violation_description
0,02B,Hot TCS food item not held at or above 140 °F.
1,18-11,Food Protection Certificate not available for ...
2,04K,Evidence of rats or live rats in establishment...
3,04M,Live roaches in facility's food or non-food area.
4,09B,Thawing procedure improper.
...,...,...
158,16B,The original nutritional fact labels and/or in...
159,20-07,"Letter grade or Grade Pending card removed, de..."
160,05C,Food contact surface improperly constructed or...
161,03E,No or inadequate potable water supply. Water o...


In [17]:
# Build a one-column table of the distinct cuisine labels: de-duplicate,
# discard the missing value, and renumber the rows from 0.
cuisines_df = (
    results_df[['cuisine_description']]
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)
)
cuisines_df

Unnamed: 0,cuisine_description
0,Korean
1,Mexican
2,African
3,Coffee/Tea
4,Latin American
...,...
83,Haute Cuisine
84,Hotdogs
85,Not Listed/Not Applicable
86,Portuguese


In [18]:
# Build a restaurant table with one row per distinct combination of the
# identifying/contact attributes below, renumbered from 0.
# NOTE(review): dropna() discards a restaurant when ANY of these fields is
# missing. For example, the first row of results_df (DONS BOGAM, no zipcode)
# is absent from the result, and rows without a cuisine_description are
# removed as well. Confirm this is intended before joining on `camis`.
restaurants_df = (
    results_df[
        [
            'dba',
            'camis',
            'building',
            'street',
            'zipcode',
            'boro',
            'phone',
            'cuisine_description',
        ]
    ]
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)
)
restaurants_df

Unnamed: 0,dba,camis,building,street,zipcode,boro,phone,cuisine_description
0,CAFFE CORRETTO,50136993,6502,20 AVENUE,11204,Brooklyn,9178372904,Coffee/Tea
1,LAGOS TSQ,50142785,727,7 AVENUE,10019,Manhattan,3474986849,African
2,LA ELEGANCIA,50086366,8826,37TH AVE,11372,Queens,3478463569,Latin American
3,PALENQUE COLOMBIAN FOOD,50101946,298,GRAHAM AVENUE,11211,Brooklyn,7189541955,Spanish
4,PICCOLA CUCINA,50059594,75,THOMPSON STREET,10012,Manhattan,9172831065,Italian
...,...,...,...,...,...,...,...,...
7122,UBANI,50112225,37A,BEDFORD STREET,10014,Manhattan,6466785595,Eastern European
7123,MONOMONO,50077626,116,EAST 4 STREET,10003,Manhattan,9172855034,Asian/Asian Fusion
7124,CHEN'S BROTHER CUISINE,50138024,95,WATER STREET,10304,Staten Island,9176358307,Chinese
7125,FAIRFIELD INN & SUITES NEW YORK MANHATTAN FINA...,50017223,161,FRONT STREET,10038,Manhattan,6467685998,Continental
