In [3]:
pip install pandas plotly

Collecting plotly
  Obtaining dependency information for plotly from https://files.pythonhosted.org/packages/8a/67/f95b5460f127840310d2187f916cf0023b5875c0717fdf893f71e1325e87/plotly-6.5.2-py3-none-any.whl.metadata
  Downloading plotly-6.5.2-py3-none-any.whl.metadata (8.5 kB)
Collecting narwhals>=1.15.1 (from plotly)
  Obtaining dependency information for narwhals>=1.15.1 from https://files.pythonhosted.org/packages/3d/2e/cf2ffeb386ac3763526151163ad7da9f1b586aac96d2b4f7de1eaebf0c61/narwhals-2.15.0-py3-none-any.whl.metadata
  Downloading narwhals-2.15.0-py3-none-any.whl.metadata (13 kB)
Downloading plotly-6.5.2-py3-none-any.whl (9.9 MB)
   ---------------------------------------- 0.0/9.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.9 MB 1.9 MB/s eta 0:00:06
    --------------------------------------- 0.2/9.9 MB 2.3 MB/s eta 0:00:05
   - -------------------------------------- 0.4/9.9 MB 3.1 MB/s eta 0:00:04
   -- ------------------------------------- 0.6/9.9 MB 3.8 

In [4]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Read the cleaned inspections CSV and convert INSPECTION_DATE from text to
# datetime in a single chained expression.
df = pd.read_csv('../data/inspections_clean.csv').assign(
    INSPECTION_DATE=lambda frame: pd.to_datetime(frame['INSPECTION_DATE'])
)

# Report (rows, columns), then display the first rows as the cell output.
print(df.shape)
df.head()

(51839, 14)


Unnamed: 0,CAMIS,INSPECTION_DATE,RESTAURANT_NAME,BORO,ZIPCODE,CUISINE,SCORE,GRADE,TOTAL_VIOLATIONS,CRITICAL_VIOLATIONS,YEAR,MONTH,DAY_OF_WEEK,QUARTER
0,30075445,2023-08-22,MORRIS PARK BAKE SHOP,Bronx,10462.0,Bakery Products/Desserts,12.0,A,3,1,2023,8,1,3
1,30075445,2024-11-08,MORRIS PARK BAKE SHOP,Bronx,10462.0,Bakery Products/Desserts,10.0,A,3,1,2024,11,4,4
2,30191841,2023-04-23,D.J. REYNOLDS,Manhattan,10019.0,Irish,10.0,A,2,2,2023,4,6,2
3,30191841,2025-02-20,D.J. REYNOLDS,Manhattan,10019.0,Irish,10.0,A,2,2,2025,2,3,1
4,40356018,2024-04-16,RIVIERA CATERERS,Brooklyn,11224.0,American,0.0,A,0,0,2024,4,1,2


In [5]:
# Row counts per borough, drawn as side-by-side bars with one bar per grade.
grade_colors = {'A': 'green', 'B': 'orange', 'C': 'red'}  # fixed colours for the A/B/C grades

fig = px.histogram(
    df,
    x='BORO',
    color='GRADE',
    barmode='group',
    color_discrete_map=grade_colors,
    title='Grade Distribution by Borough',
)
# Replace the raw column names on the axes with readable titles.
fig.update_xaxes(title_text='Borough')
fig.update_yaxes(title_text='Count')
fig.show()

In [6]:
# Mean SCORE for each calendar month in which an inspection took place.
inspection_month = df['INSPECTION_DATE'].dt.to_period('M')
monthly = df['SCORE'].groupby(inspection_month).mean().reset_index()
# Turn the monthly periods back into timestamps so the x axis holds datetimes.
monthly['INSPECTION_DATE'] = monthly['INSPECTION_DATE'].dt.to_timestamp()

fig = px.line(
    monthly,
    x='INSPECTION_DATE',
    y='SCORE',
    title='Average Inspection Score Over Time',
)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Average Score')
fig.show()

In [7]:
# Cuisines with the highest mean SCORE, restricted to cuisines that have at
# least MIN_INSPECTIONS rows so a handful of inspections cannot dominate.
MIN_INSPECTIONS = 100  # minimum number of rows a cuisine needs to be ranked
TOP_N = 15             # how many cuisines the chart shows

# Named aggregation yields the final column names directly, instead of
# relying on positional reassignment of .columns after the groupby.
cuisine_stats = (
    df.groupby('CUISINE')
    .agg(AVG_SCORE=('SCORE', 'mean'), COUNT=('CAMIS', 'count'))
    .reset_index()
    .loc[lambda stats: stats['COUNT'] >= MIN_INSPECTIONS]
)
# Variable name kept as-is in case later cells refer to it.
top_15 = cuisine_stats.nlargest(TOP_N, 'AVG_SCORE')

fig = px.bar(top_15, x='AVG_SCORE', y='CUISINE', orientation='h',
             title=f'Top {TOP_N} Cuisines by Average Score (min {MIN_INSPECTIONS} inspections)')
# Readable axis titles, matching the other charts in this notebook, and bars
# ordered by value rather than by row order.
fig.update_layout(
    xaxis_title='Average Score',
    yaxis={'title': {'text': 'Cuisine'}, 'categoryorder': 'total ascending'},
)
fig.show()