# Environmental Health ML – Camden County Inspection Routing
This notebook analyzes synthetic restaurant inspection data for Camden County, NJ. It bins each establishment's existing `RiskScore` into Low/Medium/High risk levels, maps the high-risk establishments, and uses KMeans clustering to visualize geographic patterns.

## Step 1: Import Libraries

In [None]:
import html
from pathlib import Path

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from folium.plugins import MarkerCluster
from sklearn.cluster import KMeans
# Apply seaborn's white-grid theme globally so every figure below shares it.
sns.set(style="whitegrid")

## Step 2: Load the Inspection Dataset

In [None]:
# Read the inspection records, then convert the last-inspection column from
# text to datetimes so it can be used for date arithmetic later on.
inspections_csv = '../data/camden_inspection_risk_data.csv'
df = pd.read_csv(inspections_csv)
df = df.assign(LastInspectionDate=pd.to_datetime(df['LastInspectionDate']))

# Preview the first rows as a sanity check on the load.
df.head()

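## Step 3: Label High-Risk Establishments

Scores are binned into Low (up to 0.4), Medium (above 0.4, up to 0.7), and High (above 0.7); `HighRisk` flags scores above 0.7.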

In [None]:
# Bin the score into three ordered risk bands. pd.cut intervals are
# right-closed, so 0.4 lands in Low and 0.7 in Medium. include_lowest=True
# also closes the first interval on the left; without it a score of exactly 0
# matches no bin and is labelled NaN instead of Low.
# Scores outside [0, 1] still become NaN.
df['RiskLevel'] = pd.cut(
    df['RiskScore'],
    bins=[0, 0.4, 0.7, 1.0],
    labels=['Low', 'Medium', 'High'],
    include_lowest=True,
)

# Boolean flag using the same 0.7 cut-off as the High band.
df['HighRisk'] = df['RiskScore'] > 0.7

# Number of establishments per risk band.
df['RiskLevel'].value_counts()

## Step 4: Map of High-Risk Establishments in Camden County

In [None]:
# Build an interactive map with one clustered red marker per high-risk
# establishment, then save it as a standalone HTML file.
m = folium.Map(location=[39.9, -75.05], zoom_start=11)
marker_cluster = MarkerCluster().add_to(m)

for _, row in df[df['HighRisk']].iterrows():
    # The popup string is inserted into the page as raw HTML, so every data
    # value is escaped first; otherwise a '<' or '&' in a name or category
    # would corrupt the popup markup.
    popup_html = f"""<b>{html.escape(str(row['RestaurantID']))}</b><br>
        {html.escape(str(row['Category']))}<br>
        City: {html.escape(str(row['City']))}<br>
        Violations: {html.escape(str(row['PastViolations']))}<br>
        Risk Score: {html.escape(str(row['RiskScore']))}"""
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=popup_html,
        icon=folium.Icon(color='red', icon='exclamation-triangle', prefix='fa')
    ).add_to(marker_cluster)

# Create the output folder if it is missing so the save does not fail on a
# fresh checkout.
map_path = Path('../output/camden_high_risk_map.html')
map_path.parent.mkdir(parents=True, exist_ok=True)
m.save(str(map_path))

# Display the map inline as the cell output.
m

## Step 5: Spatial Clustering of Inspections (KMeans)

In [None]:
# Group establishments into five geographic clusters from their coordinates
# and store the cluster index of each row in df['Cluster'].
# NOTE(review): KMeans measures Euclidean distance on raw degrees, which treats
# a degree of longitude the same as a degree of latitude - confirm this is
# acceptable at county scale.
coords = df[['Latitude', 'Longitude']]

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4, so leaving it implicit makes the clusters depend on
# the installed version (and triggers a FutureWarning on 1.2/1.3).
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42).fit(coords)
df['Cluster'] = kmeans.labels_

# Scatter the establishments by position, coloured by assigned cluster.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='Longitude', y='Latitude', hue='Cluster', palette='Set2', ax=ax)
ax.set_title('KMeans Clustering of Inspections')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True)
fig.tight_layout()
plt.show()