# 🚦 City Traffic Pattern Clustering
This notebook demonstrates the full workflow for clustering city traffic patterns using KMeans.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA


## 📥 Load Dataset

In [None]:
# Read the traffic CSV into a DataFrame and preview the first rows.
DATA_PATH = "data/simulated_city_traffic.csv"
df = pd.read_csv(DATA_PATH)
df.head()


## 🔍 Exploratory Data Analysis

In [None]:
# Summary statistics for every column (numeric and non-numeric alike).
summary_stats = df.describe(include="all")
summary_stats


In [None]:
# Histogram of the Avg_Speed column with a KDE curve overlaid; the title is
# set on the Axes object that seaborn returns.
speed_ax = sns.histplot(df["Avg_Speed"], kde=True)
speed_ax.set_title("Average Speed Distribution")
plt.show()


## 🧹 Preprocessing Setup

In [None]:
# Columns fed to the clustering model, grouped by how they are encoded.
numeric_features = ["Avg_Speed", "Vehicle_Count", "Latitude", "Longitude"]
categorical_features = ["Time_of_Day", "Day_of_Week", "Weather_Condition"]

# Standardise numeric columns so they share a common scale before clustering.
numeric_transformer = StandardScaler()

# Keep every one-hot column (no drop="first"):
#   * with handle_unknown="ignore" an unseen category is encoded as all zeros,
#     which would be indistinguishable from the dropped first category;
#   * older scikit-learn releases reject drop combined with
#     handle_unknown="ignore" outright;
#   * KMeans is distance-based, so dropping a level makes the categories
#     non-equidistant without any benefit (collinearity is not a concern here).
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# Fit the preprocessor on the DataFrame and build the feature matrix used by
# the clustering, evaluation and PCA cells below.
X = preprocessor.fit_transform(df)


## 🧠 Model Training

In [None]:
# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 -> "auto"), so leaving it implicit makes the clustering depend
# on the installed version and triggers a FutureWarning on some of them.
model = KMeans(n_clusters=5, n_init=10, random_state=42)

# fit_predict fits the model and returns the training-set labels in one call.
df["Cluster"] = model.fit_predict(X)


## 📊 Model Evaluation

In [None]:
# Score the clustering with the silhouette coefficient, computed on the
# preprocessed feature matrix and the labels stored in df["Cluster"].
cluster_labels = df["Cluster"]
score = silhouette_score(X, cluster_labels)
print(f"Silhouette Score: {score:.4f}")


## 📉 PCA Visualization of Clusters

In [None]:
# Densify the feature matrix if the preprocessor returned a sparse matrix.
X_dense = X.toarray() if hasattr(X, "toarray") else X

# random_state matches the KMeans seed so the 2-D projection is reproducible
# even when PCA picks a randomized SVD solver.
pca = PCA(n_components=2, random_state=42)
reduced = pca.fit_transform(X_dense)
df["PCA1"], df["PCA2"] = reduced[:, 0], reduced[:, 1]

# Scatter the two principal components, coloured by assigned cluster.
sns.scatterplot(data=df, x="PCA1", y="PCA2", hue="Cluster", palette="tab10", s=100)
plt.title("Traffic Clusters (PCA View)")
plt.show()


## 💾 Save Model and Preprocessor

In [None]:
import os

# Persist the fitted KMeans model and the fitted preprocessor side by side
# under model/, creating the directory if it does not exist yet.
os.makedirs("model", exist_ok=True)
for artifact_path, artifact in (
    ("model/kmeans_model.pkl", model),
    ("model/preprocessor.pkl", preprocessor),
):
    with open(artifact_path, "wb") as f:
        pickle.dump(artifact, f)
