In [None]:
````xml
<VSCode.Cell language="markdown">
# Traffic Prediction Model Training
Train a Random Forest classifier on a synthetic traffic dataset to predict congestion levels (low, medium, high).
</VSCode.Cell>
<VSCode.Cell language="python">
import os
import warnings

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Install a global "ignore" filter that covers every warning category, so
# library notices do not clutter the outputs of the cells below.
warnings.simplefilter('ignore')

# Three-line banner: a 60-character rule, the title, then the rule again.
rule = "=" * 60
print(rule, "TRAFFIC CONGESTION PREDICTION MODEL", rule, sep="\n")
</VSCode.Cell>
<VSCode.Cell language="python">
# Build a reproducible synthetic traffic dataset.
# The global NumPy seed is fixed first and the five feature columns are then
# drawn in a fixed order, so re-running this cell regenerates the same rows.
np.random.seed(42)

n_samples = 1000

# Keyword arguments are evaluated left to right, which fixes the draw order:
# hour, day_of_week, vehicle_count, avg_speed, weather.
data = dict(
    hour=np.random.randint(0, 24, n_samples),
    day_of_week=np.random.randint(0, 7, n_samples),
    vehicle_count=np.random.randint(10, 500, n_samples),
    avg_speed=np.random.uniform(10, 80, n_samples),
    weather=np.random.choice([0, 1, 2], n_samples),  # 0=sunny, 1=rainy, 2=foggy
)
traffic_df = pd.DataFrame(data)

# Target: congestion level (0=low, 1=medium, 2=high).
# Three 0/1 indicators are combined into a score; the heavy-volume indicator
# counts double, and the total is capped at 2 so it stays a valid class code.
heavy_volume = traffic_df['vehicle_count'].gt(250).astype(int)
moderate_volume = traffic_df['vehicle_count'].gt(150).astype(int)
slow_traffic = traffic_df['avg_speed'].lt(30).astype(int)
congestion_score = heavy_volume * 2 + moderate_volume + slow_traffic
traffic_df['congestion'] = congestion_score.clip(lower=0, upper=2)

# Quick look at the result: size, first rows, and class balance by level.
print("Dataset shape:", traffic_df.shape)
print("\nDataset preview:")
print(traffic_df.head())
print("\nTarget distribution:")
congestion_counts = traffic_df['congestion'].value_counts()
print(congestion_counts.sort_index())
</VSCode.Cell>
<VSCode.Cell language="python">
# Separate the label column from the predictor columns.
target_column = 'congestion'
y = traffic_df[target_column]
X = traffic_df.drop(columns=target_column)

# Hold out 20% of the rows for evaluation. The split is stratified on y and
# uses a fixed seed so it is repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

print("Training set size:", X_train.shape[0])
print("Test set size:", X_test.shape[0])
</VSCode.Cell>
<VSCode.Cell language="python">
# Train a Random Forest on the training split and evaluate it on the hold-out set.
print("\nTraining Random Forest Classifier...")

# Congestion codes and their display names, kept side by side so the report
# rows are always labelled in this order.
class_labels = [0, 1, 2]
class_names = ['Low', 'Medium', 'High']

model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,  # fixed seed so repeated runs build the same forest
    n_jobs=-1,        # -1 asks scikit-learn to use all available processors
)

model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
# Pass labels explicitly: without it, scikit-learn infers the classes from
# y_test/y_pred and raises ValueError when a class is absent from both,
# because target_names would no longer match the inferred label count.
print(classification_report(
    y_test,
    y_pred,
    labels=class_labels,
    target_names=class_names,
))
</VSCode.Cell>
<VSCode.Cell language="python">
# Persist the trained model and its feature list.
# The output directory is defined once so the directory that gets created and
# the paths that get written cannot drift apart.
model_dir = '../models'  # relative path, resolved against the current working directory
os.makedirs(model_dir, exist_ok=True)  # no error if the directory already exists

model_path = f'{model_dir}/traffic_model.pkl'
joblib.dump(model, model_path)
print(f"\n✓ Model saved to {model_path}")

# Save the feature column names in the order they appear in X.
features_path = f'{model_dir}/traffic_features.pkl'
joblib.dump(X.columns.tolist(), features_path)
print("✓ Features saved")
</VSCode.Cell>
<VSCode.Cell language="python">
# Rank the input columns by the importance scores of the fitted forest.
print("\nFeature Importance:")

# Pair each column of X with its score, then order from most to least important.
unsorted_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': model.feature_importances_,
})
feature_importance = unsorted_importance.sort_values(by='importance', ascending=False)

print(feature_importance)
</VSCode.Cell>
````