In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import random


In [3]:
def assign_congestion(row):
    score = 0

    # Base scoring
    score += row['vehicle_count'] / 100
    score += row['vehicle_mix_ratio'] * 2

    # Road factors
    if row['road_lanes'] >= 4 and row['road_type'] == 'Highway':
        score -= 2
    elif row['road_lanes'] >= 3 and row['road_type'] in ['Highway', 'One-way']:
        score -= 1
    if row['road_type'] == 'Mountain':
        score += 0.5

    # Vehicle count effect
    if row['vehicle_count'] > 350: score += 1.5
    elif row['vehicle_count'] > 250: score += 1.0
    elif row['vehicle_count'] > 150: score += 0.5
    elif row['vehicle_count'] < 50: score -= 0.5

    # Time & Day
    score += {
        ('Morning', 'Weekday'): 0.5,
        ('Morning', 'Weekend'): -1.5,
        ('Afternoon', ''): 0.2,
        ('Evening', ''): 0.7,
        ('Night', ''): -1.0
    }.get((row['time_of_day'], row['day_type']), 0)

    # Weather
    score += {
        'Sunny': -0.5,
        'Rainy': 0.8,
        'Foggy': 1.0,
        'Snowy': 1.5
    }.get(row['weather'], 0)

    if row['weather'] == 'Snowy' and row['road_type'] == 'Mountain':
        score += 1
    if row['weather'] == 'Rainy' and row['vehicle_count'] > 250:
        score += 1
    if row['weather'] == 'Foggy' and row['time_of_day'] == 'Night':
        score += 1
    if row['time_of_day'] == 'Evening' and row['day_type'] == 'Weekday':
        score += 0.5

    return "Low" if score < 2 else "Medium" if score < 4 else "High"


In [4]:
df = pd.read_csv("traffic_data.csv")
df['congestion_level'].value_counts()


congestion_level
Medium    300
High      300
Low       300
Name: count, dtype: int64

In [5]:
data = df.copy()

time_map = {
    'Morning': 0,
    'Afternoon': 1,
    'Evening': 2,
    'Night': 3
}

day_map = {
    'Weekday': 0,
    'Weekend': 1
}

weather_map = {
    'Sunny': 0,
    'Rainy': 1,
    'Foggy': 2,
    'Snowy': 3
}

road_map = {
    'Highway': 0,
    'Local': 1,
    'One-way': 2,
    'Mountain': 3
}

congestion_map = {
    'Low': 0,
    'Medium': 1,
    'High': 2
}

data['time_of_day'] = data['time_of_day'].map(time_map)
data['day_type'] = data['day_type'].map(day_map)
data['weather'] = data['weather'].map(weather_map)
data['road_type'] = data['road_type'].map(road_map)
data['congestion_level'] = data['congestion_level'].map(congestion_map)


data.head(10)

Unnamed: 0,time_of_day,day_type,weather,road_type,vehicle_count,road_lanes,vehicle_mix_ratio,congestion_level
0,3,1,2,2,137,6,0.79,1
1,0,0,1,3,386,3,0.59,2
2,1,0,1,2,249,2,0.06,1
3,1,0,1,3,211,5,0.25,1
4,1,1,2,0,64,1,0.61,0
5,1,1,1,0,116,5,0.92,0
6,0,0,0,0,44,4,0.9,0
7,1,1,1,0,343,6,0.06,0
8,3,0,1,3,81,3,0.79,1
9,3,0,2,1,169,1,0.94,1


In [6]:
X= data.drop('congestion_level', axis=1) # inputs we use to make a prediction Features(X) , removes the target col from the rest of the dataset
y = data['congestion_level'] # The actual output we want to predict Label(y)

X_train, X_test, y_train, y_test = train_test_split(
    X,y, test_size=0.2, random_state =42
) # 80% of the data is used for training and 20% for testing

models ={
    "Random Forest": RandomForestClassifier(), # good accuracy
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
}

for name, model in models.items():
    model.fit(X_train, y_train)  # Train the model
    y_pred = model.predict(X_test) # Make predictions
    accuracy = accuracy_score(y_test, y_pred)  # Calculate accuracy
    print(f"{name}: Accuracy = {accuracy:.2f}")

Random Forest: Accuracy = 0.94
Decision Tree: Accuracy = 0.89
Logistic Regression: Accuracy = 0.85


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=1000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Model 1 - Default
model1 = RandomForestClassifier()
model1.fit(X_train, y_train)  # Train the Random Forest model
pred_1 = model1.predict(X_test)
accuracy_1 = accuracy_score(y_test,pred_1)
print(f"Random Forest Model 1 Accuracy: {accuracy_1:.2f}")

# Model 2 - More trees (200) - better performance in many cases but slower
model2 = RandomForestClassifier(n_estimators=200)
model2.fit(X_train, y_train)
pred_2 = model2.predict(X_test)
accuracy_2 = accuracy_score(y_test, pred_2)
print(f"Random Forest Model 2 Accuracy: {accuracy_2:.2f}")

# Model 3 - Max depth of trees (5) - prevents overfitting
model3 = RandomForestClassifier(max_depth=5)
model3.fit(X_train, y_train)
pred3 = model3.predict(X_test)
acc3 = accuracy_score(y_test, pred3)
print("Model 3 (max_depth=5):", acc3)

# Model 4 - Fewer trees + depth limit
model4 = RandomForestClassifier(n_estimators=100, max_depth=3)
model4.fit(X_train, y_train)
pred4 = model4.predict(X_test)
acc4 = accuracy_score(y_test, pred4)
print("Model 4 (n_estimators=100, max_depth=3):", acc4)


Random Forest Model 1 Accuracy: 0.93
Random Forest Model 2 Accuracy: 0.94
Model 3 (max_depth=5): 0.8833333333333333
Model 4 (n_estimators=100, max_depth=3): 0.8222222222222222


In [8]:
import joblib

# Replace model2 with your best model
joblib.dump(model2, "traffic_model.pkl")
# Load the model
model = joblib.load("traffic_model.pkl")
