In [4]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

In [5]:
# Read the incident records from the CSV file into a DataFrame
incidents_df = pd.read_csv("updated_incidents.csv")

In [6]:
# Display the column labels of the loaded frame (bare expression -> shown as the cell output)
incidents_df.columns

Index(['LightingCondition', 'WeatherCondition', 'SurfaceCondition',
       'RoadCharacteristics', 'PrimaryFactor', 'TrafficIncidentSpeed',
       'congestion', 'predicted_congestion'],
      dtype='object')

In [7]:
""" 
# Feature value-specific weights based on their likelihood of causing congestion

value_weights = {
    'LightingCondition': {
        'DAYLIGHT': 0.1,       # Lower chance of congestion in daylight
        'DARK': 0.8,           # Higher chance of congestion in darkness
        'DARK (LIGHTED)': 0.6, # Slightly lower chance if the area is lighted
        'DAWN/DUSK': 0.4       # Medium chance at dawn or dusk
    },
    'WeatherCondition': {
        'CLEAR': 0.1,          # Clear weather, low congestion likelihood
        'RAIN': 0.7,           # Rain increases congestion likelihood
        'CLOUDY': 0.3,         # Cloudy but no precipitation, moderate chance
        'SNOW': 0.9,           # Snow highly increases congestion
        'SLEET/HAIL/FREEZING': 0.8, # Hazardous weather, high congestion chance
        'BLOWING SAND/SOIL/SNOW': 0.6,
        'BLOWING': 0.5,
        'FOG/SMOKE': 0.7,
        'FOG/SMOKE/SMOG': 0.8  # Poor visibility increases congestion likelihood
    },
    'SurfaceCondition': {
        'DRY': 0.1,            # Dry surface, low chance of congestion
        'WET': 0.6,            # Wet surface increases congestion likelihood
        'SNOW/SLUSH': 0.9,     # Snowy or slushy surface increases congestion likelihood
        'ICE': 0.95,           # Icy roads are very likely to cause congestion
        'MUDDY': 0.7,          # Muddy surfaces increase the chance
        'WATER (STANDING OR MOVING)': 0.8 # Standing water or flooding causes congestion
    },
    'RoadCharacteristics': {
        'NON-ROADWAY CRASH': 0.2,    # Off-road incidents are less likely to cause congestion
        'STRAIGHT/HILLCREST': 0.4,   # Normal road conditions, medium chance
        'CURVE/HILLCREST': 0.8       # Curves or hillcrests increase congestion likelihood
    },
    'TrafficIncidentSpeed': {
        'low_speed_threshold': 30,   # Speeds below 30 considered congested
        'high_weight': 1.0,          # Full weight for very low speeds
        'low_weight': 0.0            # No congestion likelihood for higher speeds
    }
}

# Function to calculate congestion score based on specific feature value weights
def congestion_score(row):
    score = 0
    
    # Add weights for each feature based on their values
    score += value_weights['LightingCondition'].get(row['LightingCondition'], 0)
    score += value_weights['WeatherCondition'].get(row['WeatherCondition'], 0)
    score += value_weights['SurfaceCondition'].get(row['SurfaceCondition'], 0)
    score += value_weights['RoadCharacteristics'].get(row['RoadCharacteristics'], 0)
    
    # Calculate congestion likelihood based on speed
    if row['TrafficIncidentSpeed'] < value_weights['TrafficIncidentSpeed']['low_speed_threshold']:
        score += value_weights['TrafficIncidentSpeed']['high_weight']  # Congestion if speed is below the threshold
    else:
        score += value_weights['TrafficIncidentSpeed']['low_weight']   # No congestion if speed is above the threshold

    return score

# Apply the congestion rule to calculate a congestion score for each row in incidents_df
incidents_df['congestion_score'] = incidents_df.apply(congestion_score, axis=1)

# Assume congestion score greater than 2.0 indicates congestion (adjust threshold as needed)
incidents_df['congestion'] = incidents_df['congestion_score'].apply(lambda x: 1 if x > 2.0 else 0)

# Optionally, drop the congestion score column
incidents_df.drop(columns=['congestion_score'], inplace=True)

# Save the updated DataFrame to a CSV file named 'updated_incidents.csv'
incidents_df.to_csv('updated_incidents.csv', index=False)

print("The file 'updated_incidents.csv' has been saved with the congestion column.")
""" 


' \n# Feature value-specific weights based on their likelihood of causing congestion\n\nvalue_weights = {\n    \'LightingCondition\': {\n        \'DAYLIGHT\': 0.1,       # Lower chance of congestion in daylight\n        \'DARK\': 0.8,           # Higher chance of congestion in darkness\n        \'DARK (LIGHTED)\': 0.6, # Slightly lower chance if the area is lighted\n        \'DAWN/DUSK\': 0.4       # Medium chance at dawn or dusk\n    },\n    \'WeatherCondition\': {\n        \'CLEAR\': 0.1,          # Clear weather, low congestion likelihood\n        \'RAIN\': 0.7,           # Rain increases congestion likelihood\n        \'CLOUDY\': 0.3,         # Cloudy but no precipitation, moderate chance\n        \'SNOW\': 0.9,           # Snow highly increases congestion\n        \'SLEET/HAIL/FREEZING\': 0.8, # Hazardous weather, high congestion chance\n        \'BLOWING SAND/SOIL/SNOW\': 0.6,\n        \'BLOWING\': 0.5,\n        \'FOG/SMOKE\': 0.7,\n        \'FOG/SMOKE/SMOG\': 0.8  # Poor visibil

In [8]:
# Discard every row that contains at least one missing value.
# All columns are checked, including ones that are not used as model features.
incidents_df = incidents_df.dropna()

In [9]:
# Label column the classifier is trained to predict
target = 'congestion'

# Input columns handed to the encoder/model.
# NOTE(review): the disabled cell above (kept as a string literal) computes
# 'congestion' from these same five columns with a fixed weight rule. If that is
# how the label in the CSV was produced, the model is re-learning that rule and
# the reported accuracy should be read accordingly -- confirm.
features = [
    'LightingCondition',
    'WeatherCondition',
    'SurfaceCondition',
    'RoadCharacteristics',
    'TrafficIncidentSpeed',
]

In [10]:
# Turn the categorical feature columns into indicator columns.
# drop_first=True omits the first level of each categorical, so that level is
# represented by all of its sibling indicators being zero.
incidents_df_encoded = pd.get_dummies(
    data=incidents_df[features],
    drop_first=True,
)


In [11]:
# Model inputs are the encoded columns; the label comes from the un-encoded frame
X, y = incidents_df_encoded, incidents_df[target]

In [12]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified although the recorded test set is
# imbalanced (226 vs 40 rows) -- consider stratify=y when the results are regenerated.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)


In [13]:
# Build a 100-tree random forest with a fixed seed, then fit it on the training split
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

In [14]:
# Persist the fitted classifier; the returned list of written file names is the cell output
joblib.dump(value=model, filename='traffic_congestion_model_rf.pkl')


['traffic_congestion_model_rf.pkl']

In [15]:
# Store the encoded column names (in training order) so that inference code can
# rebuild an input frame with the same layout.
feature_columns = list(X.columns)
joblib.dump(feature_columns, 'feature_columns_rf.pkl')

print("Model trained and saved successfully.")

Model trained and saved successfully.


In [16]:
# Predict a label for every held-out row
y_pred = model.predict(X=X_test)

In [17]:
# Fraction of held-out rows whose predicted label equals the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.99


In [18]:
# Confusion matrix of true vs. predicted labels on the held-out rows
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Per-class precision / recall / F1 for a more detailed view
print("Classification Report:", classification_report(y_true=y_test, y_pred=y_pred), sep="\n")

Confusion Matrix:
[[225   1]
 [  2  38]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       226
           1       0.97      0.95      0.96        40

    accuracy                           0.99       266
   macro avg       0.98      0.97      0.98       266
weighted avg       0.99      0.99      0.99       266



In [19]:
import pandas as pd
import joblib
import os

# Locations of the artifacts written by the training cells
model_path = 'traffic_congestion_model_rf.pkl'
feature_columns_path = 'feature_columns_rf.pkl'

# Fail early with a clear message when an artifact is missing.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
if not os.path.isfile(model_path):
    raise FileNotFoundError(f"Model file not found at: {model_path}")
model = joblib.load(model_path)

if not os.path.isfile(feature_columns_path):
    raise FileNotFoundError(f"Feature columns file not found at: {feature_columns_path}")
feature_columns = joblib.load(feature_columns_path)

# Choices shown to the user for each categorical parameter.
# NOTE(review): 'FOG', 'ICY', 'SNOWY' and 'CURVE' do not appear in the value table
# of the disabled labelling cell (which uses 'FOG/SMOKE', 'SNOW/SLUSH', 'ICE',
# 'CURVE/HILLCREST'). A value with no matching one-hot column is silently encoded
# as all zeros at prediction time -- confirm these lists against the training data.
lighting_conditions = [
    'DAYLIGHT',
    'DARK (LIGHTED)',
    'DARK',
    'DAWN/DUSK',
]
weather_conditions = [
    'CLEAR',
    'CLOUDY',
    'RAIN',
    'FOG',
    'SNOW',
    'SLEET/HAIL/FREEZING',
    'BLOWING SAND/SOIL/SNOW',
]
surface_conditions = [
    'DRY',
    'WET',
    'ICY',
    'SNOWY',
    'ICE',
    'MUDDY',
    'WATER (STANDING OR MOVING)',
]
road_characteristics = [
    'STRAIGHT/HILLCREST',
    'CURVE',
    'NON-ROADWAY CRASH',
]

def get_user_input():
    """
    Interactively collect the traffic conditions needed for a prediction.

    Each categorical answer has surrounding whitespace removed and is asked for
    again while it is blank. Answers are free text: they are not checked against
    the displayed option lists. The speed is asked for again until it parses as a
    finite, non-negative number, so a typo no longer aborts with ValueError.

    Returns:
    dict: keys 'LightingCondition', 'WeatherCondition', 'SurfaceCondition',
          'RoadCharacteristics' (str) and 'TrafficIncidentSpeed' (float)
    """
    import math  # local import: only needed for the finiteness check below

    def _ask_choice(title, options):
        # Print the heading and option list, looping until a non-blank answer arrives.
        while True:
            print(title)
            answer = input(f"Options: {options}\nYour choice: ").strip()
            if answer:
                return answer
            print("No value entered, please try again.")

    def _ask_speed():
        # Loop until the answer is a usable number; NaN/inf/negative values are rejected
        # because they cannot describe a real speed.
        while True:
            raw = input("Enter Traffic Incident Speed (in mph): ").strip()
            try:
                speed = float(raw)
            except ValueError:
                print(f"'{raw}' is not a number, please try again.")
                continue
            if math.isfinite(speed) and speed >= 0:
                return speed
            print("Speed must be a finite, non-negative number, please try again.")

    # Questions are asked in the same order as before: lighting, weather, surface, road, speed.
    lighting_condition = _ask_choice("Select Lighting Condition:", lighting_conditions)
    weather_condition = _ask_choice("Select Weather Condition:", weather_conditions)
    surface_condition = _ask_choice("Select Surface Condition:", surface_conditions)
    road_characteristic = _ask_choice("Select Road Characteristics:", road_characteristics)
    traffic_incident_speed = _ask_speed()

    return {
        'LightingCondition': lighting_condition,
        'WeatherCondition': weather_condition,
        'SurfaceCondition': surface_condition,
        'RoadCharacteristics': road_characteristic,
        'TrafficIncidentSpeed': traffic_incident_speed
    }

def predict_congestion(parameters):
    """
    Predict whether traffic is congested or clear for one set of conditions.

    The input is validated and normalised before encoding so that a malformed
    value cannot be silently replaced by zeros during the reindex step:
    a missing key raises, categorical values are stripped of surrounding
    whitespace, and the speed is converted to float (a speed given as a string
    would otherwise be one-hot encoded, dropped, and treated as speed 0).

    Parameters:
    parameters (dict): A dictionary containing values for each feature
                       ('LightingCondition', 'WeatherCondition', 'SurfaceCondition',
                       'RoadCharacteristics', 'TrafficIncidentSpeed'); extra keys are ignored

    Returns:
    str: Prediction result ('Congested' or 'Clear')

    Raises:
    ValueError: if a required key is missing/None or the speed is not numeric
    """
    categorical_keys = ('LightingCondition', 'WeatherCondition',
                        'SurfaceCondition', 'RoadCharacteristics')
    numeric_key = 'TrafficIncidentSpeed'

    # A missing feature would otherwise be zero-filled by reindex and yield a
    # prediction for conditions the caller never specified.
    missing = [key for key in (*categorical_keys, numeric_key)
               if key not in parameters or parameters[key] is None]
    if missing:
        raise ValueError(f"Missing required parameter(s): {', '.join(missing)}")

    row = {key: str(parameters[key]).strip() for key in categorical_keys}
    try:
        row[numeric_key] = float(parameters[numeric_key])
    except (TypeError, ValueError) as exc:
        raise ValueError(
            f"{numeric_key} must be numeric, got {parameters[numeric_key]!r}"
        ) from exc

    # Convert the parameters into a single-row DataFrame
    new_data_df = pd.DataFrame([row])

    # One-hot encode exactly the categorical columns (explicit list, so the result
    # does not depend on dtype inference); the speed column passes through unchanged.
    new_data_encoded = pd.get_dummies(new_data_df, columns=list(categorical_keys))

    # Align with the training layout: absent indicator columns become 0 and columns
    # the model never saw are dropped.
    # NOTE(review): because training used drop_first=True, a value unseen in training
    # is indistinguishable here from the dropped baseline level of that feature.
    new_data_encoded = new_data_encoded.reindex(columns=feature_columns, fill_value=0)

    # Make prediction using the saved model
    prediction = model.predict(new_data_encoded)[0]

    # Return the result as 'Congested' or 'Clear'
    return 'Congested' if prediction == 1 else 'Clear'

# Main program
# The guard is true when this code runs as the top-level module, which includes
# execution inside a notebook cell (the recorded prompts below come from this branch).
if __name__ == "__main__":
    # Collect the conditions interactively, classify them, and report the label
    user_parameters = get_user_input()
    result = predict_congestion(user_parameters)
    print(f"The predicted traffic condition is: {result}")


Select Lighting Condition:


Options: ['DAYLIGHT', 'DARK (LIGHTED)', 'DARK', 'DAWN/DUSK']
Your choice:  DARK


Select Weather Condition:


Options: ['CLEAR', 'CLOUDY', 'RAIN', 'FOG', 'SNOW', 'SLEET/HAIL/FREEZING', 'BLOWING SAND/SOIL/SNOW']
Your choice:  RAIN


Select Surface Condition:


Options: ['DRY', 'WET', 'ICY', 'SNOWY', 'ICE', 'MUDDY', 'WATER (STANDING OR MOVING)']
Your choice:  ICY


Select Road Characteristics:


Options: ['STRAIGHT/HILLCREST', 'CURVE', 'NON-ROADWAY CRASH']
Your choice:  CURVE
Enter Traffic Incident Speed (in mph):  30


The predicted traffic condition is: Congested
