In [67]:
# Import necessary libraries and modules
import pandas as pd

import numpy as np
import sys
import os

# Add the project root directory (the parent of the current working directory)
# to the Python path so that the `src` package can be imported below.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Guard the append: re-running this cell must not stack duplicate entries on
# sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from src import data_processing

# Load data
# Resolve the dataset locations relative to the project root instead of a
# user-specific absolute path, so the notebook runs from any checkout.
# NOTE(review): assumes the CSV files live in <project_root>/datasets, which
# matches the `traffic_prediction_project\datasets` folder used previously.
# Set the TRAFFIC_DATASETS_DIR environment variable if the layout differs.
datasets_dir = os.environ.get('TRAFFIC_DATASETS_DIR', os.path.join(project_root, 'datasets'))

data_path = os.path.join(datasets_dir, 'traffic_incident_data.csv')
road_data_path = os.path.join(datasets_dir, 'Traffic_Collisions.csv')

# Fail early with a readable message instead of an error deep inside a loader.
for csv_path in (data_path, road_data_path):
    if not os.path.isfile(csv_path):
        raise FileNotFoundError(f"Dataset not found: {csv_path}")

data = data_processing.load_traffic_data(data_path)
road_data = data_processing.load_traffic_volume_data(road_data_path)

# Join the two frames on their shared 'Year_Month' key, then keep one row per
# OBJECTID.
# NOTE(review): if 'Year_Month' repeats on both sides this is a many-to-many
# join, and drop_duplicates keeps whichever pairing happens to come first for
# each OBJECTID. Confirm that this pairing is what the analysis intends.
merged_df = pd.merge(data, road_data, on='Year_Month', how='inner')
merged_df = merged_df.drop_duplicates('OBJECTID')

# Optional: export the merged frame for inspection.
# merged_df.to_csv(os.path.join(datasets_dir, "merged_data_1.csv"), index=False)

# Drop three columns, then show the remaining per-column missing-value counts.
merged_df = merged_df.drop(columns=['ENVIRONMENTCONDITION2','INTTRAFFICCONTROL','INITIALDIRECTIONOFTRAVELTWO'])
merged_df.isna().sum()

  road_data['Year_Month'] = road_data['ACCIDENTDATE'].dt.to_period('M')


_id                            0
point                          0
description                    0
start                          0
end                            0
incidentId                     0
lastModified                   0
roadClosed                     0
severity                       0
severityScore                  0
toPoint                        0
type                           0
isEndTimeBackfilled            0
title                          0
eventList                      0
icon                           0
isJamcident                    0
INCIDENT_DATE                  0
Year_Month                     0
X                              0
Y                              0
OBJECTID                       0
ACCIDENTNUM                    0
ACCIDENTDATE                   0
ACCIDENT_YEAR                  0
ACCIDENT_MONTH                 0
ACCIDENT_DAY                   0
ACCIDENT_HOUR                  0
ACCIDENT_MINUTE                0
ACCIDENT_SECOND                0
ACCIDENT_W

In [65]:

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import folium
from folium.plugins import HeatMap
from IPython.display import display
from geopy.geocoders import Bing

# The model is trained on `merged_df`, the merged frame built earlier in the
# notebook.

# Predictor columns
features = [
    'X', 'Y',
    'COLLISIONTYPE', 'TRAFFICCONTROL', 'TRAFFICCONTROLCONDITION', 'ENVIRONMENTCONDITION1',
    'roadClosed',
]

# Column to predict
target = 'severity'

# Columns that are one-hot encoded (first level dropped).
# NOTE(review): 'COLLISIONTYPE' is not in this list, so it reaches the model
# unchanged. Confirm that it is already numeric.
categorical_columns = ['TRAFFICCONTROL', 'TRAFFICCONTROLCONDITION', 'ENVIRONMENTCONDITION1', 'roadClosed']

# Work on a copy of the selected columns so merged_df itself is not modified.
data = pd.get_dummies(
    merged_df[features + [target]].copy(),
    columns=categorical_columns,
    drop_first=True,
)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
predictors = data.drop(columns=[target])
response = data[target]
X_train, X_test, y_train, y_test = train_test_split(
    predictors, response, test_size=0.2, random_state=42
)

# Fit a Random Forest Regressor on the training rows.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predict severity for the held-out rows.
y_pred = model.predict(X_test)

# Human-readable label for each numeric severity level.
severity_mapping = {
    1: 'Low Impact',
    2: 'Minor',
    3: 'Moderate',
    4: 'Severe'
}

# Convert numerical predictions to severity strings.
# The regressor returns floats. int() would truncate toward zero (2.9 -> 2) and
# raise KeyError for anything outside the mapped levels, so round to the
# nearest level and clip to the range covered by severity_mapping instead.
severity_levels = np.clip(
    np.rint(y_pred), min(severity_mapping), max(severity_mapping)
).astype(int)
y_pred_strings = [severity_mapping[int(level)] for level in severity_levels]

# Evaluate the raw (unrounded) predictions against the held-out targets.
# NOTE(review): the recorded run reports MSE 0.00 / R^2 1.00. A perfect score
# like this usually points to a degenerate or leaked target, so verify it
# before trusting the model.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}\n")


# Read the Bing Maps key from the environment; credentials must never be
# committed inside a notebook.
# NOTE(review): the key that was previously embedded here should be treated as
# leaked and revoked.
bing_api_key = os.environ.get('BING_MAPS_API_KEY')
if not bing_api_key:
    raise RuntimeError("Set the BING_MAPS_API_KEY environment variable before running this cell.")
geolocator = Bing(api_key=bing_api_key)

# One entry per row of X_test, in the same order as y_pred.
location_names = []

# Define colors for each severity level
severity_colors = {
    'Low Impact': 'blue',
    'Minor': 'yellow',
    'Moderate': 'orange',
    'Severe': 'red'
}

# Reverse-geocode every test row (one network request per row).
# The query is passed as (row['Y'], row['X']).
# NOTE(review): presumably Y is latitude and X is longitude. Confirm against the dataset.
for i, (_, row) in enumerate(X_test.iterrows()):
    if np.isnan(y_pred[i]):
        location_names.append("NaN")
        continue
    location = geolocator.reverse((row['Y'], row['X']))
    # reverse() returns None when the service finds no match, so fall back to
    # a placeholder instead of failing on `.address`.
    location_names.append(location.address if location is not None else "Unknown location")

# Geospatial Visualization - Create a Folium map centred on the mean test
# coordinate, with a HeatMap layer and one marker per test row.
m = folium.Map(location=[X_test['Y'].mean(), X_test['X'].mean()], zoom_start=12)

# Heat-map points: [Y, X, weight], where the weight is the predicted severity
# (0 when the prediction is NaN).
heat_data_without_names = [
    [row['Y'], row['X'], y_pred[i] if not np.isnan(y_pred[i]) else 0]
    for i, (_, row) in enumerate(X_test.iterrows())
]

# Filter out rows with NaN values in the coordinates or predictions
heat_data_without_names = [point for point in heat_data_without_names if not any(np.isnan(point[:3]))]

# Add HeatMap to the map
HeatMap(heat_data_without_names, gradient={0.4: severity_colors['Low Impact'], 0.6: severity_colors['Minor'],
                                            0.8: severity_colors['Moderate'], 1: severity_colors['Severe']}).add_to(m)

# folium.Icon only accepts a fixed palette and 'yellow' is not in it, so it
# triggers a warning and a wrongly rendered marker. Use the closest supported
# colour for markers; the heat-map gradient above keeps 'yellow'.
marker_colors = {**severity_colors, 'Minor': 'beige'}

# Add markers with labels for each location
for i, (_, row) in enumerate(X_test.iterrows()):
    if not np.isnan(y_pred[i]):
        folium.Marker([row['Y'], row['X']], popup=f"Location: {location_names[i]}, Predicted Severity: {y_pred_strings[i]}",
                      icon=folium.Icon(color=marker_colors[y_pred_strings[i]])).add_to(m)

# Save the map as an HTML file and display it in the notebook
m.save('geospatial_visualization.html')

display(m)


Mean Squared Error: 0.00
R^2 Score: 1.00

