# In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import warnings

# Silence library warnings for the remainder of the run
warnings.filterwarnings("ignore")

# Read the three raw CSV inputs (paths are relative to the working directory)
road_accidents, traffic, seattle_weather = map(
    pd.read_csv,
    (
        'Road_Accidents_2017-Annuxure_Tables_4.csv',
        'traffic.csv',
        'seattle-weather.csv',
    ),
)

# Preprocess the road accidents dataset
def preprocess_road_accidents(df):
    """Return a cleaned copy of the road-accidents table.

    Every column after the first is converted with ``pd.to_numeric``;
    values that cannot be parsed become NaN. Rows that contain a NaN in
    any column are then dropped.

    The input frame is left untouched: the conversion is applied to a
    copy, so the caller's raw data is not silently half-converted.

    Args:
        df: Raw accidents DataFrame. The first column is kept as-is and
            all remaining columns are treated as numeric.

    Returns:
        A new DataFrame with numeric columns coerced and incomplete rows
        removed.
    """
    # Work on a copy so the column assignment below cannot leak into the caller's frame.
    cleaned = df.copy()
    numeric_cols = cleaned.columns[1:]
    cleaned[numeric_cols] = cleaned[numeric_cols].apply(pd.to_numeric, errors='coerce')
    # Any row with a value that failed to parse (or was missing) is discarded.
    return cleaned.dropna()

# Preprocess the traffic dataset
def preprocess_traffic(df):
    """Convert the 'DateTime' column of *df* to pandas datetimes.

    The parsed column is written back into *df* itself, and that same
    DataFrame object is returned.
    """
    parsed_timestamps = pd.to_datetime(df['DateTime'])
    df['DateTime'] = parsed_timestamps
    return df

# Preprocess the weather dataset
def preprocess_seattle_weather(df):
    """Convert the 'date' column of *df* to pandas datetimes.

    The column is replaced on *df* itself; the same DataFrame object is
    handed back to the caller.
    """
    df['date'] = df['date'].pipe(pd.to_datetime)
    return df


# Pass each raw frame through its matching cleaner and rebind the names
road_accidents, traffic, seattle_weather = (
    preprocess_road_accidents(road_accidents),
    preprocess_traffic(traffic),
    preprocess_seattle_weather(seattle_weather),
)

# Model inputs are the 2014-2016 injury-count columns; the 2017 column is the response
features = [
    'State/UT-wise Total Number of Persons Injured in Road Accidents during - 2014',
    'State/UT-wise Total Number of Persons Injured in Road Accidents during - 2015',
    'State/UT-wise Total Number of Persons Injured in Road Accidents during - 2016',
]
target = 'State/UT-wise Total Number of Persons Injured in Road Accidents during - 2017'

# Pull the design matrix and the response out of the cleaned accidents table
X, y = road_accidents[features], road_accidents[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit a 100-tree random forest (fit() returns the estimator itself)
model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict on the held-out rows and score the predictions
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Sample preference settings; replace with real user input as needed
user_preferences = dict(
    avoid_rain=True,
    preferred_temp_range=(10, 25),  # in Celsius
    avoid_high_traffic=True,
)

# Function to generate route recommendations based on user preferences
def recommend_route(user_preferences, traffic_df, weather_df, accident_model):
    """Filter traffic observations by user preferences and rank them by predicted accidents.

    Each row of ``traffic_df`` is paired with the first row of
    ``weather_df`` whose ``date`` equals the calendar day of the traffic
    row's ``DateTime``. Rows without matching weather, and rows rejected
    by a preference, are skipped.

    Args:
        user_preferences: Mapping of optional filter settings. A missing
            key disables the corresponding filter.
            ``'avoid_rain'``: when truthy, skip days whose ``weather``
            value is exactly ``'rain'``.
            ``'preferred_temp_range'``: inclusive ``(low, high)`` bounds
            applied to ``temp_max``.
            ``'avoid_high_traffic'``: when truthy, skip rows whose
            ``Vehicles`` value exceeds the mean of the ``Vehicles`` column.
        traffic_df: DataFrame with ``DateTime`` (datetime values),
            ``Vehicles``, ``Junction`` and ``ID`` columns.
        weather_df: DataFrame with ``date``, ``temp_max`` and ``weather``
            columns.
        accident_model: Object whose ``predict`` method accepts a list
            containing one three-element feature row and returns an
            indexable result.

    Returns:
        A list of ``(ID, prediction, temp_max, weather, Vehicles)`` tuples
        sorted by prediction in ascending order.
    """
    avoid_rain = user_preferences.get('avoid_rain', False)
    temp_range = user_preferences.get('preferred_temp_range')
    avoid_high_traffic = user_preferences.get('avoid_high_traffic', False)

    # The column mean never changes during the loop, so compute it once
    # (and only when the traffic filter is actually enabled).
    mean_vehicles = traffic_df['Vehicles'].mean() if avoid_high_traffic else None

    # Map each weather date to the position of its FIRST row, so every
    # traffic row needs a dict lookup instead of a scan of the whole frame.
    first_weather_pos = {}
    for pos, day in enumerate(weather_df['date']):
        first_weather_pos.setdefault(day, pos)

    recommendations = []
    for _, row in traffic_df.iterrows():
        date = row['DateTime'].date()
        vehicles = row['Vehicles']

        day_key = pd.to_datetime(date)
        # A missing timestamp never equals any weather date.
        if day_key is pd.NaT:
            continue
        pos = first_weather_pos.get(day_key)
        if pos is None:
            continue  # no weather recorded for this day

        weather_row = weather_df.iloc[pos]
        temp = weather_row['temp_max']
        weather = weather_row['weather']

        # Apply the preference filters in order: rain, temperature, traffic.
        if avoid_rain and weather == 'rain':
            continue
        if temp_range is not None and not temp_range[0] <= temp <= temp_range[1]:
            continue
        if avoid_high_traffic and vehicles > mean_vehicles:
            continue

        # NOTE(review): the feature row is built from traffic columns
        # (Vehicles, Junction, ID). The model fitted elsewhere in this file
        # is trained on three yearly injury-count columns, so these inputs
        # do not correspond to the training features -- the vector should be
        # adjusted to match the training data before the predictions are
        # relied on.
        accident_features = [vehicles, row['Junction'], row['ID']]
        accident_pred = accident_model.predict([accident_features])[0]

        recommendations.append((row['ID'], accident_pred, temp, weather, vehicles))

    # Lowest predicted value first.
    recommendations.sort(key=lambda rec: rec[1])

    return recommendations

# Rank the traffic observations using the fitted model and the sample preferences
recommendations = recommend_route(user_preferences, traffic, seattle_weather, model)

# Print the first five entries of the ranked list
for rec in recommendations[:5]:
    route_id, predicted, temp, weather, vehicles = rec
    print(f"Route ID: {route_id}, Predicted Accidents: {predicted}, Temperature: {temp}, Weather: {weather}, Traffic: {vehicles}")
