<a href="https://colab.research.google.com/github/Trappyke/road_accident_data_science/blob/main/Road_Accident.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import joblib
from sklearn.preprocessing import LabelEncoder

# Check that the dataset file exists
# Single source of truth for the dataset location (relative to the current
# working directory), so the existence check, the content dump and the load
# below cannot drift apart.
csv_path = "./content/road_accident.csv"

# Fail fast when the dataset is missing: carrying on would only reach
# pd.read_csv further down and fail there with the same exception type.
if not os.path.exists(csv_path):
    print("File not found.")
    raise FileNotFoundError(f"Dataset not found: {csv_path}")

print("File exists.")
# Print the file content
with open(csv_path, 'r') as file:
    print(file.read())

# Load the dataset
data = pd.read_csv(csv_path)

# Split the 'Date' column into separate numeric calendar features.
data['Date'] = pd.to_datetime(data['Date'])
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day
data['DayOfWeek'] = data['Date'].dt.dayofweek  # pandas: Monday=0 ... Sunday=6

# Drop the original 'Date' column now that its parts are separate columns.
data.drop(columns=['Date'], inplace=True)

# Convert 'Time' to minutes since midnight. The column is parsed once and
# the result reused, instead of running pd.to_datetime over it twice.
parsed_time = pd.to_datetime(data['Time'])
data['Time'] = parsed_time.dt.hour * 60 + parsed_time.dt.minute

# Encode 'Location' as integer ids with its own encoder, so its fitted
# mapping is not overwritten when the target is encoded below.
# NOTE(review): integer ids give locations an arbitrary order, which a
# linear model treats as meaningful - confirm this is intended.
location_encoder = LabelEncoder()
data['Location'] = location_encoder.fit_transform(data['Location'])

# Perform one-hot encoding for the remaining categorical columns.
categorical_columns = [
    'VehicleTypes',
    'Pedestrian/Cyclist',
    'WeatherConditions',
    'RoadConditions',
    'OtherRelevantFactors',
]
data = pd.get_dummies(data, columns=categorical_columns)

# Encode the 'Severity' target as integer ids. `label_encoder` ends up
# fitted on 'Severity', so it can map predicted ids back to severity labels.
label_encoder = LabelEncoder()
data['Severity'] = label_encoder.fit_transform(data['Severity'])

# Target is the encoded 'Severity' column; every other column is a feature.
y = data['Severity']
X = data.drop(columns=['Severity'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion.
# NOTE(review): 'Severity' holds label-encoded class ids, so this regresses
# on category codes - confirm a regressor (rather than a classifier) is
# what is wanted here.
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows and report the mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Persist the fitted model so it can be reloaded later without retraining.
joblib.dump(model, "accident_severity_model.pkl")
