<a href="https://colab.research.google.com/github/abdul-abdi/datascience/blob/main/data_science_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import joblib
from sklearn.preprocessing import LabelEncoder

# Read the raw accident records from the CSV export.
data = pd.read_csv("dataset - tableConvert.com_ulb3bn.csv")

# Parse the 'Date' column once, append its calendar components as numeric
# features, and drop the original column, which is no longer needed.
parsed_dates = pd.to_datetime(data['Date'])
data = data.assign(
    Year=parsed_dates.dt.year,
    Month=parsed_dates.dt.month,
    Day=parsed_dates.dt.day,
).drop(columns=['Date'])

# Replace 'Time' with the number of minutes elapsed since midnight
# (hour * 60 + minute), so the time of day becomes a single numeric feature.
time_of_day = pd.to_datetime(data['Time'])
data['Time'] = time_of_day.dt.hour * 60 + time_of_day.dt.minute

# Encode 'Location' as integer codes using a dedicated encoder.
# A separate instance is used on purpose: re-fitting one shared LabelEncoder on
# another column overwrites its learned classes, after which the Location codes
# could no longer be mapped back to names or applied consistently to new data.
# NOTE(review): these integer codes are arbitrary identifiers; a linear model
# will treat them as ordered magnitudes - confirm this is acceptable.
location_encoder = LabelEncoder()
data['Location'] = location_encoder.fit_transform(data['Location'])

# One-hot encode the remaining categorical columns (one indicator column per
# distinct value of each listed column).
categorical_columns = ['VehicleTypes', 'Pedestrian/Cyclist', 'WeatherConditions', 'RoadConditions', 'OtherRelevantFactors']
data = pd.get_dummies(data, columns=categorical_columns)

# Encode the 'Severity' target with its own encoder. The name `label_encoder`
# is kept for this encoder so that, as before, it ends up fitted on 'Severity'
# and can translate predicted codes back to the original severity labels.
label_encoder = LabelEncoder()
data['Severity'] = label_encoder.fit_transform(data['Severity'])

# Separate the target ('Severity') from the predictors (every other column).
y = data['Severity']
X = data.drop('Severity', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a linear regression model and fit it on the training split.
model = LinearRegression().fit(X_train, y_train)

# Predict the encoded severity for the held-out rows.
y_pred = model.predict(X_test)

# Report the mean squared error between the true and predicted encoded values.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)

# Persist the fitted model to disk so it can be reloaded later.
joblib.dump(model, "accident_severity_model.pkl")

# Identification of High-Risk Areas:
# Because 'Location' is one of the model's inputs, predicted severity can be compared across
# locations in the historical accident data to highlight areas where accidents tend to be more severe.
# Authorities can allocate resources to improve road safety in these locations.

# Weather and Road Condition Analysis:
# By analyzing the impact of weather and road conditions on accident severity,
# the model can provide insights into when and where accidents are most likely to occur.
# This information can be used to issue warnings and advisories during adverse weather conditions.

# Resource Allocation:
# Governments and organizations can use the model to allocate resources effectively.
# For example, if the model indicates a high correlation between the severity of accidents
# and specific factors, such as road conditions, resources can be allocated to maintain or improve road surfaces.

# Traffic Management and Regulation:
# The model can assist in optimizing traffic management and regulation.
# For instance, during peak accident-prone hours, traffic rules can be enforced more rigorously
# to reduce the likelihood of severe accidents.

# Public Awareness Campaigns:
# Insights from the model can inform public awareness campaigns.
# Educational programs can be designed to inform drivers and pedestrians
# about factors that contribute to accident severity and how to mitigate risks.

# Emergency Response Planning:
# The model can help authorities plan for emergency response more effectively.
# It can predict the potential severity of accidents, allowing for better allocation
# of medical resources and personnel.

# Long-Term Infrastructure Improvements:
# By analyzing trends in accident severity, the model can guide long-term infrastructure improvements,
# such as building safer roads, improving signage, and implementing speed limits.

# Early Warning Systems:
# The model can be integrated into early warning systems to provide alerts to drivers and pedestrians
# when they are entering accident-prone areas or when conditions are likely to lead to accidents.


Mean Squared Error: 3.163159646828947


['accident_severity_model.pkl']