# In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pickle

# Train a linear regression on accident records, persist the fitted
# artifacts, and run one example prediction.

# Load the dataset
dataset = pd.read_csv('mydataset.csv')

# Selecting relevant features and the target variable.
# The column list is defined once so the training data and the prediction
# input below are guaranteed to use the same columns in the same order.
feature_columns = [
    'Number of Vehicles', 'Number of Casualties', 'Road Surface',
    'Lighting Conditions', 'Weather Conditions', 'Type of Vehicle',
    'Sex of Casualty', 'Age of Casualty',
]
X = dataset[feature_columns]
y = dataset['Casualty Severity']
# NOTE(review): 'Casualty Severity' and several features look like coded
# categories; a linear regression treats them as continuous numbers and
# yields fractional predictions — confirm this is intended.

# Splitting the data into training and testing sets
# (random_state is fixed so the split is reproducible between runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the features.
# The scaler is fitted on the training split only and then re-used for the
# test split, so no statistics from the test data leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Creating and training the linear regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Saving the trained model to a file for future use
model_filename = 'accident_severity_model.pkl' # file saved in pickle format
with open(model_filename, 'wb') as file:
    pickle.dump(model, file)

# The model was trained on *scaled* features, so it is only usable later
# together with the exact scaler fitted above. Persist the scaler next to
# the model; without it the pickled model cannot be applied to raw inputs.
# (Only unpickle these files when they come from a trusted source.)
scaler_filename = 'accident_severity_scaler.pkl'
with open(scaler_filename, 'wb') as file:
    pickle.dump(scaler, file)

print(f"Model saved as {model_filename}")

# Hypothetical scenario for prediction.
# `columns=feature_columns` pins the column order to the one used in training.
hypothetical_data = pd.DataFrame({
    'Number of Vehicles': [2],
    'Number of Casualties': [1],
    'Road Surface': [1],
    'Lighting Conditions': [2],
    'Weather Conditions': [1],
    'Type of Vehicle': [10],
    'Sex of Casualty': [1],
    'Age of Casualty': [35]
}, columns=feature_columns)

# Scaling the hypothetical data with the scaler fitted on the training split
hypothetical_data_scaled = scaler.transform(hypothetical_data)

# Predicting the casualty severity
predicted_severity = model.predict(hypothetical_data_scaled)
print(f"Predicted Casualty Severity: {predicted_severity[0]}")

# Output:
# Model saved as accident_severity_model.pkl
# Predicted Casualty Severity: 2.5634376648404618
