<a href="https://colab.research.google.com/github/benasphy/ML-projects/blob/main/Number%20of%20Car%20Accidents%20with%20Poisson%20Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import PoissonRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split



In [42]:
data = pd.read_csv("/content/traffic_accident_data.csv")
data

Unnamed: 0,Average_Speed,Traffic_Density,Number_of_Lanes,Driver_Age,Driver_Experience,Number_of_Vehicles,Road_Condition,Weather,Time_of_Day,Road_Type,Accidents_Happened
0,81,85,4,68,39,5,Wet,Snowy,Evening,City_Road,9
1,122,163,1,47,10,5,Foggy,Rainy,Evening,Highway,14
2,44,153,3,55,2,8,Snowy,Snowy,Morning,Rural_Road,11
3,101,444,3,51,5,6,Wet,Clear,Night,Rural_Road,7
4,90,494,3,29,8,8,Dry,Clear,Afternoon,City_Road,6
...,...,...,...,...,...,...,...,...,...,...,...
495,92,468,3,32,38,9,Foggy,Snowy,Afternoon,Rural_Road,18
496,98,383,2,51,3,5,Wet,Stormy,Morning,Highway,8
497,51,409,5,59,12,9,Snowy,Snowy,Morning,City_Road,12
498,122,17,4,61,48,9,Foggy,Snowy,Night,City_Road,13


In [43]:
df = pd.DataFrame(data)
df_filled = df.fillna(df.mean(numeric_only=True)).round(1)
df_filled

Unnamed: 0,Average_Speed,Traffic_Density,Number_of_Lanes,Driver_Age,Driver_Experience,Number_of_Vehicles,Road_Condition,Weather,Time_of_Day,Road_Type,Accidents_Happened
0,81,85,4,68,39,5,Wet,Snowy,Evening,City_Road,9
1,122,163,1,47,10,5,Foggy,Rainy,Evening,Highway,14
2,44,153,3,55,2,8,Snowy,Snowy,Morning,Rural_Road,11
3,101,444,3,51,5,6,Wet,Clear,Night,Rural_Road,7
4,90,494,3,29,8,8,Dry,Clear,Afternoon,City_Road,6
...,...,...,...,...,...,...,...,...,...,...,...
495,92,468,3,32,38,9,Foggy,Snowy,Afternoon,Rural_Road,18
496,98,383,2,51,3,5,Wet,Stormy,Morning,Highway,8
497,51,409,5,59,12,9,Snowy,Snowy,Morning,City_Road,12
498,122,17,4,61,48,9,Foggy,Snowy,Night,City_Road,13


In [44]:
X = df_filled.drop(columns=['Accidents_Happened'])  # All columns except target
y = df_filled['Accidents_Happened']  # Target variable

In [45]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [46]:
Preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Average_Speed', 'Traffic_Density', 'Number_of_Lanes', 'Driver_Age', 'Driver_Age', 'Driver_Experience', 'Number_of_Vehicles']),
        ('cat', OneHotEncoder(), ['Road_Condition', 'Weather', 'Time_of_Day', 'Road_Type']) # Corrected the tuple format for the categorical transformer
    ]
    #remainder='passthrough'
)

In [47]:
Pipeline = Pipeline(steps=[('preprocessor', Preprocessor),
                    ('model', PoissonRegressor(alpha = 0.1, max_iter= 1000))
                    ])

In [48]:
Train = Pipeline.fit(X_train, y_train)
Pred = Pipeline.predict(X_test)
Pred

array([11.33518338,  7.3933795 , 14.60396811,  6.76854922, 11.86177877,
        5.35599032, 15.88569047, 10.41707028, 11.23219824, 13.41801136,
        6.64488497,  9.2432234 , 13.09034844, 15.6295766 ,  7.28732025,
        5.97813802, 18.86457976, 10.90487131, 13.35326073,  7.61996233,
       14.82611222,  7.60774099, 10.17929116, 10.5034525 ,  4.82014132,
       17.17803279, 16.81023129,  9.51758694,  8.82703818,  6.32149003,
       12.82941079,  9.01391516,  9.26589824, 11.60099675, 11.90190214,
        5.22197467,  6.77537254, 12.9944988 ,  8.961108  , 14.60023873,
       16.94857094, 11.48435143,  7.00120338, 11.80616775, 12.70752736,
        7.03695418,  7.56076681,  7.58499337,  9.87901744,  6.74652147,
        5.55116502, 10.88934247, 11.79050941, 12.04179502, 14.24333142,
        5.80794313, 12.20411409,  7.68054805, 10.56509113,  9.21817936,
       11.66806217, 11.65880164, 17.47833691, 12.87889665, 13.5088294 ,
       12.58754826,  8.81395623,  5.0593466 , 12.43957185, 12.26

In [49]:
mse = mean_squared_error(y_test, Pred)
mse

12.628947709222189

In [50]:
#Comparison

comparison = pd.DataFrame({'Actual': y_test, 'Predicted': Pred})
comparison

Unnamed: 0,Actual,Predicted
361,5,11.335183
73,9,7.393379
374,11,14.603968
155,12,6.768549
104,9,11.861779
...,...,...
347,17,12.451668
86,13,10.857894
75,8,13.842545
438,6,6.882085


In [53]:
new_data = pd.DataFrame({
    'Average_Speed': [76],
    'Traffic_Density': [48],
    'Number_of_Lanes': [5],
    'Driver_Age': [43],
    'Driver_Experience': [20],
    'Number_of_Vehicles': [1],
    'Road_Condition': 'Snowy',
    'Weather': 'Foggy',
    'Time_of_Day': 'Morning',
    'Road_Type': 'Rural_Road'

    })
new_data

Unnamed: 0,Average_Speed,Traffic_Density,Number_of_Lanes,Driver_Age,Driver_Experience,Number_of_Vehicles,Road_Condition,Weather,Time_of_Day,Road_Type
0,76,48,5,43,20,1,Snowy,Foggy,Morning,Rural_Road


In [54]:
new_predictions = Pipeline.predict(new_data)
new_data['Predicted_Accidents'] = new_predictions
new_data

Unnamed: 0,Average_Speed,Traffic_Density,Number_of_Lanes,Driver_Age,Driver_Experience,Number_of_Vehicles,Road_Condition,Weather,Time_of_Day,Road_Type,Predicted_Accidents
0,76,48,5,43,20,1,Snowy,Foggy,Morning,Rural_Road,9.323287
