<a href="https://colab.research.google.com/github/benasphy/Poisson-Regression/blob/main/No%20of%20Car%20Accident.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import PoissonRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Toy traffic dataset, written one road segment per row.
# Tuple order: average speed (km/h), traffic density (vehicles/km),
# road condition (categorical), number of accidents.
data = pd.DataFrame(
    [
        (50, 30, 'Good', 2),
        (70, 80, 'Good', 5),
        (40, 20, 'Bad', 1),
        (60, 60, 'Good', 4),
        (30, 10, 'Bad', 0),
        (80, 90, 'Good', 7),
        (45, 25, 'Bad', 1),
        (55, 40, 'Good', 3),
        (65, 70, 'Bad', 6),
        (75, 85, 'Good', 8),
    ],
    columns=['average_speed', 'traffic_density', 'road_condition', 'accident_count'],
)

# Response column (a count) and the predictor columns fed to the model
target = 'accident_count'
features = ['average_speed', 'traffic_density', 'road_condition']

# Separate the predictors from the response, then hold out 20% of the rows
# for evaluation; the fixed random_state makes the split repeatable.
X, y = data[features], data[target]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Preprocessing
# Numerical: StandardScaler (zero mean / unit variance, fitted on the training rows)
# Categorical: OneHotEncoder
#   handle_unknown='ignore' makes a road condition that was absent from the
#   (very small) training split encode as all zeros at predict time instead of
#   raising an error. Categories seen during fit are encoded exactly as before.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['average_speed', 'traffic_density']),
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['road_condition'])
    ]
)

# Pipeline with Poisson Regression
# The preprocessor and the model are fitted together, so the scaling statistics
# and the category list come from the training data only.
# alpha is the regularization strength passed to PoissonRegressor; max_iter
# raises the solver's iteration cap.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', PoissonRegressor(alpha=0.1, max_iter=1000))
])

# Fit the full pipeline (preprocessing + Poisson model) on the training rows
# and immediately score the held-out rows. Pipeline.fit returns the fitted
# pipeline itself, so the two calls can be chained.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Error metrics on the held-out rows
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"\nMean Squared Error: {mse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")

# Side-by-side table of observed counts and model predictions.
# The raw array is taken from y_test so the table gets a fresh 0..n-1 index
# rather than the original row labels.
comparison = pd.DataFrame({
    'Actual': y_test.to_numpy(),
    'Predicted': y_pred,
})
print("\nActual vs Predicted Accident Counts:\n", comparison)

# **New Prediction: Unseen Data**
# Three hypothetical road segments to score, one tuple per segment:
# (average speed in km/h, traffic density, road condition).
new_data = pd.DataFrame(
    [
        (50, 35, 'Good'),
        (30, 15, 'Bad'),
        (70, 85, 'Good'),
    ],
    columns=['average_speed', 'traffic_density', 'road_condition'],
)

# Run the fitted pipeline on the unseen rows
new_predictions = pipeline.predict(new_data)

# Attach the predictions as an extra column (modifies new_data in place) and show the result
new_data.loc[:, 'Predicted_Accidents'] = new_predictions
print("\nNew Data with Predicted Accident Counts:\n", new_data)



Mean Squared Error: 6.95
Mean Absolute Error: 2.25

Actual vs Predicted Accident Counts:
    Actual  Predicted
0       6   2.377591
1       5   5.883916

New Data with Predicted Accident Counts:
    average_speed  traffic_density road_condition  Predicted_Accidents
0             50               35           Good             2.287073
1             30               15            Bad             0.598993
2             70               85           Good             6.234709
