In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

# Set style parameters
# The bare 'seaborn' style name was deprecated in matplotlib 3.6 (it emits a
# MatplotlibDeprecationWarning) and is unavailable in later releases, where the
# same style sheet is registered as 'seaborn-v0_8'. Prefer the new name when
# this matplotlib provides it, and fall back to the legacy one otherwise.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)

# Blue-toned palette (darkest to lightest) used as the seaborn default
blue_palette = ['#1f77b4', '#7aa6c2', '#aec7e8', '#c7d9e8', '#e1e7ed']
sns.set_palette(blue_palette)

# Folder that holds the combined CSV inputs
data_path = os.path.join('Data', 'Combined_Data')

# Ensure the folder for generated output exists
os.makedirs('visualizations', exist_ok=True)
print("Created visualizations directory")

# Fail fast on the first expected input CSV that is absent
required_files = ['combined_accidents.csv', 'combined_casualties.csv', 'combined_vehicles.csv']
for file in required_files:
    file_path = os.path.join(data_path, file)
    if os.path.exists(file_path):
        continue
    raise FileNotFoundError(f"Missing required file: {file_path}")

# Load the three tables, in the same order as required_files
print("Reading data files...")
accidents_df, casualties_df, vehicles_df = (
    pd.read_csv(os.path.join(data_path, csv_name)) for csv_name in required_files
)

# Data preprocessing
print("Processing data...")

# Parse the Date column once (day-first, i.e. UK-style dates) and derive the
# calendar year and month from the parsed values
parsed_dates = pd.to_datetime(accidents_df['Date'], dayfirst=True)
accidents_df['Date'] = parsed_dates
accidents_df['Year'] = parsed_dates.dt.year
accidents_df['Month'] = parsed_dates.dt.month
# Define time parsing function
def parse_time(time_str):
    """Parse a time value into its hour component.

    Args:
        time_str: A time in ``HH:MM`` form; the text before the first ``:``
            is treated as the hour. Missing values and the literal
            ``'Unknown'`` are accepted.

    Returns:
        The hour as an ``int`` in the range 0-23, or ``np.nan`` when the
        value is missing, equals ``'Unknown'``, contains no ``:``, has a
        non-integer hour part, or has an hour outside 0-23.
    """
    if pd.isna(time_str) or time_str == 'Unknown':
        return np.nan

    hour_text, separator, _ = str(time_str).partition(':')
    if not separator:
        # Not in HH:MM form
        return np.nan

    # Only a failed integer conversion is an expected error here; anything
    # else (e.g. KeyboardInterrupt) must not be silently swallowed.
    try:
        hour = int(hour_text)
    except ValueError:
        return np.nan

    # Reject impossible hours such as "25:00" or "-1:30" rather than letting
    # them leak into the hourly statistics.
    return hour if 0 <= hour <= 23 else np.nan

# Derive the hour of day from the Time column (NaN where it cannot be parsed)
accidents_df['Hour'] = accidents_df['Time'].apply(parse_time)

# Encode the weekday name as an ordered category code (Monday=0 ... Sunday=6);
# values that are not one of the listed names get code -1
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday_dtype = pd.CategoricalDtype(categories=day_order, ordered=True)
accidents_df['Day_Num'] = accidents_df['Day_of_Week'].astype(weekday_dtype).cat.codes

# Headline totals: row count of each table
total_accidents, total_casualties, total_vehicles = (
    table.shape[0] for table in (accidents_df, casualties_df, vehicles_df)
)

# Accident counts per severity level and per calendar year
severity_counts = accidents_df['Accident_Severity'].value_counts()
yearly_counts = accidents_df['Year'].value_counts().sort_index()

# Temporal breakdowns: by hour and month (sorted by value) and by weekday
# (left in value_counts' default frequency order)
hourly_distribution = accidents_df['Hour'].value_counts().sort_index()
daily_distribution = accidents_df['Day_of_Week'].value_counts()
monthly_distribution = accidents_df['Month'].value_counts().sort_index()

# Share of each severity level within every weather condition, as a percentage
# (each row sums to 100)
weather_severity = pd.crosstab(
    accidents_df['Weather_Conditions'],
    accidents_df['Accident_Severity'],
    normalize='index',
).mul(100)

# Mean / median / standard deviation of casualty age per casualty severity,
# rounded to one decimal place
age_by_severity = casualties_df.groupby('Casualty_Severity')['Age_of_Casualty']
casualty_age_stats = age_by_severity.agg(['mean', 'median', 'std']).round(1)

# Number of vehicle records per vehicle type
vehicle_type_stats = vehicles_df['Vehicle_Type'].value_counts()

# Collects plot divs for the HTML generation step
plot_divs = []

print("Data preprocessing complete!")

  plt.style.use('seaborn')


Created visualizations directory
Reading data files...
Processing data...
Data preprocessing complete!
