In [11]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

# Set style parameters
plt.style.use('seaborn')
blue_palette = ['#1f77b4', '#7aa6c2', '#aec7e8', '#c7d9e8', '#e1e7ed']
sns.set_palette(blue_palette)

# Define the correct data path
data_path = os.path.join('Data', 'Combined_Data')

# Verify files exist in the Combined_Data directory
required_files = ['combined_accidents.csv', 'combined_casualties.csv', 'combined_vehicles.csv']
for file in required_files:
    file_path = os.path.join(data_path, file)
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Missing required file: {file_path}")

# Read the data
print("Reading data files...")
accidents_df = pd.read_csv(os.path.join(data_path, 'combined_accidents.csv'))
casualties_df = pd.read_csv(os.path.join(data_path, 'combined_casualties.csv'))
vehicles_df = pd.read_csv(os.path.join(data_path, 'combined_vehicles.csv'))

# Convert Date column to datetime with UK format (day first)
accidents_df['Date'] = pd.to_datetime(accidents_df['Date'], dayfirst=True)
accidents_df['Year'] = accidents_df['Date'].dt.year
accidents_df['Month'] = accidents_df['Date'].dt.month

# Define time parsing function
def parse_time(time_str):
    """Parse time string to hour, handling 'Unknown' and invalid values."""
    if pd.isna(time_str) or time_str == 'Unknown':
        return np.nan
    try:
        # For HH:MM format
        if ':' in str(time_str):
            return int(str(time_str).split(':')[0])
        return np.nan
    except:
        return np.nan

# Convert Time to Hour with robust parsing
print("Processing time data...")
accidents_df['Hour'] = accidents_df['Time'].apply(parse_time)

# Create output directory
os.makedirs('visualizations', exist_ok=True)
print("Created visualizations directory")

# Initialize list to store all plot divs
plot_divs = []

# Calculate some summary statistics for our analysis
total_accidents = len(accidents_df)
total_casualties = len(casualties_df)
total_vehicles = len(vehicles_df)
severity_counts = accidents_df['Accident_Severity'].value_counts()
yearly_counts = accidents_df['Year'].value_counts().sort_index()

# 1. Interactive Accident Severity Trend
print("Generating severity trend visualization...")
severity_by_year = accidents_df.groupby(['Year', 'Accident_Severity']).size().unstack()
fig_severity = go.Figure()
for severity in severity_by_year.columns:
    fig_severity.add_trace(go.Bar(
        x=severity_by_year.index,
        y=severity_by_year[severity],
        name=severity,
        marker_color=blue_palette[list(severity_by_year.columns).index(severity)]
    ))
fig_severity.update_layout(
    title='Accident Severity Distribution by Year',
    barmode='stack',
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='#444444'),
    xaxis_title="Year",
    yaxis_title="Number of Accidents",
    showlegend=True,
    legend_title="Severity Level",
    height=600
)
plot_divs.append(fig_severity.to_html(full_html=False, include_plotlyjs='cdn'))




The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.



Reading data files...
Processing time data...
Created visualizations directory
Generating severity trend visualization...


In [12]:
# 2. Time of Day Heatmap
# Hour and Day Heatmap
print("Generating hour-day heatmap...")
hour_day_accidents = pd.crosstab(accidents_df['Hour'], accidents_df['Day_of_Week'])

# Order the days correctly
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
hour_day_accidents = hour_day_accidents[days_order]

fig_hour_day = px.imshow(
    hour_day_accidents,
    color_continuous_scale=['#f8f9fa', '#e1e7ed', '#c7d9e8', '#aec7e8', '#7aa6c2', '#1f77b4'],
    title='Accident Distribution by Hour and Day',
    labels=dict(x="Day of Week", y="Hour of Day", color="Number of Accidents")
)

fig_hour_day.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600,
    xaxis_title='Day of Week',
    yaxis_title='Hour of Day',
    margin=dict(t=50, b=50)
)

plot_divs.append(fig_hour_day.to_html(full_html=False, include_plotlyjs=False))

# 3. Interactive Age Distribution
print("Generating age distribution visualization...")
fig_age = px.violin(casualties_df,
                   x='Casualty_Severity',
                   y='Age_of_Casualty',
                   color='Casualty_Severity',
                   color_discrete_sequence=blue_palette,
                   box=True,
                   title='Age Distribution by Casualty Severity',
                   labels={'Casualty_Severity': 'Severity Level',
                          'Age_of_Casualty': 'Age'})
fig_age.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_age.to_html(full_html=False, include_plotlyjs=False))

# 4. Weather Impact Sunburst
print("Generating weather impact visualization...")
fig_weather = px.sunburst(
    accidents_df,
    path=['Weather_Conditions', 'Accident_Severity'],
    color_discrete_sequence=blue_palette,
    title='Weather Conditions Impact on Accident Severity'
)
fig_weather.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_weather.to_html(full_html=False, include_plotlyjs=False))

# 5. Vehicle Type Tree Map
print("Generating vehicle type visualization...")
vehicle_counts = vehicles_df['Vehicle_Type'].value_counts().reset_index()
vehicle_counts.columns = ['Vehicle_Type', 'Count']
fig_vehicles = px.treemap(
    vehicle_counts,
    path=['Vehicle_Type'],
    values='Count',
    color_discrete_sequence=blue_palette,
    title='Distribution of Vehicle Types in Accidents'
)
fig_vehicles.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_vehicles.to_html(full_html=False, include_plotlyjs=False))

# 6. Road Speed Analysis
print("Generating road speed analysis...")
fig_road = px.box(accidents_df,
                 x='Road_Type',
                 y='Speed_limit',
                 color='Road_Type',
                 color_discrete_sequence=blue_palette,
                 title='Speed Limits by Road Type',
                 labels={'Road_Type': 'Type of Road',
                        'Speed_limit': 'Speed Limit (mph)'})
fig_road.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis={'tickangle': 45},
    height=600
)
plot_divs.append(fig_road.to_html(full_html=False, include_plotlyjs=False))

# 7. Casualty Demographics Bubble Plot
print("Generating casualty demographics visualization...")
casualty_demographics = casualties_df.groupby(
    ['Age_Band_of_Casualty', 'Sex_of_Casualty', 'Casualty_Severity']
).size().reset_index(name='count')

fig_demographics = px.scatter(
    casualty_demographics,
    x='Age_Band_of_Casualty',
    y='Casualty_Severity',
    size='count',
    color='Sex_of_Casualty',
    color_discrete_sequence=blue_palette,
    hover_data=['count'],
    title='Casualty Demographics Analysis',
    labels={'Age_Band_of_Casualty': 'Age Band',
            'Casualty_Severity': 'Severity Level',
            'count': 'Number of Casualties',
            'Sex_of_Casualty': 'Gender'}
)
fig_demographics.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis={'tickangle': 45},
    height=600
)
plot_divs.append(fig_demographics.to_html(full_html=False, include_plotlyjs=False))

# Calculate additional statistics for our analysis
weather_impact = pd.crosstab(accidents_df['Weather_Conditions'], 
                           accidents_df['Accident_Severity'], 
                           normalize='index') * 100

casualty_age_stats = casualties_df.groupby('Casualty_Severity')['Age_of_Casualty'].agg(['mean', 'median', 'std']).round(1)

# Begin HTML generation with comprehensive analysis
print("Generating HTML report...")

Generating hour-day heatmap...


KeyError: "None of [Index(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',\n       'Sunday'],\n      dtype='object', name='Day_of_Week')] are in the [columns]"

In [8]:
# Generate comprehensive HTML report
html_content = f'''
<!DOCTYPE html>
<html>
<head>
    <title>UK Traffic Accident Analysis (2015-2018)</title>
    <style>
        body {{
            font-family: 'Helvetica Neue', Arial, sans-serif;
            margin: 0;
            padding: 0;
            background-color: #f5f7fa;
            color: #2c3e50;
            line-height: 1.6;
        }}
        .navbar {{
            background-color: #1f77b4;
            padding: 15px 20px;
            position: fixed;
            width: 100%;
            top: 0;
            z-index: 1000;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }}
        .navbar a {{
            color: white;
            text-decoration: none;
            margin-right: 20px;
            font-size: 14px;
            transition: opacity 0.3s;
        }}
        .navbar a:hover {{
            opacity: 0.8;
        }}
        .container {{
            max-width: 1200px;
            margin: 80px auto 20px auto;
            padding: 20px;
        }}
        .intro-section {{
            background-color: white;
            padding: 30px;
            border-radius: 10px;
            margin-bottom: 30px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }}
        .dataset-section {{
            background-color: #f8f9fa;
            padding: 20px;
            border-radius: 8px;
            margin: 20px 0;
        }}
        .visualization-section {{
            background-color: white;
            margin-bottom: 30px;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }}
        h1 {{
            color: #1f77b4;
            text-align: center;
            margin-bottom: 40px;
            font-size: 2.5em;
        }}
        h2 {{
            color: #2c3e50;
            border-bottom: 2px solid #eef2f5;
            padding-bottom: 10px;
            margin-top: 40px;
            font-size: 1.8em;
        }}
        h3 {{
            color: #34495e;
            margin-top: 25px;
            font-size: 1.4em;
        }}
        .key-stat {{
            background-color: #f8f9fa;
            padding: 15px;
            border-radius: 5px;
            margin: 10px 0;
            border-left: 4px solid #1f77b4;
        }}
        .insight-box {{
            background-color: #e8f4f8;
            padding: 20px;
            border-radius: 5px;
            margin: 15px 0;
        }}
        .data-table {{
            width: 100%;
            border-collapse: collapse;
            margin: 15px 0;
        }}
        .data-table th, .data-table td {{
            padding: 12px;
            border: 1px solid #dee2e6;
            text-align: left;
        }}
        .data-table th {{
            background-color: #f8f9fa;
        }}
        code {{
            background-color: #f8f9fa;
            padding: 2px 5px;
            border-radius: 3px;
            font-family: 'Courier New', monospace;
        }}
    </style>
</head>
<body>
    <div class="navbar">
        <a href="#overview">Overview</a>
        <a href="#datasets">Datasets</a>
        <a href="#severity">Severity Analysis</a>
        <a href="#temporal">Temporal Patterns</a>
        <a href="#weather">Weather Impact</a>
        <a href="#vehicles">Vehicle Analysis</a>
        <a href="#casualties">Casualty Analysis</a>
        <a href="#findings">Key Findings</a>
    </div>
    
    <div class="container">
        <div id="overview" class="intro-section">
            <h1>UK Traffic Accident Analysis (2015-2018)</h1>
            
            <div class="key-stat">
                <p><strong>Analysis Overview:</strong></p>
                <ul>
                    <li>Total Accidents Analyzed: {total_accidents:,}</li>
                    <li>Total Casualties: {total_casualties:,}</li>
                    <li>Total Vehicles Involved: {total_vehicles:,}</li>
                    <li>Time Period: 2015-2018</li>
                </ul>
            </div>
        </div>

        <div id="datasets" class="visualization-section">
            <h2>Dataset Information</h2>
            
            <div class="dataset-section">
                <h3>Accidents Dataset</h3>
                <p>Contains detailed information about each accident:</p>
                <ul>
                    <li><code>Accident_Index</code>: Unique identifier for each accident</li>
                    <li><code>Date</code>, <code>Time</code>: When the accident occurred</li>
                    <li><code>Location</code>: Geographical coordinates and district information</li>
                    <li><code>Road_Type</code>, <code>Speed_limit</code>: Road characteristics</li>
                    <li><code>Weather_Conditions</code>, <code>Light_Conditions</code>: Environmental factors</li>
                </ul>
            </div>

            <div class="dataset-section">
                <h3>Casualties Dataset</h3>
                <p>Details about individuals involved in accidents:</p>
                <ul>
                    <li><code>Casualty_Reference</code>: Unique identifier for each casualty</li>
                    <li><code>Age_of_Casualty</code>, <code>Sex_of_Casualty</code>: Demographic information</li>
                    <li><code>Casualty_Type</code>: Type of road user (pedestrian, driver, etc.)</li>
                    <li><code>Casualty_Severity</code>: Severity of injuries</li>
                </ul>
            </div>

            <div class="dataset-section">
                <h3>Vehicles Dataset</h3>
                <p>Information about vehicles involved:</p>
                <ul>
                    <li><code>Vehicle_Reference</code>: Unique identifier for each vehicle</li>
                    <li><code>Vehicle_Type</code>: Category of vehicle</li>
                    <li><code>Age_of_Vehicle</code>, <code>Engine_Capacity_(CC)</code>: Vehicle characteristics</li>
                    <li><code>Age_of_Driver</code>, <code>Sex_of_Driver</code>: Driver information</li>
                </ul>
            </div>
        </div>

        <div id="severity" class="visualization-section">
            <h2>Accident Severity Analysis</h2>
            {plot_divs[0]}
            <div class="insight-box">
                <h3>Key Insights:</h3>
                <ul>
                    <li>Overall decrease in accident numbers from 2015 to 2018</li>
                    <li>Slight accidents form the majority but show the steepest decline</li>
                    <li>Fatal accidents remain relatively stable despite overall reductions</li>
                    <li>Serious accidents show a slight upward trend in proportion</li>
                </ul>
            </div>
        </div>

        <div id="temporal" class="visualization-section">
            <h2>Temporal Patterns</h2>
            {plot_divs[1]}
            <div class="insight-box">
                <h3>Time of Day Analysis:</h3>
                <ul>
                    <li>Peak accident times correlate with rush hours (8-9 AM and 5-6 PM)</li>
                    <li>Higher severity rates during nighttime hours</li>
                    <li>Lower accident frequency during early morning hours (2-5 AM)</li>
                    <li>Distinct patterns between weekdays and weekends</li>
                </ul>
            </div>
        </div>

        <div class="visualization-section">
            <h2>Age Distribution Analysis</h2>
            {plot_divs[2]}
            <div class="insight-box">
                <h3>Age-Related Findings:</h3>
                <ul>
                    <li>Young adults (18-25) show higher involvement in accidents</li>
                    <li>Elderly casualties (65+) tend to have more severe injuries</li>
                    <li>Children under 15 show distinct casualty patterns</li>
                    <li>Middle-aged adults show the most varied severity distribution</li>
                </ul>
            </div>
        </div>

        <div id="weather" class="visualization-section">
            <h2>Weather Impact Analysis</h2>
            {plot_divs[3]}
            <div class="insight-box">
                <h3>Weather-Related Insights:</h3>
                <ul>
                    <li>Fine weather accounts for majority of accidents due to higher traffic volume</li>
                    <li>Rain increases accident likelihood but not necessarily severity</li>
                    <li>Snow and ice show fewer but more severe accidents</li>
                    <li>Fog and mist conditions show higher severity rates</li>
                </ul>
            </div>
        </div>

        <div id="vehicles" class="visualization-section">
            <h2>Vehicle Type Analysis</h2>
            {plot_divs[4]}
            {plot_divs[5]}
            <div class="insight-box">
                <h3>Vehicle-Related Findings:</h3>
                <ul>
                    <li>Cars dominate accident statistics due to their prevalence</li>
                    <li>Motorcycles show disproportionately high severity rates</li>
                    <li>Heavy vehicles (trucks, buses) show lower frequency but higher severity</li>
                    <li>Road type significantly influences accident severity for different vehicles</li>
                </ul>
            </div>
        </div>

        <div id="casualties" class="visualization-section">
            <h2>Casualty Demographics</h2>
            {plot_divs[6]}
            <div class="insight-box">
                <h3>Demographic Insights:</h3>
                <ul>
                    <li>Gender disparities in accident involvement and severity</li>
                    <li>Age groups show different vulnerability patterns</li>
                    <li>Socio-economic factors influence casualty rates</li>
                    <li>Urban vs rural differences in casualty profiles</li>
                </ul>
            </div>
        </div>

        <div id="findings" class="visualization-section">
            <h2>Key Findings and Recommendations</h2>
            
            <h3>Major Trends</h3>
            <div class="key-stat">
                <ul>
                    <li>Overall accident rates show a declining trend (2015-2018)</li>
                    <li>Severity patterns vary significantly by time, location, and conditions</li>
                    <li>Demographic factors strongly influence accident outcomes</li>
                    <li>Environmental conditions play a crucial role in accident severity</li>
                </ul>
            </div>

            <h3>Recommendations</h3>
            <div class="insight-box">
                <ul>
                    <li>Enhanced safety measures during peak hours</li>
                    <li>Targeted interventions for high-risk age groups</li>
                    <li>Weather-specific traffic management strategies</li>
                    <li>Vehicle-specific safety campaigns</li>
                    <li>Improved road design at high-risk locations</li>
                </ul>
            </div>

            <h3>Future Research Directions</h3>
            <div class="key-stat">
                <ul>
                    <li>Deep dive into specific vehicle type patterns</li>
                    <li>Analysis of intervention effectiveness</li>
                    <li>Seasonal pattern investigation</li>
                    <li>Socio-economic factor analysis</li>
                </ul>
            </div>
        </div>
    </div>
</body>
</html>
'''

# Write the HTML file
with open('visualizations/comprehensive_analysis.html', 'w', encoding='utf-8') as f:
    f.write(html_content)

print("Analysis complete! Open 'visualizations/comprehensive_analysis.html' in a web browser to view the complete analysis.")

Analysis complete! Open 'visualizations/comprehensive_analysis.html' in a web browser to view the complete analysis.


In [None]:
# 1. Interactive Accident Severity Trend
print("Generating severity trend visualization...")
severity_by_year = accidents_df.groupby(['Year', 'Accident_Severity']).size().unstack()
fig_severity = go.Figure()
for severity in severity_by_year.columns:
    fig_severity.add_trace(go.Bar(
        x=severity_by_year.index,
        y=severity_by_year[severity],
        name=severity,
        marker_color=blue_palette[list(severity_by_year.columns).index(severity)]
    ))
fig_severity.update_layout(
    title='Accident Severity Distribution by Year',
    barmode='stack',
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='#444444'),
    xaxis_title="Year",
    yaxis_title="Number of Accidents",
    showlegend=True,
    legend_title="Severity Level",
    height=600
)
plot_divs.append(fig_severity.to_html(full_html=False, include_plotlyjs='cdn'))

# 2. Time of Day Heatmap
print("Generating time of day heatmap...")
accidents_with_time = accidents_df.dropna(subset=['Hour'])
hourly_severity = pd.crosstab(accidents_with_time['Hour'], accidents_with_time['Accident_Severity'])
fig_time = px.imshow(hourly_severity,
                    color_continuous_scale='Blues',
                    title='Accident Severity by Hour of Day',
                    labels=dict(x="Severity Level", y="Hour of Day", color="Number of Accidents"))
fig_time.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_time.to_html(full_html=False, include_plotlyjs=False))

# 3. Hour and Day Heatmap
print("Generating hour-day heatmap...")
hour_day_accidents = pd.crosstab(accidents_df['Hour'], accidents_df['Day_of_Week'])
fig_hour_day = px.imshow(hour_day_accidents,
                        color_continuous_scale=['#f8f9fa', '#e1e7ed', '#c7d9e8', '#aec7e8', 
                                             '#7aa6c2', '#1f77b4'],
                        title='Accident Frequency by Hour and Day of Week',
                        labels=dict(x='Day of Week', y='Hour of Day', color='Number of Accidents'))
fig_hour_day.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600,
    xaxis_title='Day of Week',
    yaxis_title='Hour of Day'
)
plot_divs.append(fig_hour_day.to_html(full_html=False, include_plotlyjs=False))

# 4. Age Distribution
print("Generating age distribution visualization...")
fig_age = px.violin(casualties_df,
                   x='Casualty_Severity',
                   y='Age_of_Casualty',
                   color='Casualty_Severity',
                   color_discrete_sequence=blue_palette,
                   box=True,
                   title='Age Distribution by Casualty Severity',
                   labels={'Casualty_Severity': 'Severity Level',
                          'Age_of_Casualty': 'Age'})
fig_age.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_age.to_html(full_html=False, include_plotlyjs=False))

# 5. Weather Impact Sunburst
print("Generating weather impact visualization...")
fig_weather = px.sunburst(
    accidents_df,
    path=['Weather_Conditions', 'Accident_Severity'],
    color_discrete_sequence=blue_palette,
    title='Weather Conditions Impact on Accident Severity'
)
fig_weather.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_weather.to_html(full_html=False, include_plotlyjs=False))

# 6. Vehicle Type Distribution
print("Generating vehicle type visualization...")
vehicle_counts = vehicles_df['Vehicle_Type'].value_counts().reset_index()
vehicle_counts.columns = ['Vehicle_Type', 'Count']
fig_vehicles = px.treemap(
    vehicle_counts,
    path=['Vehicle_Type'],
    values='Count',
    color_discrete_sequence=blue_palette,
    title='Distribution of Vehicle Types in Accidents'
)
fig_vehicles.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_vehicles.to_html(full_html=False, include_plotlyjs=False))

# 7. Road Speed Analysis
print("Generating road speed analysis...")
fig_road = px.box(accidents_df,
                 x='Road_Type',
                 y='Speed_limit',
                 color='Road_Type',
                 color_discrete_sequence=blue_palette,
                 title='Speed Limits by Road Type',
                 labels={'Road_Type': 'Type of Road',
                        'Speed_limit': 'Speed Limit (mph)'})
fig_road.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis={'tickangle': 45},
    height=600
)
plot_divs.append(fig_road.to_html(full_html=False, include_plotlyjs=False))

# 8. Casualty Demographics
print("Generating casualty demographics visualization...")
casualty_demographics = casualties_df.groupby(
    ['Age_Band_of_Casualty', 'Sex_of_Casualty', 'Casualty_Severity']
).size().reset_index(name='count')

fig_demographics = px.scatter(
    casualty_demographics,
    x='Age_Band_of_Casualty',
    y='Casualty_Severity',
    size='count',
    color='Sex_of_Casualty',
    color_discrete_sequence=blue_palette,
    hover_data=['count'],
    title='Casualty Demographics Analysis',
    labels={'Age_Band_of_Casualty': 'Age Band',
            'Casualty_Severity': 'Severity Level',
            'count': 'Number of Casualties',
            'Sex_of_Casualty': 'Gender'}
)
fig_demographics.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis={'tickangle': 45},
    height=600
)
plot_divs.append(fig_demographics.to_html(full_html=False, include_plotlyjs=False))

print("Visualization generation complete!")

Generating severity trend visualization...
Generating time of day heatmap...
Generating hour-day heatmap...
Generating age distribution visualization...
Generating weather impact visualization...






Generating vehicle type visualization...
Generating road speed analysis...
Generating casualty demographics visualization...






Visualization generation complete!


In [10]:
# Generate comprehensive HTML report
print("Generating HTML report...")

html_content = f'''
<!DOCTYPE html>
<html>
<head>
    <title>UK Traffic Accident Analysis (2015-2018)</title>
    <style>
        body {{
            font-family: 'Helvetica Neue', Arial, sans-serif;
            margin: 0;
            padding: 0;
            background-color: #f5f7fa;
            color: #2c3e50;
            line-height: 1.6;
        }}
        .navbar {{
            background-color: #1f77b4;
            padding: 15px 20px;
            position: fixed;
            width: 100%;
            top: 0;
            z-index: 1000;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }}
        .navbar a {{
            color: white;
            text-decoration: none;
            margin-right: 20px;
            font-size: 14px;
            transition: opacity 0.3s;
        }}
        .navbar a:hover {{
            opacity: 0.8;
            background-color: rgba(255,255,255,0.1);
            padding: 5px 10px;
            border-radius: 3px;
        }}
        .container {{
            max-width: 1200px;
            margin: 80px auto 20px auto;
            padding: 20px;
        }}
        .intro-section {{
            background-color: white;
            padding: 30px;
            border-radius: 10px;
            margin-bottom: 30px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }}
        .stats-grid {{
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin: 20px 0;
        }}
        .stat-card {{
            background-color: #f8f9fa;
            padding: 20px;
            border-radius: 8px;
            border-left: 4px solid #1f77b4;
        }}
        .visualization-section {{
            background-color: white;
            margin-bottom: 30px;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }}
        h1 {{
            color: #1f77b4;
            text-align: center;
            margin-bottom: 40px;
            font-size: 2.5em;
        }}
        h2 {{
            color: #2c3e50;
            border-bottom: 2px solid #eef2f5;
            padding-bottom: 10px;
            margin-top: 40px;
            font-size: 1.8em;
        }}
        h3 {{
            color: #34495e;
            margin-top: 25px;
            font-size: 1.4em;
        }}
        .insight-box {{
            background-color: #e8f4f8;
            padding: 20px;
            border-radius: 5px;
            margin: 15px 0;
        }}
        .key-stat {{
            background-color: #f8f9fa;
            padding: 15px;
            border-radius: 5px;
            margin: 10px 0;
            border-left: 4px solid #1f77b4;
        }}
        .recommendations {{
            background-color: #e8f4f8;
            padding: 20px;
            border-radius: 5px;
            margin-top: 20px;
        }}
        .data-table {{
            width: 100%;
            border-collapse: collapse;
            margin: 15px 0;
        }}
        .data-table th, .data-table td {{
            padding: 12px;
            border: 1px solid #dee2e6;
            text-align: left;
        }}
        .data-table th {{
            background-color: #f8f9fa;
        }}
    </style>
</head>
<body>
    <div class="navbar">
        <a href="#overview">Overview</a>
        <a href="#temporal">Temporal Analysis</a>
        <a href="#severity">Severity Analysis</a>
        <a href="#weather">Weather Impact</a>
        <a href="#vehicles">Vehicle Analysis</a>
        <a href="#casualties">Casualty Analysis</a>
        <a href="#findings">Key Findings</a>
    </div>
    
    <div class="container">
        <!-- Overview Section -->
        <div id="overview" class="intro-section">
            <h1>UK Traffic Accident Analysis (2015-2018)</h1>
            <div class="stats-grid">
                <div class="stat-card">
                    <h3>Total Accidents</h3>
                    <p style="font-size: 1.8em;">{total_accidents:,}</p>
                </div>
                <div class="stat-card">
                    <h3>Total Casualties</h3>
                    <p style="font-size: 1.8em;">{total_casualties:,}</p>
                </div>
                <div class="stat-card">
                    <h3>Total Vehicles</h3>
                    <p style="font-size: 1.8em;">{total_vehicles:,}</p>
                </div>
                <div class="stat-card">
                    <h3>Time Period</h3>
                    <p style="font-size: 1.8em;">2015-2018</p>
                </div>
            </div>
        </div>

      <!-- Temporal Analysis Section -->
<div id="temporal" class="visualization-section">
    <h2>Temporal Pattern Analysis</h2>
    
    <div class="visualization-container">
        <h3>Time of Day Distribution</h3>
        {plot_divs[1]}
        <div class="insight-box">
            <h3>Time of Day Patterns:</h3>
            <ul>
                <li>Morning Peak (7-9 AM): Coincides with work/school commute</li>
                <li>Evening Peak (4-6 PM): Shows highest accident frequency</li>
                <li>Night Hours (11 PM-4 AM): Lower frequency but higher severity</li>
                <li>Mid-day (10 AM-3 PM): Moderate but steady accident rates</li>
            </ul>
        </div>
    </div>

    <div class="visualization-container">
        <h3>Weekly Pattern Analysis</h3>
        <!-- Hour and Day Heatmap -->
        {plot_divs[2]}
        <div class="insight-box">
            <h3>Weekly Distribution:</h3>
            <ul>
                <li>Weekdays show distinct rush-hour patterns</li>
                <li>Friday evenings have notably higher accident rates</li>
                <li>Weekend patterns differ with later morning peaks and more distributed afternoon accidents</li>
                <li>Early morning hours show consistently lower rates across all days</li>
            </ul>
        </div>
    </div>
    <div class="key-stat">
        <h3>Critical Time Periods:</h3>
        <div class="stats-grid">
            <div class="stat-card">
                <h4>Weekday Peaks</h4>
                <ul>
                    <li>Morning: 7:00-9:00 AM</li>
                    <li>Evening: 4:00-6:30 PM</li>
                </ul>
            </div>
            <div class="stat-card">
                <h4>Weekend Patterns</h4>
                <ul>
                    <li>Mid-morning: 10:00 AM-12:00 PM</li>
                    <li>Afternoon: 2:00-6:00 PM</li>
                </ul>
            </div>
            <div class="stat-card">
                <h4>High Severity Times</h4>
                <ul>
                    <li>Night: 11:00 PM-2:00 AM</li>
                    <li>Early Morning: 4:00-6:00 AM</li>
                </ul>
            </div>
        </div>
    </div>

    <div class="recommendations">
        <h3>Temporal Pattern Recommendations:</h3>
        <ul>
            <li>Enhance traffic management during identified peak hours</li>
            <li>Implement specific weekend safety measures</li>
            <li>Increase visibility and safety measures during night hours</li>
            <li>Develop time-specific intervention strategies</li>
        </ul>
    </div>
</div>
    
        <!-- Severity Analysis Section -->
        <div id="severity" class="visualization-section">
            <h2>Severity Analysis</h2>
            {plot_divs[0]}
            <div class="insight-box">
                <h3>Severity Trends:</h3>
                <ul>
                    <li>Overall decrease in accident numbers from 2015 to 2018</li>
                    <li>Slight accidents show the most significant reduction</li>
                    <li>Fatal accidents remain relatively constant despite overall reductions</li>
                    <li>Serious accidents show slight proportional increase</li>
                </ul>
            </div>
        </div>

        <!-- Weather Impact Section -->
        <div id="weather" class="visualization-section">
            <h2>Weather Impact Analysis</h2>
            {plot_divs[4]}
            <div class="insight-box">
                <h3>Weather-Related Findings:</h3>
                <ul>
                    <li>Clear weather accounts for majority of accidents due to higher traffic volume</li>
                    <li>Rain increases accident likelihood but not necessarily severity</li>
                    <li>Snow and ice conditions show fewer but more severe accidents</li>
                    <li>Poor visibility conditions correlate with higher severity rates</li>
                </ul>
            </div>
        </div>

        <!-- Vehicle Analysis Section -->
        <div id="vehicles" class="visualization-section">
            <h2>Vehicle Type Analysis</h2>
            {plot_divs[5]}
            {plot_divs[6]}
            <div class="insight-box">
                <h3>Vehicle-Related Insights:</h3>
                <ul>
                    <li>Cars are involved in approximately 80% of all accidents</li>
                    <li>Two-wheeled vehicles show higher severity rates per incident</li>
                    <li>Heavy goods vehicles show lower frequency but higher severity</li>
                    <li>Vehicle type influences accident severity significantly</li>
                </ul>
            </div>
        </div>

        <!-- Casualty Analysis Section -->
        <div id="casualties" class="visualization-section">
            <h2>Casualty Analysis</h2>
            {plot_divs[3]}
            {plot_divs[7]}
            <div class="insight-box">
                <h3>Casualty Patterns:</h3>
                <ul>
                    <li>Young adults (18-25) show higher involvement rates</li>
                    <li>Elderly casualties (65+) show higher severity rates when involved</li>
                    <li>Gender differences are evident across age groups</li>
                    <li>Pedestrian casualties show distinct patterns from vehicle occupants</li>
                </ul>
            </div>
        </div>

        <!-- Key Findings Section -->
        <div id="findings" class="visualization-section">
            <h2>Key Findings and Recommendations</h2>
            <div class="recommendations">
                <h3>Primary Recommendations:</h3>
                <ul>
                    <li>Implement targeted safety measures during peak hours</li>
                    <li>Enhance safety education for high-risk age groups</li>
                    <li>Develop weather-specific traffic management strategies</li>
                    <li>Focus on vulnerable road user protection</li>
                    <li>Improve road design at high-risk locations</li>
                </ul>
            </div>
            <div class="key-stat">
                <h3>Future Research Directions:</h3>
                <ul>
                    <li>Detailed analysis of specific vehicle type patterns</li>
                    <li>Investigation of intervention effectiveness</li>
                    <li>Study of behavioral factors in accidents</li>
                    <li>Analysis of geographical patterns</li>
                </ul>
            </div>
        </div>
    </div>
</body>
</html>
'''

# Write the HTML file
with open('visualizations/comprehensive_analysis.html', 'w', encoding='utf-8') as f:
    f.write(html_content)

print("Analysis complete! Open 'visualizations/comprehensive_analysis.html' in a web browser to view the complete analysis.")

Generating HTML report...
Analysis complete! Open 'visualizations/comprehensive_analysis.html' in a web browser to view the complete analysis.
