In [1]:
import os

# Print current working directory
print("Current working directory:", os.getcwd())

# List contents of current directory
print("\nContents of current directory:", os.listdir())

# If there's a Data directory, list its contents
if os.path.exists('Data'):
    print("\nContents of Data directory:", os.listdir('Data'))

Current working directory: c:\Users\Wolfrank\Desktop\GiGabyte\CodeWolf\UKDataProject\Data\Combined_Data

Contents of current directory: ['analysis.ipynb', 'combined_accidents.csv', 'combined_casualties.csv', 'combined_vehicles.csv']


In [7]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

# Set style parameters
plt.style.use('seaborn')
blue_palette = ['#1f77b4', '#7aa6c2', '#aec7e8', '#c7d9e8', '#e1e7ed']
sns.set_palette(blue_palette)

# Verify files exist in current directory
required_files = ['combined_accidents.csv', 'combined_casualties.csv', 'combined_vehicles.csv']
for file in required_files:
    if not os.path.exists(file):
        raise FileNotFoundError(f"Missing required file: {file}")

# Read the data
print("Reading data files...")
accidents_df = pd.read_csv('combined_accidents.csv')
casualties_df = pd.read_csv('combined_casualties.csv')
vehicles_df = pd.read_csv('combined_vehicles.csv')

# Convert Date column to datetime with UK format (day first)
accidents_df['Date'] = pd.to_datetime(accidents_df['Date'], dayfirst=True)
accidents_df['Year'] = accidents_df['Date'].dt.year
accidents_df['Month'] = accidents_df['Date'].dt.month

# Define time parsing function
def parse_time(time_str):
    """Parse time string to hour, handling 'Unknown' and invalid values."""
    if pd.isna(time_str) or time_str == 'Unknown':
        return np.nan
    try:
        # For HH:MM format
        if ':' in str(time_str):
            return int(str(time_str).split(':')[0])
        return np.nan
    except:
        return np.nan

# Convert Time to Hour with robust parsing
print("Processing time data...")
accidents_df['Hour'] = accidents_df['Time'].apply(parse_time)

# Create output directory
os.makedirs('visualizations', exist_ok=True)
print("Created visualizations directory")

# Initialize list to store all plot divs
plot_divs = []

# 1. Interactive Accident Severity Trend
print("Generating severity trend visualization...")
severity_by_year = accidents_df.groupby(['Year', 'Accident_Severity']).size().unstack()
fig_severity = go.Figure()
for severity in severity_by_year.columns:
    fig_severity.add_trace(go.Bar(
        x=severity_by_year.index,
        y=severity_by_year[severity],
        name=severity,
        marker_color=blue_palette[list(severity_by_year.columns).index(severity)]
    ))
fig_severity.update_layout(
    title='Accident Severity Distribution by Year',
    barmode='stack',
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(color='#444444'),
    xaxis_title="Year",
    yaxis_title="Number of Accidents",
    showlegend=True,
    legend_title="Severity Level",
    height=600
)
plot_divs.append(fig_severity.to_html(full_html=False, include_plotlyjs='cdn'))

# 2. Time of Day Heatmap
print("Generating time of day heatmap...")
accidents_with_time = accidents_df.dropna(subset=['Hour'])
hourly_severity = pd.crosstab(accidents_with_time['Hour'], accidents_with_time['Accident_Severity'])
fig_time = px.imshow(hourly_severity,
                    color_continuous_scale='Blues',
                    title='Accident Severity by Hour of Day',
                    labels=dict(x="Severity Level", y="Hour of Day", color="Number of Accidents"))
fig_time.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_time.to_html(full_html=False, include_plotlyjs=False))

# 3. Interactive Age Distribution
print("Generating age distribution visualization...")
fig_age = px.violin(casualties_df,
                   x='Casualty_Severity',
                   y='Age_of_Casualty',
                   color='Casualty_Severity',
                   color_discrete_sequence=blue_palette,
                   box=True,
                   title='Age Distribution by Casualty Severity',
                   labels={'Casualty_Severity': 'Severity Level',
                          'Age_of_Casualty': 'Age'})
fig_age.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_age.to_html(full_html=False, include_plotlyjs=False))

# 4. Weather Impact Sunburst
print("Generating weather impact visualization...")
fig_weather = px.sunburst(
    accidents_df,
    path=['Weather_Conditions', 'Accident_Severity'],
    color_discrete_sequence=blue_palette,
    title='Weather Conditions Impact on Accident Severity'
)
fig_weather.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_weather.to_html(full_html=False, include_plotlyjs=False))

# 5. Vehicle Type Tree Map
print("Generating vehicle type visualization...")
vehicle_counts = vehicles_df['Vehicle_Type'].value_counts().reset_index()
vehicle_counts.columns = ['Vehicle_Type', 'Count']
fig_vehicles = px.treemap(
    vehicle_counts,
    path=['Vehicle_Type'],
    values='Count',
    color_discrete_sequence=blue_palette,
    title='Distribution of Vehicle Types in Accidents'
)
fig_vehicles.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    height=600
)
plot_divs.append(fig_vehicles.to_html(full_html=False, include_plotlyjs=False))

# 6. Road Speed Analysis
print("Generating road speed analysis...")
fig_road = px.box(accidents_df,
                 x='Road_Type',
                 y='Speed_limit',
                 color='Road_Type',
                 color_discrete_sequence=blue_palette,
                 title='Speed Limits by Road Type',
                 labels={'Road_Type': 'Type of Road',
                        'Speed_limit': 'Speed Limit (mph)'})
fig_road.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis={'tickangle': 45},
    height=600
)
plot_divs.append(fig_road.to_html(full_html=False, include_plotlyjs=False))

# 7. Casualty Demographics Bubble Plot
print("Generating casualty demographics visualization...")
casualty_demographics = casualties_df.groupby(
    ['Age_Band_of_Casualty', 'Sex_of_Casualty', 'Casualty_Severity']
).size().reset_index(name='count')

fig_demographics = px.scatter(
    casualty_demographics,
    x='Age_Band_of_Casualty',
    y='Casualty_Severity',
    size='count',
    color='Sex_of_Casualty',
    color_discrete_sequence=blue_palette,
    hover_data=['count'],
    title='Casualty Demographics Analysis',
    labels={'Age_Band_of_Casualty': 'Age Band',
            'Casualty_Severity': 'Severity Level',
            'count': 'Number of Casualties',
            'Sex_of_Casualty': 'Gender'}
)
fig_demographics.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis={'tickangle': 45},
    height=600
)
plot_divs.append(fig_demographics.to_html(full_html=False, include_plotlyjs=False))

# Generate combined HTML report
print("Generating combined HTML report...")
html_content = f'''
<!DOCTYPE html>
<html>
<head>
    <title>UK Traffic Accident Analysis Dashboard</title>
    <style>
        body {{
            font-family: 'Helvetica Neue', Arial, sans-serif;
            margin: 0;
            padding: 0;
            background-color: #f5f7fa;
            color: #2c3e50;
        }}
        .navbar {{
            background-color: #1f77b4;
            padding: 10px 20px;
            position: fixed;
            width: 100%;
            top: 0;
            z-index: 1000;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }}
        .navbar a {{
            color: white;
            text-decoration: none;
            margin-right: 20px;
            font-size: 14px;
        }}
        .navbar a:hover {{
            text-decoration: underline;
        }}
        .container {{
            max-width: 1200px;
            margin: 80px auto 20px auto;
            padding: 20px;
        }}
        .visualization-section {{
            background-color: white;
            margin-bottom: 30px;
            padding: 20px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }}
        h1 {{
            color: #1f77b4;
            text-align: center;
            margin-bottom: 40px;
        }}
        h2 {{
            color: #2c3e50;
            border-bottom: 2px solid #eef2f5;
            padding-bottom: 10px;
            margin-top: 40px;
        }}
        .description {{
            margin: 15px 0;
            line-height: 1.6;
            color: #555;
        }}
        .plotly-graph-div {{
            margin: 20px auto;
        }}
    </style>
</head>
<body>
    <div class="navbar">
        <a href="#severity">Severity Trends</a>
        <a href="#time">Time Analysis</a>
        <a href="#age">Age Distribution</a>
        <a href="#weather">Weather Impact</a>
        <a href="#vehicles">Vehicle Types</a>
        <a href="#road">Road Analysis</a>
        <a href="#demographics">Demographics</a>
    </div>
    <div class="container">
        <h1>UK Traffic Accident Analysis Dashboard (2015-2018)</h1>
        
        <div id="severity" class="visualization-section">
            <h2>1. Accident Severity Trends</h2>
            {plot_divs[0]}
            <div class="description">
                Analysis of accident severity distribution across years, showing trends in fatal, serious, and slight accidents.
            </div>
        </div>
        
        <div id="time" class="visualization-section">
            <h2>2. Time of Day Analysis</h2>
            {plot_divs[1]}
            <div class="description">
                Heatmap showing the distribution of accidents throughout the day, highlighting peak accident hours.
            </div>
        </div>
        
        <div id="age" class="visualization-section">
            <h2>3. Casualty Age Distribution</h2>
            {plot_divs[2]}
            <div class="description">
                Violin plot showing the age distribution of casualties across different severity levels.
            </div>
        </div>
        
        <div id="weather" class="visualization-section">
            <h2>4. Weather Impact Analysis</h2>
            {plot_divs[3]}
            <div class="description">
                Sunburst diagram showing the relationship between weather conditions and accident severity.
            </div>
        </div>
        
        <div id="vehicles" class="visualization-section">
            <h2>5. Vehicle Type Distribution</h2>
            {plot_divs[4]}
            <div class="description">
                Treemap showing the distribution of different vehicle types involved in accidents.
            </div>
        </div>
        
        <div id="road" class="visualization-section">
            <h2>6. Road and Speed Analysis</h2>
            {plot_divs[5]}
            <div class="description">
                Box plot showing the relationship between road types and speed limits.
            </div>
        </div>
        
        <div id="demographics" class="visualization-section">
            <h2>7. Casualty Demographics</h2>
            {plot_divs[6]}
            <div class="description">
                Bubble plot showing the relationship between age bands, gender, and casualty severity.
            </div>
        </div>
    </div>
</body>
</html>
'''

# Write the combined HTML file
with open('visualizations/combined_analysis.html', 'w') as f:
    f.write(html_content)

print("Analysis complete! Open 'visualizations/combined_analysis.html' in a web browser to view all visualizations.")


The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.



Reading data files...
Processing time data...
Created visualizations directory
Generating severity trend visualization...
Generating time of day heatmap...
Generating age distribution visualization...






Generating weather impact visualization...
Generating vehicle type visualization...
Generating road speed analysis...
Generating casualty demographics visualization...
Generating combined HTML report...






Analysis complete! Open 'visualizations/combined_analysis.html' in a web browser to view all visualizations.


In [4]:
# First, let's look at what's in the Time column
print("Sample of Time values:")
print(accidents_df['Time'].head())

Sample of Time values:
0    18:45
1    07:50
2    18:08
3    07:40
4    07:30
Name: Time, dtype: object
