# In [6]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os

def _load_clean_data(data_path):
    """Read the vehicles CSV at *data_path* and return the cleaned rows.

    The eight CSV columns are renamed by position, rows whose
    ``Age_of_Driver`` is not positive are dropped, and ``Sex_of_Driver`` is
    mapped from the codes 1/2 to 'Male'/'Female'; rows with any other code
    are dropped.

    Raises:
        FileNotFoundError: if *data_path* does not exist.
        ValueError: if the CSV does not have exactly eight columns
            (raised by pandas when the column names are assigned).
    """
    print(f"Reading data from: {data_path}")
    df = pd.read_csv(data_path)
    print("Data loaded successfully. Shape:", df.shape)

    # Columns are renamed positionally, so the CSV column order matters.
    df.columns = [
        'Accident_Index',
        'Vehicle_Reference',
        'Vehicle_Type',
        'Sex_of_Driver',
        'Age_of_Driver',
        'Age_Band_of_Driver',
        'Engine_Capacity_CC',
        'Age_of_Vehicle',
    ]

    # Remove unknown ages (-1 or negative values).
    df_clean = df[df['Age_of_Driver'] > 0].copy()

    # Clean gender data (1 = male, 2 = female); other codes map to NaN
    # and are filtered out by the isin() check.
    df_clean['Sex_of_Driver'] = df_clean['Sex_of_Driver'].map({1: 'Male', 2: 'Female'})
    return df_clean[df_clean['Sex_of_Driver'].isin(['Male', 'Female'])]


def _summary_stats(df_clean):
    """Return the three headline figures shown in the summary cards."""
    # Negative vehicle ages are treated as "unknown", the same way the
    # cleaning step treats driver ages, so they do not drag the mean down.
    # NOTE(review): the data looks like UK STATS19, where -1 means missing
    # -- confirm against the data guide.
    known_vehicle_age = df_clean.loc[df_clean['Age_of_Vehicle'] >= 0, 'Age_of_Vehicle']
    return {
        # Each row is one vehicle; several rows can share an Accident_Index,
        # so accidents are counted as distinct indices rather than rows.
        'total_accidents': df_clean['Accident_Index'].nunique(),
        'avg_age': df_clean['Age_of_Driver'].mean(),
        'avg_vehicle_age': known_vehicle_age.mean(),
    }


def _build_figures(df_clean):
    """Build the four charts, returned as ``(html_div_id, figure)`` pairs."""
    primary_blue = '#1f77b4'
    grey = '#636363'
    white_background = {'plot_bgcolor': 'white', 'paper_bgcolor': 'white'}

    # Age Distribution
    age_fig = px.histogram(
        df_clean,
        x='Age_of_Driver',
        title='Distribution of Driver Ages',
        labels={'Age_of_Driver': 'Age', 'count': 'Frequency'},
        color_discrete_sequence=[primary_blue],
        nbins=30,
    )
    age_fig.update_layout(**white_background)

    # Vehicle Type Distribution
    vehicle_counts = df_clean['Vehicle_Type'].value_counts()
    vehicle_fig = px.bar(
        x=vehicle_counts.index,
        y=vehicle_counts.values,
        title='Distribution of Vehicle Types',
        labels={'x': 'Vehicle Type', 'y': 'Count'},
        color_discrete_sequence=[primary_blue],
    )
    vehicle_fig.update_layout(**white_background)

    # Engine Capacity Distribution; negative capacities are treated as
    # "unknown" (see _summary_stats) and excluded so they do not skew the box.
    known_capacity = df_clean[df_clean['Engine_Capacity_CC'] >= 0]
    engine_fig = px.box(
        known_capacity,
        y='Engine_Capacity_CC',
        title='Engine Capacity Distribution',
        labels={'Engine_Capacity_CC': 'Engine Capacity (CC)'},
        color_discrete_sequence=[primary_blue],
    )
    engine_fig.update_layout(**white_background)

    # Gender Distribution
    gender_counts = df_clean['Sex_of_Driver'].value_counts()
    gender_fig = px.pie(
        values=gender_counts.values,
        names=gender_counts.index,
        title='Distribution of Driver Gender',
        color_discrete_sequence=[primary_blue, grey],
    )

    return [
        ('age_distribution', age_fig),
        ('vehicle_type_dist', vehicle_fig),
        ('engine_capacity_dist', engine_fig),
        ('gender_dist', gender_fig),
    ]


def _render_dashboard_html(stats, charts):
    """Return the complete dashboard page as a single HTML string.

    *stats* is the dict produced by ``_summary_stats``; *charts* is a list of
    ``(div_id, html_fragment)`` pairs that are placed, in order, inside the
    page's chart containers.
    """
    chart_sections = "\n".join(
        '                <div class="chart-container">\n'
        f'                    <div id="{div_id}">{fragment}</div>\n'
        '                </div>'
        for div_id, fragment in charts
    )

    return f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="utf-8">
            <title>Vehicle Accidents Dashboard</title>
            <style>
                body {{
                    font-family: Arial, sans-serif;
                    margin: 0;
                    padding: 20px;
                    background-color: #f5f5f5;
                }}
                .dashboard-container {{
                    max-width: 1200px;
                    margin: 0 auto;
                    background-color: white;
                    padding: 20px;
                    border-radius: 10px;
                    box-shadow: 0 0 10px rgba(0,0,0,0.1);
                }}
                .chart-container {{
                    margin-bottom: 30px;
                    padding: 15px;
                    background-color: white;
                    border-radius: 5px;
                    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
                }}
                h1, h2 {{
                    color: #1f77b4;
                }}
                .summary-stats {{
                    display: grid;
                    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
                    gap: 20px;
                    margin-bottom: 30px;
                }}
                .stat-card {{
                    background-color: #f8f9fa;
                    padding: 15px;
                    border-radius: 5px;
                    text-align: center;
                    border-left: 4px solid #1f77b4;
                }}
            </style>
        </head>
        <body>
            <div class="dashboard-container">
                <h1>Vehicle Accidents Analysis Dashboard</h1>

                <div class="summary-stats">
                    <div class="stat-card">
                        <h3>Total Accidents</h3>
                        <p>{stats['total_accidents']:,}</p>
                    </div>
                    <div class="stat-card">
                        <h3>Average Driver Age</h3>
                        <p>{stats['avg_age']:.1f} years</p>
                    </div>
                    <div class="stat-card">
                        <h3>Average Vehicle Age</h3>
                        <p>{stats['avg_vehicle_age']:.1f} years</p>
                    </div>
                </div>

{chart_sections}
            </div>
        </body>
        </html>
        """


def create_dashboard(data_path="refvehicles.csv",
                     output_path="vehicle_accidents_dashboard.html"):
    """Build a static HTML dashboard from the vehicles CSV.

    Loads and cleans the data, computes summary statistics, renders four
    plotly charts (driver age, vehicle type, engine capacity, driver gender)
    and writes everything into one well-formed HTML page.

    Args:
        data_path: CSV file to read. Defaults to ``"refvehicles.csv"``.
        output_path: HTML file to write. Defaults to
            ``"vehicle_accidents_dashboard.html"``.

    Returns:
        *output_path* on success, or ``None`` if the input file is missing or
        any other error occurs (the error is printed, not raised).
    """
    try:
        # Only a failure to read the input is reported as "file not found";
        # a missing output directory falls through to the generic handler.
        try:
            df_clean = _load_clean_data(data_path)
        except FileNotFoundError:
            print(f"Error: Could not find the file {data_path}")
            print(f"Current working directory: {os.getcwd()}")
            return None

        stats = _summary_stats(df_clean)
        figures = _build_figures(df_clean)
        print("Visualizations created successfully")

        # The first fragment pulls plotly.js from the CDN at the version that
        # matches the installed plotly package; later fragments reuse it.
        # (The "plotly-latest" CDN URL is frozen at an old plotly.js release.)
        charts = [
            (div_id, fig.to_html(full_html=False,
                                 include_plotlyjs='cdn' if position == 0 else False))
            for position, (div_id, fig) in enumerate(figures)
        ]

        # Charts are embedded inside the page body rather than appended after
        # the closing </html> tag, and the file is written as UTF-8 so the
        # result does not depend on the platform's default encoding.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(_render_dashboard_html(stats, charts))

        print(f"Dashboard has been created at: {os.path.abspath(output_path)}")
        return output_path

    except Exception as e:
        # Top-level boundary: report and signal failure with None, as callers
        # of the original function expect.
        print(f"An error occurred: {str(e)}")
        return None

# Run the dashboard creation only when this file is executed as a script.
# dashboard_path holds the value returned by create_dashboard(): the output
# file path on success, or None when an error was reported.
if __name__ == "__main__":
    dashboard_path = create_dashboard()

# Output:
# Reading data from: refvehicles.csv
# Data loaded successfully. Shape: (975680, 8)
# Visualizations created successfully
# Dashboard has been created at: c:\Users\Wolfrank\Desktop\GiGabyte\CodeWolf\UKDataProject\Data\Combined_Data\vehicle_accidents_dashboard.html
