In [2]:
# !pip install altair vega_datasets

In [3]:

# Import necessary libraries for data manipulation and visualization
import pandas as pd
import altair as alt
from datetime import datetime, timedelta

# Lift Altair's default row limit so charts can be built directly from DataFrames
# larger than that limit (presumably the accident CSV exceeds it — confirm against the file size).
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

In [4]:

# Road-infrastructure flags whose values must be present for a record to be analysed
important_road_features = ['Crossing', 'Bump', 'Traffic_Signal', 'Junction']

# Read the raw California accident records from disk
accident_dataset = pd.read_csv('filtered_dataset_1_california.csv')

# Keep only the records that have a value for every one of the road-feature flags
filtered_accident_data = accident_dataset.dropna(subset=important_road_features)


In [5]:
# Task 1 and Task 2: Analyze accident frequency in major cities and the impact of weather on accident severity

# Number of cities (ranked by accident records) shown in the heatmap and fed to the SPLOM
TOP_CITY_COUNT = 50

# Interval brush that links the heatmap to the SPLOM.
# `alt.selection(type='interval')` is deprecated in favour of `alt.selection_interval()`.
# The brush is limited to the y (City) channel because x encodes an aggregated count(),
# which has no matching column in the row-level SPLOM data to filter on.
selection_brush = alt.selection_interval(encodings=['y'])

# Determine the cities with the most accident records
top_cities = filtered_accident_data['City'].value_counts().head(TOP_CITY_COUNT).index

# Keep only the data from these top cities
city_specific_data = filtered_accident_data[filtered_accident_data['City'].isin(top_cities)]

# Heatmap of accident frequency by city; both x position and colour encode the record count
city_accident_heatmap = alt.Chart(city_specific_data).mark_rect().encode(
    y=alt.Y('City:N', sort='-x'),  # Order cities by descending record count
    color=alt.Color('count():Q', scale=alt.Scale(scheme='reds'), legend=alt.Legend(title="Accident Counts")),
    x=alt.X('count()', title='Number of Records')
).properties(
    width=200,
    height=500,
    title=f'Accident Frequency in Top {TOP_CITY_COUNT} Cities of California'
).add_params(selection_brush)

# Only the columns the SPLOM encodes or shows in its tooltip
weather_impact_data = city_specific_data[['City', 'Weather_Condition', 'Severity', 'Temperature(F)', 'Humidity(%)','Pressure(in)']]

# Scatter-plot matrix of the weather measurements, coloured by severity and
# filtered to the cities currently brushed in the heatmap
weather_severity_splom = alt.Chart(weather_impact_data).mark_circle().encode(
    alt.X(alt.repeat("column"), type='quantitative', scale=alt.Scale(zero=False)),
    alt.Y(alt.repeat("row"), type='quantitative', scale=alt.Scale(zero=False)),
    color=alt.Color('Severity:N', scale=alt.Scale(range=['yellow', 'lightcoral', 'red', 'darkred']), legend=alt.Legend(title="Severity Levels")),
    tooltip=['City:N', 'Weather_Condition:N', 'Severity:Q', 'Temperature(F):Q', 'Humidity(%):Q', 'Pressure(in):Q']
).properties(
    width=150,  # Size of each SPLOM panel
    height=150
).repeat(
    row=['Temperature(F)', 'Humidity(%)', 'Pressure(in)'],
    column=['Temperature(F)', 'Humidity(%)', 'Pressure(in)']
).transform_filter(
    selection_brush
).properties(
    title='Weather Conditions vs. Accident Severity SPLOM'
)

# Place the heatmap and the SPLOM side by side so the brush on the left drives the right
comprehensive_dashboard = alt.hconcat(city_accident_heatmap, weather_severity_splom)


   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.


In [6]:
# Task 3: Effect of road conditions on the severity of accidents

# Cast the road-feature columns to strings so the Vega expression below can compare
# them against the literal "True". A single astype call returns a new frame (rebinding
# the name) instead of assigning column-by-column into the result of dropna().
filtered_accident_data = filtered_accident_data.astype(
    {feature: str for feature in important_road_features}
)

# Hover highlight shared by all bar plots; nothing is highlighted until a bar is hovered.
# selection_point(..., empty=False) replaces the deprecated selection_single(..., empty='none').
highlight_selection = alt.selection_point(on='mouseover', empty=False)


def _road_feature_barplot(data, feature, selection):
    """Build one bar chart of average severity with vs. without a road feature.

    Parameters
    ----------
    data : pandas.DataFrame
        Accident records containing ``Severity`` and the string-typed ``feature`` column.
    feature : str
        Name of the road-feature column; it is interpolated into a Vega expression
        as ``datum.<feature>``.
    selection : altair selection parameter
        Selection used to raise the opacity of the highlighted bar.

    Returns
    -------
    altair.Chart
        Horizontal bar chart with one bar for "Presence" and one for "Absence".
    """
    presence_field = f'{feature}_Presence'
    return alt.Chart(data).mark_bar().encode(
        x=alt.X('average(Severity):Q', title='Average Severity'),
        y=alt.Y(
            f'{presence_field}:N',
            axis=alt.Axis(title=None),
            scale=alt.Scale(domain=['Presence', 'Absence'])
        ),
        color=alt.condition(
            alt.datum[presence_field] == 'Presence',
            alt.value('lightsalmon'),  # Feature present
            alt.value('darkred')  # Feature absent
        ),
        opacity=alt.condition(
            selection,
            alt.value(1),  # Highlighted bar
            alt.value(0.7)  # All other bars
        )
    ).transform_calculate(
        # Derive the Presence/Absence label from the stringified flag
        **{presence_field: f'if(datum.{feature} == "True", "Presence", "Absence")'}
    ).properties(
        width=200,
        height=50,
        title=f'Impact of {feature} on Accident Severity'
    ).add_params(
        selection  # add_params replaces the deprecated add_selection
    )


# One bar plot per road feature, in the order the features are listed
barplot_list = [
    _road_feature_barplot(filtered_accident_data, feature, highlight_selection)
    for feature in important_road_features
]

# Stack the bar plots vertically under a common title
combined_bar_charts = alt.vconcat(*barplot_list).properties(title="Impact of Road Conditions on Accident Severity")




In [7]:
# Task 4: Identify the peak hours of accidents

# Reload the dataset so this task works on all records, independent of earlier filtering
original_accident_data = pd.read_csv('filtered_dataset_1_california.csv')

# Convert the 'Start_Time' column to datetime format for easier manipulation
original_accident_data['Start_Time'] = pd.to_datetime(original_accident_data['Start_Time'])

# Label each record with its hour of day in 12-hour AM/PM form (e.g. "01 PM")
original_accident_data['Hour_of_Day_AMPM'] = original_accident_data['Start_Time'].dt.strftime('%I %p')

# The labels are strings, so an 'ascending' sort is alphabetical ("01 AM", "01 PM", "02 AM", ...).
# Build the 24 labels in clock order with the same format string and sort by that list instead.
hour_labels_chronological = [datetime(2000, 1, 1, hour).strftime('%I %p') for hour in range(24)]

# Area chart of the number of accidents per hour of the day
accident_hour_histogram = alt.Chart(original_accident_data).mark_area(
    line={'color': 'darkred'},  # Outline the area with a dark red line
    color=alt.Gradient(
        gradient='linear',
        stops=[alt.GradientStop(color='lightcoral', offset=0),
               alt.GradientStop(color='white', offset=1)],
        x1=1, x2=1, y1=1, y2=0
    )
).encode(
    x=alt.X('Hour_of_Day_AMPM:N', title='Hour of the Day', sort=hour_labels_chronological),  # Midnight through 11 PM
    y=alt.Y('count()', title='Number of Accidents'),
    tooltip=[alt.Tooltip('count()', title='Number of Accidents'), 'Hour_of_Day_AMPM']
).properties(
    title='Accidents by Hour of the Day (AM/PM)',
    width=600,
    height=400
).interactive()  # Enable pan/zoom

# Combine the previous visualizations with the histogram into an extended dashboard
final_dashboard_extended = alt.vconcat(comprehensive_dashboard, accident_hour_histogram)

# Display the final extended dashboard
# final_dashboard_extended


In [8]:
# Assemble every view into the single dashboard that gets exported

# Bottom row: road-condition bar charts next to the accidents-by-hour chart
# (`|` is Altair's horizontal-concatenation operator)
extended_dashboard_components = (combined_bar_charts | accident_hour_histogram).properties(
    title="Detailed Analysis of Road Conditions and Accident Frequencies by Time"
)

# Full layout: the city heatmap + weather SPLOM row on top, the bottom row beneath it
# (`&` is Altair's vertical-concatenation operator)
final_group_7C_dashboard = (comprehensive_dashboard & extended_dashboard_components).properties(
    title="Comprehensive Traffic Accident Analysis Dashboard for California"
)

# Write the dashboard to an HTML file so it can be opened in a web browser
final_group_7C_dashboard.save('SystemC.html')

# Optionally, display the final comprehensive dashboard in the output for an immediate view
# final_group_7C_dashboard
