# IV

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import altair as alt

# Register 'vegafusion' as Altair's active data transformer.
# NOTE(review): a later cell calls alt.data_transformers.enable('default', max_rows=None),
# which replaces this setting — confirm which transformer is actually intended.
alt.data_transformers.enable('vegafusion')

DataTransformerRegistry.enable('vegafusion')

In [2]:
# Read the source CSV (path is relative to the current working directory) into df1.
dataset_path = 'filtered_dataset_1_california.csv'
df1 = pd.read_csv(dataset_path)

In [3]:
# Keep only the California rows, then draw a bounded random subset for plotting.
SAMPLE_SIZE = 5000  # upper bound on the number of rows handed to the charts
SAMPLE_SEED = 42    # fixed seed so Restart & Run All reproduces the same sample

california_rows = df1[df1['State'] == 'CA']
# n is capped at the number of available rows because DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
# .copy() makes df an independent frame, so the column assignments in later
# cells do not write into a selection derived from df1.
df = california_rows.sample(
    n=min(SAMPLE_SIZE, len(california_rows)),
    random_state=SAMPLE_SEED,
).copy()

In [4]:
# Group the raw weather condition labels into a handful of coarse bins.
# Bin order matters: a label listed in more than one bin resolves to the FIRST
# bin that contains it.
# NOTE(review): 'T-Storm / Windy', 'Freezing Rain / Windy', 'Blowing Snow Nearby'
# and 'Heavy Rain Shower / Windy' appear in both 'Rainy' and 'Windy', so they
# always resolve to 'Rainy' — confirm that is the intended category.
weather_bins = {
    'Clear': ['Clear', 'Fair'],
    'Cloudy': ['Cloudy', 'Mostly Cloudy', 'Partly Cloudy', 'Scattered Clouds'],
    'Rainy': ['Light Rain', 'Rain', 'Light Freezing Drizzle', 'Light Drizzle', 'Heavy Rain', 'Light Freezing Rain', 'Drizzle', 'Light Freezing Fog', 'Light Rain Showers', 'Showers in the Vicinity', 'T-Storm', 'Thunder', 'Patches of Fog', 'Heavy T-Storm', 'Heavy Thunderstorms and Rain', 'Funnel Cloud', 'Heavy T-Storm / Windy', 'Heavy Thunderstorms and Snow', 'Rain / Windy', 'Heavy Rain / Windy', 'Squalls', 'Heavy Ice Pellets', 'Thunder / Windy', 'Drizzle and Fog', 'T-Storm / Windy', 'Smoke / Windy', 'Haze / Windy', 'Light Drizzle / Windy', 'Widespread Dust / Windy', 'Wintry Mix', 'Wintry Mix / Windy', 'Light Snow with Thunder', 'Fog / Windy', 'Snow and Thunder', 'Sleet / Windy', 'Heavy Freezing Rain / Windy', 'Squalls / Windy', 'Light Rain Shower / Windy', 'Snow and Thunder / Windy', 'Light Sleet / Windy', 'Sand / Dust Whirlwinds', 'Mist / Windy', 'Drizzle / Windy', 'Duststorm', 'Sand / Dust Whirls Nearby', 'Thunder and Hail', 'Freezing Rain / Windy', 'Light Snow Shower / Windy', 'Partial Fog', 'Thunder / Wintry Mix / Windy', 'Patches of Fog / Windy', 'Rain and Sleet', 'Light Snow Grains', 'Partial Fog / Windy', 'Sand / Dust Whirlwinds / Windy', 'Heavy Snow with Thunder', 'Heavy Blowing Snow', 'Low Drifting Snow', 'Light Hail', 'Light Thunderstorm', 'Heavy Freezing Drizzle', 'Light Blowing Snow', 'Thunderstorms and Snow', 'Heavy Rain Showers', 'Rain Shower / Windy', 'Sleet and Thunder', 'Heavy Sleet and Thunder', 'Drifting Snow / Windy', 'Shallow Fog / Windy', 'Thunder and Hail / Windy', 'Heavy Sleet / Windy', 'Sand / Windy', 'Heavy Rain Shower / Windy', 'Blowing Snow Nearby', 'Blowing Sand', 'Heavy Rain Shower', 'Drifting Snow', 'Heavy Thunderstorms with Small Hail'],
    'Windy': ['Blowing Dust / Windy', 'Fair / Windy', 'Mostly Cloudy / Windy', 'Light Rain / Windy', 'T-Storm / Windy', 'Blowing Snow / Windy', 'Freezing Rain / Windy', 'Light Snow and Sleet / Windy', 'Sleet and Thunder / Windy', 'Blowing Snow Nearby', 'Heavy Rain Shower / Windy'],
    'Hail': ['Hail'],
    'Volcanic Ash': ['Volcanic Ash'],
    'Tornado': ['Tornado']
}


def _build_weather_lookup(bins):
    """Return a {raw condition: bin name} index in which the first bin listing a label wins."""
    lookup = {}
    for bin_name, conditions in bins.items():
        for condition in conditions:
            # setdefault never overwrites, which preserves first-bin precedence.
            lookup.setdefault(condition, bin_name)
    return lookup


# Built once so each row costs a single dict lookup instead of a scan over every bin list.
_WEATHER_LOOKUP = _build_weather_lookup(weather_bins)


def map_weather_to_bins(weather):
    """Map a raw weather condition label to its coarse bin name.

    Parameters
    ----------
    weather : object
        Raw condition label (normally a string; missing values arrive as NaN).

    Returns
    -------
    str
        The name of the first bin in ``weather_bins`` that lists ``weather``,
        or ``'Other'`` when no bin lists it (including NaN / None).
    """
    try:
        return _WEATHER_LOOKUP.get(weather, 'Other')
    except TypeError:
        # Unhashable input (e.g. a list) can never equal a label string.
        return 'Other'

# Derive the coarse 'Weather' category for every row from its raw condition label.
raw_conditions = df['Weather_Condition']
df['Weather'] = raw_conditions.apply(map_weather_to_bins)

In [5]:
# Switch Altair to the 'default' data transformer with the row limit lifted
# (max_rows=None); this replaces the 'vegafusion' transformer enabled in the first cell.
alt.data_transformers.enable('default', max_rows=None)

# Parse the start timestamps once, then derive the calendar year and hour of day.
start_times = pd.to_datetime(df['Start_Time'])
df['Start_Time'] = start_times
df['Year'] = start_times.dt.year
df['Hour'] = start_times.dt.hour

In [6]:
#Code for california map
import geopandas as gpd

# Load the county boundaries and serialise them to a GeoJSON string for Altair.
california_geojson = gpd.read_file('California_County_Boundaries 2.geojson')
california_json = california_geojson.to_json()

# Cast the numeric weather measurements to float before charting.
for measurement in ('Temperature(F)', 'Humidity(%)', 'Pressure(in)', 'Visibility(mi)'):
    df[measurement] = df[measurement].astype(float)

# Legend-bound selection: clicking a legend entry selects that 'Weather' bin.
# selection_point replaces selection_multi, which is deprecated in Altair 5.
weather_selection = alt.selection_point(fields=['Weather'], bind='legend')

# Background layer: the county outlines.
state_boundary = alt.Chart(alt.Data(values=california_json)).mark_geoshape(
    fill='lightgray',
    stroke='black'
).properties(
    width=600,
    height=400
).project('albersUsa')

# Foreground layer: one circle per sampled accident, coloured by weather bin.
# Rows outside the legend selection are drawn with size 0, i.e. hidden.
city_points = alt.Chart(df).mark_circle().encode(
    latitude='Start_Lat:Q',
    longitude='Start_Lng:Q',
    color='Weather:N',
    opacity=alt.condition(weather_selection, alt.value(1), alt.value(0.5)),
    size=alt.condition(weather_selection, alt.value(100), alt.value(0)),
    tooltip=[
        alt.Tooltip('City:N', title="City"),
        alt.Tooltip('Weather_Condition:N', title="Weather"),
        alt.Tooltip('Humidity(%):Q', title="Humidity (%)"),
        alt.Tooltip('Pressure(in):Q', title="Pressure (in)"),
        alt.Tooltip('Temperature(F):Q', title="Temperature (F)"),
    ]
).add_params(  # add_params replaces the deprecated add_selection
    weather_selection
).project('albersUsa')

# Layer the accident points on top of the county outlines.
california_map = state_boundary + city_points

# 'california_map' is your final Altair chart object
#california_map

In [7]:
# Collapse the boolean road-feature flags into a single categorical 'Road_Condition'
# column. np.select uses the first flag (in list order) that is True for a row;
# rows with no flag set get 'Others'.
road_conditions = ['Bump', 'Crossing', 'Junction', 'Traffic_Signal']
df['Road_Condition'] = np.select(
    [df[cond] == True for cond in road_conditions],
    road_conditions,
    default='Others'
)

# Filter widgets. The leading None option in each dropdown clears that filter.
# selection_point replaces selection_single, which is deprecated in Altair 5.
road_condition_dropdown = alt.binding_select(options=[None] + sorted(df['Road_Condition'].dropna().unique().tolist()), name='Road Condition: ')
road_condition_select = alt.selection_point(fields=['Road_Condition'], bind=road_condition_dropdown, name="Road Condition Select")
severity_dropdown = alt.binding_select(options=[None] + sorted(df['Severity'].unique().tolist()), name='Severity: ')
severity_select = alt.selection_point(fields=['Severity'], bind=severity_dropdown, name="Severity Select")
hour_slider = alt.binding_range(min=0, max=23, step=1, name='Hour: ')
hour_select = alt.selection_point(fields=['Hour'], bind=hour_slider, name="Hour Select")
weather_dropdown = alt.binding_select(options=[None] + sorted(df['Weather'].dropna().unique().tolist()), name='Weather: ')
weather_select = alt.selection_point(fields=['Weather'], bind=weather_dropdown, name="Weather Select")

# Accidents per hour, one line per severity level, after applying every widget filter.
accidents_base = alt.Chart(df).transform_filter(
    severity_select
).transform_filter(
    weather_select
).transform_filter(
    hour_select
).transform_filter(
    road_condition_select
).mark_line(
    # Once the hour slider is used each series has a single x value, which a bare
    # line cannot draw; point markers keep that single value visible.
    point=True
).encode(
    x=alt.X('Hour:O', title='Hour of the Day'),
    y=alt.Y('count()', title='Number of Accidents'),
    color=alt.Color('Severity:N', legend=alt.Legend(title="Severity Level")),
    # Only fields that are already grouping keys (Hour, Severity) may appear here:
    # every un-aggregated field in an encoding becomes a group-by key for count(),
    # so listing 'Weather' / 'Road_Condition' split each hourly total into one
    # point per weather x road-condition combination.
    tooltip=[
        alt.Tooltip('Hour:O', title='Hour'),
        alt.Tooltip('Severity:N', title='Severity'),
        alt.Tooltip('count()', title='Accidents'),
    ]
).properties(
    width=800,
    height=400,
    title='Temporal and Weather Analysis of Accidents with Road Conditions'
)

# Attach the widgets to the chart (add_params replaces the deprecated add_selection).
final_chart = accidents_base.add_params(
    severity_select,
    weather_select,
    hour_select,
    road_condition_select
)

#final_chart

In [8]:
#Code using brush
# Load the county boundaries and serialise them to a GeoJSON string for Altair.
california_geojson = gpd.read_file('California_County_Boundaries 2.geojson')
california_json = california_geojson.to_json()

# Interval (drag) selection drawn on the map; it also filters the line chart.
brush = alt.selection_interval()

# Weather dropdown shared by the map and the line chart. None clears the filter.
# selection_point replaces selection_single, which is deprecated in Altair 5.
weather_dropdown = alt.binding_select(options=[None] + sorted(df['Weather'].dropna().unique().tolist()), name='Weather: ')
weather_select = alt.selection_point(fields=['Weather'], bind=weather_dropdown, name="Weather Select")

# Background layer: the county outlines.
state_boundary = alt.Chart(alt.Data(values=california_json)).mark_geoshape(
    fill='lightgray',
    stroke='black'
).properties(
    width=600,
    height=400,
    title='California Counties'
).project('albersUsa')

# Foreground layer: accident locations, restricted to the chosen weather bin.
city_points = alt.Chart(df).mark_circle().encode(
    latitude='Start_Lat:Q',
    longitude='Start_Lng:Q',
    color=alt.condition(weather_select, 'Weather:N', alt.value('lightgray')),
    tooltip=[
        alt.Tooltip('City:N', title="City"),
        alt.Tooltip('Weather:N', title="Weather"),
        alt.Tooltip('Humidity(%):Q', title="Humidity (%)"),
        alt.Tooltip('Pressure(in):Q', title="Pressure (in)"),
        alt.Tooltip('Temperature(F):Q', title="Temperature (F)"),
    ]
).transform_filter(
    weather_select
).add_params(  # add_params replaces the deprecated add_selection
    brush
).project('albersUsa')

# Layer the accident points on top of the county outlines.
california_map = state_boundary + city_points

# Extra dropdown filters for the line chart.
severity_dropdown = alt.binding_select(options=[None] + sorted(df['Severity'].unique().tolist()), name='Severity: ')
severity_select = alt.selection_point(fields=['Severity'], bind=severity_dropdown)

road_condition_dropdown = alt.binding_select(options=[None] + sorted(df['Road_Condition'].unique().tolist()), name='Road Condition: ')
road_condition_select = alt.selection_point(fields=['Road_Condition'], bind=road_condition_dropdown)

# Accidents per hour, one line per severity, limited to rows passing every
# dropdown filter and lying inside the map brush.
accidents_base = alt.Chart(df).transform_filter(
    weather_select & severity_select & road_condition_select & brush
).mark_line().encode(
    x='Hour:O',
    y=alt.Y('count()', title='Number of Accidents'),
    color='Severity:N',
    # Only fields that are already grouping keys (Hour, Severity) may appear here:
    # every un-aggregated field in an encoding becomes a group-by key for count(),
    # so listing 'Weather' / 'Road_Condition' split each hourly total into one
    # point per weather x road-condition combination.
    tooltip=[
        alt.Tooltip('Hour:O', title='Hour'),
        alt.Tooltip('Severity:N', title='Severity'),
        alt.Tooltip('count()', title='Accidents'),
    ]
).properties(
    width=800,
    height=400,
    title='Temporal and Weather Analysis of Accidents with Road Conditions'
).add_params(
    weather_select,
    severity_select,
    road_condition_select
)

# Place the map and the line chart side by side.
combined_chart = alt.hconcat(california_map, accidents_base)

# Display and save the combined chart
#combined_chart.display()
#combined_chart.save('combined_chart.html')

In [9]:
#Final dashboard
# Load the county boundaries and serialise them to a GeoJSON string for Altair.
california_geojson = gpd.read_file('California_County_Boundaries 2.geojson')
california_json = california_geojson.to_json()

# Weather dropdown shared by the map and the line chart. None clears the filter.
# selection_point replaces selection_single, which is deprecated in Altair 5.
weather_dropdown = alt.binding_select(options=[None] + sorted(df['Weather'].dropna().unique().tolist()), name='Weather: ')
weather_select = alt.selection_point(fields=['Weather'], bind=weather_dropdown, name="Weather Select")

# Interval (drag) selection drawn on the map; it also filters the line chart.
brush = alt.selection_interval()

# Background layer: the county outlines.
state_boundary = alt.Chart(alt.Data(values=california_json)).mark_geoshape(
    fill='lightgray',
    stroke='black'
).properties(
    width=350,
    height=450,
    title='California Accident Spots by Weather Conditions'
).project('albersUsa')

# Foreground layer: accident locations, restricted to the chosen weather bin.
city_points = alt.Chart(df).mark_circle().encode(
    latitude='Start_Lat:Q',
    longitude='Start_Lng:Q',
    color=alt.condition(weather_select, 'Weather:N', alt.value('lightgray')),
    tooltip=[
        alt.Tooltip('City:N', title="City"),
        alt.Tooltip('Weather:N', title="Weather"),
        alt.Tooltip('Humidity(%):Q', title="Humidity (%)"),
        alt.Tooltip('Pressure(in):Q', title="Pressure (in)"),
        alt.Tooltip('Temperature(F):Q', title="Temperature (F)"),
    ]
).transform_filter(
    weather_select
).add_params(  # add_params replaces the deprecated add_selection
    brush
).project('albersUsa').properties(
    width=350,
    height=450
)

# Extra dropdown filters for the line chart.
severity_dropdown = alt.binding_select(options=[None] + sorted(df['Severity'].unique().tolist()), name='Severity: ')
severity_select = alt.selection_point(fields=['Severity'], bind=severity_dropdown)

road_condition_dropdown = alt.binding_select(options=[None] + sorted(df['Road_Condition'].unique().tolist()), name='Road Condition: ')
road_condition_select = alt.selection_point(fields=['Road_Condition'], bind=road_condition_dropdown)

# Accidents per hour, one line per severity, limited to rows passing every
# dropdown filter and lying inside the map brush.
accidents_base = alt.Chart(df).transform_filter(
    weather_select & severity_select & road_condition_select & brush
).mark_line().encode(
    x='Hour:O',
    y=alt.Y('count()', title='Number of Accidents'),
    color='Severity:N',
    # Only fields that are already grouping keys (Hour, Severity) may appear here:
    # every un-aggregated field in an encoding becomes a group-by key for count(),
    # so listing 'Weather' / 'Road_Condition' split each hourly total into one
    # point per weather x road-condition combination.
    tooltip=[
        alt.Tooltip('Hour:O', title='Hour'),
        alt.Tooltip('Severity:N', title='Severity'),
        alt.Tooltip('count()', title='Accidents'),
    ]
).properties(
    width=800,
    height=500,
    title='Temporal Analysis of Accidents: Severity, Weather, and Road Conditions'
).add_params(
    weather_select,
    severity_select,
    road_condition_select
)

# Map (outlines + points) on the left, line chart on the right. The two panels
# keep independent colour scales because they colour by different fields.
combined_chart = alt.hconcat(
    state_boundary + city_points,
    accidents_base,
    spacing=150,
).configure_view(
    strokeWidth=0
).resolve_scale(
    color='independent'
).configure_title(
    anchor='middle',
    fontSize=20
)
# Add padding around the dashboard (widest on the left).
combined_chart = combined_chart.properties(
    padding={"left": 100, "top": 50, "right": 20, "bottom": 20}
)
# Display and optionally save the combined chart
# combined_chart.display()
combined_chart.save('SystemB.html')
