In [9]:
# One-time setup: uncomment and run to install the VegaFusion backend that the next cell enables.
# %pip install "vegafusion[embed]>=1.4.0"

In [10]:
import pandas as pd
import numpy as np
import warnings
# Suppress every warning raised from this point on, including any emitted while altair is imported below.
# NOTE(review): this is a blanket filter, so pandas/Altair deprecation and copy warnings are hidden too.
warnings.filterwarnings('ignore')
import altair as alt

# Switch Altair's data transformer to the 'vegafusion' backend.
# Requires the vegafusion package (see the commented install line in the first cell).
alt.data_transformers.enable('vegafusion')

DataTransformerRegistry.enable('vegafusion')

In [11]:
# Load the accident records from the CSV export.
sourcedata = pd.read_csv(r'filtered_dataset_1_california.csv')

# Keep only the California rows. The explicit .copy() makes `df` an independent
# frame: later cells add columns to it, and assigning into a bare boolean-mask
# slice is what pandas reports as SettingWithCopyWarning.
df = sourcedata[sourcedata['State'] == 'CA'].copy()

# Interval (click-and-drag) selection shared by the strip plot and the bar charts.
brush = alt.selection_interval()

In [12]:
# Severity level -> colour, shared by the strip plot and the bar charts.
severity_colors = {
    1: 'lightblue',
    2: 'gold',
    3: '#ff7f00',  # tangerine
    4: '#d62728',  # maroon
}

# Strip plot with jitter: one point per accident, County on x, Start_Time on y.
# `jitter` is a per-row random() value used as the xOffset so points within a
# county band are spread horizontally instead of stacking on one line.
points = alt.Chart(df).mark_point(filled=False, size=8).encode(
    x=alt.X('County:N', title='Counties'),
    y=alt.Y('Start_Time:T', title='Start Time'),
    xOffset="jitter:Q",
    # Points inside the brush keep their severity colour; the rest are greyed out.
    # Severity is typed ordinal (not quantitative) so the four colours form a
    # discrete scale, matching the size channel below and the bar charts.
    color=alt.condition(
        brush,
        alt.Color(
            'Severity:O',
            scale=alt.Scale(
                domain=list(severity_colors.keys()),
                range=list(severity_colors.values()),
            ),
            legend=alt.Legend(title='Severity'),
        ),
        alt.value('lightgray'),
    ),
    # Point area grows with severity; this encoding takes precedence over the
    # constant size passed to mark_point above.
    size=alt.Size('Severity:O',
                  scale=alt.Scale(range=[10, 40, 70, 150])),
    tooltip=['County', 'Start_Time','Weather_Condition','Temperature(F)','Humidity(%)','Pressure(in)']
).transform_calculate(
    jitter='random()'
).properties(
    width=1000,
    height=400
).add_params(
    brush
).interactive()
# NOTE(review): .interactive() adds its own scale-bound interval selection, which
# presumably competes with `brush` for drag gestures - confirm the intended interaction.
# No resolve_scale() call is made here: it returns a new chart rather than
# modifying `points`, and scale resolution only applies across composed views.

# Convert the timestamp column to datetimes (in place, because `points` already
# references this frame) and derive the calendar parts used further down.
start_times = pd.to_datetime(df['Start_Time'])
df['Start_Time'] = start_times
df['Year'] = start_times.dt.year
df['Hour'] = start_times.dt.hour

# Slider widget covering hours 0-23, bound to a point selection on the Hour column.
hour_slider = alt.binding_range(name='Hour of each day: ', min=0, max=23, step=1)
hour_selection = alt.selection_point(name="Select Hour", bind=hour_slider, fields=['Hour'])

# Register the selection on the strip plot, then keep only the rows it matches.
hour_param_chart = points.add_params(hour_selection)
filtered_chart = hour_param_chart.transform_filter(hour_selection)
# Road points of interest. Each name is a flag column in df; np.select takes the
# first flag (in this order) that is True for a row, falling back to 'Others'.
road_conditions = ['Amenity', 'Bump', 'Crossing', 'Junction', 'Traffic_Signal']
poi_masks = [df[flag].eq(True) for flag in road_conditions]
df['Road_Condition'] = np.select(poi_masks, road_conditions, default='Others')

# Dropdown listing every label present in the data, with None offered first
# (presumably the "no filter" choice - confirm against the rendered widget).
poi_labels = df['Road_Condition'].dropna().unique().tolist()
poi_options = [None, *sorted(poi_labels)]
road_condition_dropdown = alt.binding_select(name='Road POI Condition: ', options=poi_options)
road_condition_select = alt.selection_point(
    name="Road Condition Select",
    bind=road_condition_dropdown,
    fields=['Road_Condition'],
)

# Register the dropdown selection on the hour-filtered chart, then filter by it.
poi_param_chart = filtered_chart.add_params(road_condition_select)
selected_chart = poi_param_chart.transform_filter(road_condition_select)

# Weather bins: coarse category name -> the raw Weather_Condition strings it covers.
# map_weather_to_bins scans the bins in the order written here and returns the
# first match, so a string listed under two bins is always assigned to the
# earlier one. Several 'Windy' entries ('T-Storm / Windy', 'Freezing Rain / Windy',
# 'Blowing Snow Nearby', 'Heavy Rain Shower / Windy', 'Light Snow and Sleet / Windy')
# also appear under 'Rainy' or 'Snowy' and therefore never map to 'Windy'.
# NOTE(review): 'Rainy' also holds fog, dust, smoke, sleet and snow conditions -
# confirm this grouping is intended.
weather_bins = {
    'Clear': ['Clear', 'Fair'],
    'Cloudy': ['Cloudy', 'Mostly Cloudy', 'Partly Cloudy', 'Scattered Clouds'],
    'Rainy': ['Light Rain', 'Rain', 'Light Freezing Drizzle', 'Light Drizzle', 'Heavy Rain', 'Light Freezing Rain', 'Drizzle', 'Light Freezing Fog', 'Light Rain Showers', 'Showers in the Vicinity', 'T-Storm', 'Thunder', 'Patches of Fog', 'Heavy T-Storm', 'Heavy Thunderstorms and Rain', 'Funnel Cloud', 'Heavy T-Storm / Windy', 'Heavy Thunderstorms and Snow', 'Rain / Windy', 'Heavy Rain / Windy', 'Squalls', 'Heavy Ice Pellets', 'Thunder / Windy', 'Drizzle and Fog', 'T-Storm / Windy', 'Smoke / Windy', 'Haze / Windy', 'Light Drizzle / Windy', 'Widespread Dust / Windy', 'Wintry Mix', 'Wintry Mix / Windy', 'Light Snow with Thunder', 'Fog / Windy', 'Snow and Thunder', 'Sleet / Windy', 'Heavy Freezing Rain / Windy', 'Squalls / Windy', 'Light Rain Shower / Windy', 'Snow and Thunder / Windy', 'Light Sleet / Windy', 'Sand / Dust Whirlwinds', 'Mist / Windy', 'Drizzle / Windy', 'Duststorm', 'Sand / Dust Whirls Nearby', 'Thunder and Hail', 'Freezing Rain / Windy', 'Light Snow Shower / Windy', 'Partial Fog', 'Thunder / Wintry Mix / Windy', 'Patches of Fog / Windy', 'Rain and Sleet', 'Light Snow Grains', 'Partial Fog / Windy', 'Sand / Dust Whirlwinds / Windy', 'Heavy Snow with Thunder', 'Heavy Blowing Snow', 'Low Drifting Snow', 'Light Hail', 'Light Thunderstorm', 'Heavy Freezing Drizzle', 'Light Blowing Snow', 'Thunderstorms and Snow', 'Heavy Rain Showers', 'Rain Shower / Windy', 'Sleet and Thunder', 'Heavy Sleet and Thunder', 'Drifting Snow / Windy', 'Shallow Fog / Windy', 'Thunder and Hail / Windy', 'Heavy Sleet / Windy', 'Sand / Windy', 'Heavy Rain Shower / Windy', 'Blowing Snow Nearby', 'Blowing Sand', 'Heavy Rain Shower', 'Drifting Snow', 'Heavy Thunderstorms with Small Hail'],
    'Snowy': ['Light Snow', 'Snow', 'Light Snow / Windy', 'Snow Grains', 'Snow Showers', 'Snow / Windy', 'Light Snow and Sleet', 'Snow and Sleet', 'Light Snow and Sleet / Windy', 'Snow and Sleet / Windy'],
    'Windy': ['Blowing Dust / Windy', 'Fair / Windy', 'Mostly Cloudy / Windy', 'Light Rain / Windy', 'T-Storm / Windy', 'Blowing Snow / Windy', 'Freezing Rain / Windy', 'Light Snow and Sleet / Windy', 'Sleet and Thunder / Windy', 'Blowing Snow Nearby', 'Heavy Rain Shower / Windy'],
    'Hail': ['Hail'],
    'Volcanic Ash': ['Volcanic Ash'],
    'Tornado': ['Tornado']
}

def map_weather_to_bins(weather, bins=None):
    """Return the name of the first weather bin that lists ``weather``.

    Parameters
    ----------
    weather : object
        A raw weather-condition value. Values that appear in no bin
        (including missing values such as None or NaN) fall through to 'Other'.
    bins : mapping of str to collection, optional
        Bin name -> values belonging to that bin. Bins are checked in the
        mapping's iteration order and the first one containing ``weather``
        wins, so a value listed under several bins resolves to the earliest.
        Defaults to the module-level ``weather_bins``.

    Returns
    -------
    str
        The matching bin name, or 'Other' when no bin contains ``weather``.
    """
    if bins is None:
        # Looked up at call time so the function still follows the
        # module-level table when called with a single argument.
        bins = weather_bins
    for bin_name, bin_values in bins.items():
        if weather in bin_values:
            return bin_name
    return 'Other'

# Collapse each raw Weather_Condition string into its coarse weather bin.
df['Weather'] = df['Weather_Condition'].apply(map_weather_to_bins)

# Discrete severity -> colour scale built from the shared palette.
weather_severity_scale = alt.Scale(
    type='ordinal',
    domain=list(severity_colors),
    range=[severity_colors[level] for level in severity_colors],
)

# Accident counts per weather bin, coloured by severity (legend hidden).
weather_bars = alt.Chart(df).mark_bar().encode(
    x=alt.X('Weather:O'),
    y=alt.Y('count():Q'),
    color=alt.Color('Severity:N', scale=weather_severity_scale, legend=None),
)

# Only count the rows currently selected by the strip-plot brush.
chart_weather = weather_bars.transform_filter(brush).properties(width=200, height=200)

# Bin Temperature(F) into labelled ranges. pd.cut uses right-closed intervals and
# include_lowest=True also admits the lowest edge; readings outside
# [-8, 207] are left unbinned (NaN).
t_bins = [-8.0, 50, 55, 60, 65, 70, 75, 80, 85, 90, 207]
# One label per interval, coldest first. Every label carries the °F suffix.
t_labels = ['<50°F', '51-55°F', '56-60°F', '61-65°F', '66-70°F', '71-75°F', '76-80°F', '81-85°F', '86-90°F', '>90°F']
df['Temperature'] = pd.cut(df['Temperature(F)'], bins=t_bins, labels=t_labels, include_lowest=True)

# Accident counts per temperature range, coloured by severity (legend hidden)
# and restricted to the rows selected by the strip-plot brush.
chart_temperature = alt.Chart(df).mark_bar().encode(
    # Explicit sort: the default ordinal order is lexicographic, which would
    # place '<50°F' after the digit-leading labels instead of first.
    x=alt.X('Temperature:O', sort=t_labels),
    y=alt.Y('count():Q'),
    color=alt.Color('Severity:N', scale=alt.Scale(domain=list(severity_colors.keys()), range=list(severity_colors.values()), type='ordinal'), legend=None)
).transform_filter(
    brush
).properties(width=200, height=200)

# Bin Humidity(%) into five 20-point ranges. The edges must line up with the
# labels below; pd.cut uses right-closed intervals and include_lowest=True
# also admits a reading of exactly 0.
h_bins = [0, 20, 40, 60, 80, 100]
h_labels = ['0-20', '20-40', '40-60', '60-80', '80-100']
df['Humidity'] = pd.cut(df['Humidity(%)'], bins=h_bins, labels=h_labels, include_lowest=True)

# Accident counts per humidity range, coloured by severity (legend hidden)
# and restricted to the rows selected by the strip-plot brush.
chart_humidity = alt.Chart(df).mark_bar().encode(
    x=alt.X('Humidity:O'),
    y=alt.Y('count():Q'),
    color=alt.Color('Severity:N', scale=alt.Scale(domain=list(severity_colors.keys()), range=list(severity_colors.values()), type='ordinal'), legend=None)
).transform_filter(
    brush
).properties(width=200, height=200)

# Bin Pressure(in) into five labelled ranges (edges in inches).
p_bins = [0, 28.5, 29, 29.5, 30, 58.63]
pressure_labels = ['<28.5', '<29', '<29.5', '<30', '>=30']
df['Pressure'] = pd.cut(
    df['Pressure(in)'],
    bins=p_bins,
    labels=pressure_labels,
    include_lowest=True,
)

# Discrete severity -> colour scale built from the shared palette.
pressure_severity_scale = alt.Scale(
    type='ordinal',
    domain=list(severity_colors),
    range=[severity_colors[level] for level in severity_colors],
)

# Accident counts per pressure range, coloured by severity (legend hidden).
pressure_bars = alt.Chart(df).mark_bar().encode(
    x=alt.X('Pressure:O'),
    y=alt.Y('count():Q'),
    color=alt.Color('Severity:N', scale=pressure_severity_scale, legend=None),
)

# Only count the rows currently selected by the strip-plot brush.
chart_pressure = pressure_bars.transform_filter(brush).properties(width=200, height=200)

# Bottom row: the four brush-filtered bar charts, side by side.
detail_panels = [chart_weather, chart_temperature, chart_humidity, chart_pressure]
chart = alt.hconcat(*detail_panels)

# Stack the filtered strip plot above the bar-chart row and title the dashboard.
dashboard_rows = alt.vconcat(selected_chart, chart)
charts = dashboard_rows.properties(title = "US Car Accident Analysis (California)")

# Write the dashboard to a standalone HTML file.
charts.save('SystemA.html')