In [30]:
import warnings

# Silence every FutureWarning for the rest of this kernel session.
# NOTE(review): a blanket filter like this also hides deprecation notices from
# the libraries used below — confirm that is intended before upgrading them.
warnings.simplefilter("ignore", category=FutureWarning)

In [31]:
# Load the tornado-track dataset
import os
from pathlib import Path

import pandas as pd

# Resolve the CSV location without editing the notebook:
#   1. an explicit TORNADO_TRACKS_CSV environment variable always wins;
#   2. otherwise a copy in the current working directory is used if present;
#   3. otherwise fall back to the original absolute path on the author's machine.
_LEGACY_CSV = Path('/Users/alinafaisal/Desktop/UMich/Spring25/SI649/final_project/Tornado_Tracks.csv')
_csv_override = os.environ.get('TORNADO_TRACKS_CSV')
if _csv_override:
    TORNADO_CSV = Path(_csv_override)
elif Path('Tornado_Tracks.csv').is_file():
    TORNADO_CSV = Path('Tornado_Tracks.csv')
else:
    TORNADO_CSV = _LEGACY_CSV

# read_csv raises FileNotFoundError if the resolved path does not exist.
df = pd.read_csv(TORNADO_CSV)

# Display first few rows
df.head(10)

Unnamed: 0,OBJECTID,om,yr,mo,dy,date,time,tz,st,stf,...,slat,slon,elat,elon,len,wid,fc,Month_Calc,Date_Calc,Shape__Length
0,1,192,1950,10,1,1950/10/01,21:00:00,3,OK,40,...,36.73,-102.52,36.88,-102.3,15.8,10,0,9,1950/10/01 04:00:00+00,32166.649032
1,2,193,1950,10,9,1950/10/09,02:15:00,3,NC,37,...,34.17,-78.6,34.1701,-78.5999,2.0,880,0,9,1950/10/09 04:00:00+00,17.462751
2,3,195,1950,11,20,1950/11/20,02:20:00,3,KY,21,...,37.37,-87.2,37.3701,-87.1999,0.1,10,0,10,1950/11/20 05:00:00+00,17.891905
3,4,196,1950,11,20,1950/11/20,04:00:00,3,KY,21,...,38.2,-84.5,38.2001,-84.4999,0.1,10,0,10,1950/11/20 05:00:00+00,18.016042
4,5,197,1950,11,20,1950/11/20,07:30:00,3,MS,28,...,32.42,-89.13,32.4201,-89.1299,2.0,37,0,10,1950/11/20 05:00:00+00,17.257657
5,6,194,1950,11,4,1950/11/04,17:00:00,3,PA,42,...,40.2,-76.12,40.4,-75.93,15.9,100,0,10,1950/11/04 04:00:00+00,36049.045385
6,7,198,1950,12,2,1950/12/02,15:00:00,3,IL,17,...,38.97,-90.05,39.07,-89.72,18.8,50,0,11,1950/12/02 05:00:00+00,39430.815221
7,8,199,1950,12,2,1950/12/02,16:00:00,3,IL,17,...,38.75,-89.67,38.9,-89.38,18.0,200,0,11,1950/12/02 05:00:00+00,38749.924332
8,9,200,1950,12,2,1950/12/02,16:25:00,3,AR,5,...,36.12,-91.83,36.18,-91.72,7.8,10,0,11,1950/12/02 05:00:00+00,14777.147009
9,10,201,1950,12,2,1950/12/02,17:30:00,3,IL,17,...,38.17,-89.78,38.22,-89.62,9.6,50,0,11,1950/12/02 05:00:00+00,19167.510184


In [32]:
import pandas as pd
import altair as alt
from vega_datasets import data

# Turn off Altair's max-rows check for embedded data frames.
alt.data_transformers.disable_max_rows()

# Per-state tally: number of non-null `om` entries for each `st` value.
tornado_counts = df.groupby('st')['om'].count().rename('tornadoes').reset_index()

# State abbreviation -> numeric id used to join against the map features.
state_fips = {
    'AL': 1, 'AK': 2, 'AZ': 4, 'AR': 5, 'CA': 6, 'CO': 8, 'CT': 9,
    'DE': 10, 'DC': 11, 'FL': 12, 'GA': 13, 'HI': 15, 'ID': 16,
    'IL': 17, 'IN': 18, 'IA': 19, 'KS': 20, 'KY': 21, 'LA': 22,
    'ME': 23, 'MD': 24, 'MA': 25, 'MI': 26, 'MN': 27, 'MS': 28,
    'MO': 29, 'MT': 30, 'NE': 31, 'NV': 32, 'NH': 33, 'NJ': 34,
    'NM': 35, 'NY': 36, 'NC': 37, 'ND': 38, 'OH': 39, 'OK': 40,
    'OR': 41, 'PA': 42, 'RI': 44, 'SC': 45, 'SD': 46, 'TN': 47,
    'TX': 48, 'UT': 49, 'VT': 50, 'VA': 51, 'WA': 53, 'WV': 54,
    'WI': 55, 'WY': 56,
}
fips_lookup = pd.DataFrame({
    'id': list(state_fips.values()),
    'st': list(state_fips.keys()),
})

# Left join keeps every listed state; states absent from the counts get 0.
merged_df = pd.merge(fips_lookup, tornado_counts, how='left', on='st').fillna(0)

# State outlines from the us_10m TopoJSON dataset.
states = alt.topo_feature(data.us_10m.url, 'states')

# Point selection over the `st` field; it is attached to both the map and the bar chart.
state_click = alt.selection_point(name="Select", fields=['st'])

# Encoding pieces for the choropleth, named up front to keep the chart expression short.
# Selected states are shaded by tornado count; everything else is drawn light gray.
map_fill = alt.condition(
    state_click,
    alt.Color('tornadoes:Q', title='Tornadoes', scale=alt.Scale(scheme='blues')),
    alt.value('lightgray'),
)
# Pull `st` and `tornadoes` onto each map feature by matching on `id`.
map_counts = alt.LookupData(data=merged_df, key='id', fields=['st', 'tornadoes'])
map_title = alt.TitleParams(text='Tornado Frequency by State (1950–2022)', fontSize=20)

# Choropleth map
map_chart = (
    alt.Chart(states)
    .mark_geoshape(stroke='white')
    .encode(color=map_fill, tooltip=['st:N', 'tornadoes:Q'])
    .transform_lookup(lookup='id', from_=map_counts)
    .add_params(state_click)
    .project(type='albersUsa')
    .properties(width=500, height=300, title=map_title)
)

# Top 20 states bar chart
top_20 = merged_df.sort_values(by='tornadoes', ascending=False).head(20)
# Mean tornado count of those 20 states, truncated to an int; used as the slider's start value.
top_20_avg = int(top_20['tornadoes'].mean())

# Slider that drives the dashed reference line. The label embeds the computed
# average so the number shown always matches the data that was loaded.
threshold_slider = alt.binding_range(
    min=500, max=10000, step=100,
    name=f'Avg Threshold of Tornado Count ({top_20_avg}):'
)
threshold_param = alt.param(value=top_20_avg, bind=threshold_slider)

# Horizontal bars, longest first; bars fade when their state is not in the selection.
bars = alt.Chart(top_20).mark_bar().encode(
    y=alt.Y('st:N', sort='-x', title='State'),
    x=alt.X('tornadoes:Q', title='Tornado Count'),
    color=alt.Color('tornadoes:Q', scale=alt.Scale(scheme='orangered')),
    tooltip=['st:N', 'tornadoes:Q'],
    opacity=alt.condition(state_click, alt.value(1), alt.value(0.3))
)

# Single-row placeholder frame: the calculate transform overwrites `value`
# with the slider parameter, so the rule follows the slider.
threshold_line = alt.Chart(pd.DataFrame({'value': [1]})).mark_rule(
    strokeDash=[5, 2], color='black'
).encode(
    x='value:Q'
).transform_calculate(
    value=threshold_param.name
)

# Bars plus threshold rule, sharing the state selection and the slider parameter.
bar_chart = alt.layer(bars, threshold_line).add_params(
    state_click, threshold_param
).properties(
    width=500,
    height=300,
    title=alt.TitleParams(
        text='Top 20 States by Tornado Count',
        fontSize=20
    )
)

# Place the map and the bar chart side by side. Both colour the `tornadoes`
# field but with different schemes ('blues' vs 'orangered'). Concatenated views
# share non-positional scales by default, so the colour scale is resolved
# independently to let each chart keep its own scheme and legend.
main_chart = alt.hconcat(map_chart, bar_chart).resolve_scale(color='independent')

# Wrap the row in a vertical container that carries the overall title.
final = alt.vconcat(
    main_chart
)

# Apply configs once at the end
final_chart = final.configure_title(
    anchor='middle',
    fontSize=30,
    font='Helvetica',
    color='navy',
    offset=30
).configure_view(
    fill='white'
).properties(
    title='U.S. Tornado Distribution and State-Level Trends (1950–2022)',
    background='#e6f7ff'  # light pastel blue
)

final_chart

In [33]:
# Write the dashboard currently bound to `final_chart` to an HTML file.
# NOTE(review): a later cell rebinds `final_chart` to the two-state comparison
# chart; re-running this cell after that one would export that chart here instead.
final_chart.save('tornado_visualization.html')

In [34]:
import pandas as pd
import altair as alt

# Turn off Altair's max-rows check for embedded data frames.
alt.data_transformers.disable_max_rows()

# Preprocess: keep only the columns needed for the state / period comparison.
df_subset = df[['st', 'yr', 'mag', 'inj', 'fat']].copy()
# Two right-inclusive bins: (1949, 1985] and (1985, 2022]. Years outside them
# become NaN and are dropped on the next line.
df_subset['Period'] = pd.cut(df_subset['yr'], bins=[1949, 1985, 2022],
                             labels=['1950–1985', '1986–2022'])
df_subset = df_subset.dropna(subset=['Period'])

# Long format: one row per (state, year, period, metric).
melted = df_subset.melt(
    id_vars=['st', 'yr', 'Period'],
    value_vars=['mag', 'inj', 'fat'],
    var_name='Metric',
    value_name='Value'
)

# Replace the raw column codes with display names.
melted['Metric'] = melted['Metric'].map({
    'mag': 'Magnitude',
    'inj': 'Injuries',
    'fat': 'Fatalities',
})

# `Period` is categorical, so `observed` is passed explicitly instead of relying
# on the pandas default, which is deprecated and changes between versions.
# observed=False keeps every period category in the result.
# NOTE(review): `mag` is summed like the count columns; if the dataset encodes
# unknown magnitudes with a sentinel value, those are included — confirm.
grouped = (
    melted.groupby(['st', 'Period', 'Metric'], observed=False)
    .agg(Value=('Value', 'sum'))
    .reset_index()
)

# Dropdown options: each distinct state code in the aggregated table, sorted.
# NOTE(review): this rebinds `states`, which an earlier cell uses for the map
# features — re-run that cell in full before rebuilding the map.
states = sorted(set(grouped['st']))

# One select widget per comparison slot.
dropdown_1 = alt.binding_select(name='State 1:', options=states)
dropdown_2 = alt.binding_select(name='State 2:', options=states)

# Parameters start on the first and second option respectively.
state_1 = alt.param(bind=dropdown_1, value=states[0])
state_2 = alt.param(bind=dropdown_2, value=states[1])

# Chart builder
# NOTE(review): this function reuses the name `bar_chart`, which an earlier cell
# binds to the top-20 layered chart; re-run that cell in full before reusing it.
def bar_chart(state_param, period, label, color_scheme, show_y=True,
              metric_order=('Magnitude', 'Injuries', 'Fatalities')):
    """Build a horizontal bar chart of metric totals for one state and period.

    Rows of the module-level `grouped` frame are filtered to those whose `st`
    equals the current value of `state_param` and whose `Period` equals
    `period`.

    Parameters
    ----------
    state_param : Altair parameter
        Parameter whose value is compared against the `st` column.
    period : str
        Value compared against the `Period` column.
    label : str
        Chart title.
    color_scheme : str
        Named colour scheme applied to the bars.
    show_y : bool, default True
        When False the y axis (metric labels) is hidden.
    metric_order : sequence of str, optional
        Top-to-bottom order of the metrics. The same fixed order is used
        whether or not the axis is shown, so a panel with a hidden axis lines
        up row-for-row with the labelled panel next to it.

    Returns
    -------
    Layered Altair chart (bars plus value labels), 340 x 240.
    """
    row_order = list(metric_order)
    if show_y:
        y_channel = alt.Y('Metric:N', sort=row_order, title='Metric')
    else:
        y_channel = alt.Y('Metric:N', sort=row_order, axis=None)

    base = alt.Chart(grouped).transform_filter(
        (alt.datum.st == state_param) & (alt.datum.Period == period)
    ).encode(
        y=y_channel,
        x=alt.X('Value:Q', title='Total Value', axis=alt.Axis(grid=True, format=',d')),
        tooltip=['Metric', 'Value']
    )

    # The colour encoding lives on the bars only. An encoding inherited by the
    # text layer would take precedence over its fixed 'steelblue' mark colour.
    bars = base.mark_bar().encode(
        color=alt.Color('Metric:N', scale=alt.Scale(scheme=color_scheme), legend=None)
    )
    text = base.mark_text(
        align='left',
        dx=5,
        color='steelblue'
    ).encode(
        text=alt.Text('Value:Q', format=',')
    )

    return alt.layer(bars, text).properties(width=340, height=240, title=label)

# Combine charts per state: one row per dropdown, one panel per period.
top_row = alt.hconcat(
    bar_chart(state_1, '1950–1985', 'State 1 (1950–1985)', 'blues'),
    bar_chart(state_1, '1986–2022', 'State 1 (1986–2022)', 'blues', show_y=False)
).resolve_axis(y='shared')

bottom_row = alt.hconcat(
    bar_chart(state_2, '1950–1985', 'State 2 (1950–1985)', 'oranges'),
    bar_chart(state_2, '1986–2022', 'State 2 (1986–2022)', 'oranges', show_y=False)
).resolve_axis(y='shared')

# Stack the two rows and apply the config once at the end.
# Both rows colour the same `Metric` field but with different schemes, and
# concatenated views share non-positional scales by default. The colour scale
# is therefore resolved independently so the second row keeps 'oranges'.
# Background alternatives considered: 'aliceblue' / '#f0f8ff' (very light),
# 'lightcyan', 'powderblue', 'lightblue', '#dbefff' (gentle pastel blue).
final_chart = alt.vconcat(
    alt.vconcat(top_row, bottom_row).resolve_scale(
        color='independent'
    ).add_params(state_1, state_2).properties(
        title='Comparison of Tornado Metrics Across Two States and Time Periods'
    )
).configure_title(
    anchor='middle',
    fontSize=30,
    font='Helvetica',
    color='navy'
).configure_view(
    fill='white'
).properties(
    background='#e6f7ff'  # a very soft, light blue
)

final_chart

In [35]:
# Write the chart currently bound to `final_chart` (the two-state comparison
# built in the preceding cell) to an HTML file.
final_chart.save('bar_chart.html')