# Hurricane tracts

This is a dataset I typically use for my Intro to Tableau workshop. This notebook shows how to build the same visualizations I cover there using Altair.

In [73]:
import pandas as pd
import altair as alt
from altair import datum

# Avoid Altair's MaxRowsError: enable VegaFusion before any chart is built,
# because the charts below are drawn from all rows of the hurricane data.
import vegafusion as vf
vf.enable()

vegafusion.enable(mimetype='html', row_limit=10000, embed_options=None)

## Read in hurricane tracts data

Specify that `Season` should not be parsed as a number, even though it looks like one; keep it as a string.

In [74]:
# Load the tab-separated hurricane records.
# - ISO_time is parsed into datetimes so Altair can apply time units to it.
# - Season is forced to stay a plain object (string) column instead of
#   being inferred as an integer.
tracts = pd.read_csv(
    'data/hurricane_tracts.tsv',
    sep='\t',
    dtype={'Season': object},
    parse_dates=['ISO_time'],
)

# Show the resulting column types to confirm the parsing choices above.
tracts.dtypes

Longitude                       float64
Latitude                        float64
Serial_Num                       object
Season                           object
Num                               int64
Basin                            object
Sub_basin                        object
Hurricane Name                   object
ISO_time                 datetime64[ns]
Nature                           object
Wind in knots                     int64
Pressure in millibars           float64
Wind Percentile                 float64
Pressure Percentile             float64
Track_type                       object
State                            object
SubRegion                        object
Region                           object
Country                          object
dtype: object

### Summary of the data statistics

In [75]:
# Summary statistics for the numeric columns.
# NOTE(review): in the output recorded with this notebook, the pressure columns
# have far fewer non-null values (16,672) than the other columns (46,933), and
# Wind Percentile has a minimum of -100, which looks like a sentinel value
# rather than a real percentile — confirm before relying on those columns.
tracts.describe()

Unnamed: 0,Longitude,Latitude,Num,Wind in knots,Pressure in millibars,Wind Percentile,Pressure Percentile
count,46933.0,46933.0,46933.0,46933.0,16672.0,46933.0,16672.0
mean,-66.007266,26.902676,7.387744,53.266018,991.885437,50.661692,52.634471
std,19.361264,9.934079,5.260871,25.521932,19.24006,31.969774,29.944196
min,-118.3,7.2,1.0,0.0,882.0,-100.0,0.0
25%,-81.1,19.0,3.0,35.0,984.0,32.212,26.462
50%,-68.1,26.3,6.0,50.0,998.0,57.961,56.565
75%,-53.0,33.0,10.0,70.0,1006.0,78.425,78.332
max,28.0,72.5,31.0,165.0,1024.0,99.997,99.996


## Number of records per season bar chart

In [76]:
# One bar per season (ordinal x axis); bar height is the number of
# records in `tracts` for that season.
alt.Chart(tracts).mark_bar().encode(
    alt.X('Season:O'),
    alt.Y('count()'),
)

## Number of records per season (horizontal bars)

In [77]:
# Same record count per season as above, with the axes swapped:
# seasons run down the y axis and the bars extend along x.
alt.Chart(tracts).mark_bar().encode(
    alt.Y('Season', type='ordinal'),
    alt.X(aggregate='count', type='quantitative'),
)

## Sorted number of records per season

In [78]:
# Only the Season column is handed to the chart, since nothing else is used.
# The y axis is ordered by the record count of each season, largest first.
alt.Chart(tracts[['Season']]).mark_bar().encode(
    alt.X('count():Q'),
    alt.Y('Season:O').sort(
        alt.EncodingSortField(op='count', field='Season', order='descending')
    ),
)

## Top 10 sorted number of records per season

In [79]:
# Pipeline, in the order the transforms run:
#   1. count the records in each season,
#   2. rank the seasons by that count (highest count gets rank 1),
#   3. keep only seasons ranked 10 or better,
# then draw the survivors as bars sorted by descending x value.
(
    alt.Chart(tracts[['Season']])
    .transform_aggregate(
        groupby=['Season'],
        count='count()',
    )
    .transform_window(
        sort=[alt.SortField(field='count', order='descending')],
        rank='rank(count)',
    )
    .transform_filter(datum.rank <= 10)
    .mark_bar()
    .encode(
        alt.X('count:Q'),
        alt.Y('Season:O').sort('-x'),
    )
)

## Sorted hurricanes per season

In [80]:
# Number of distinct hurricanes (unique Serial_Num values) per season.
# The y axis is sorted by descending x value so the busiest seasons come
# first; without the sort the bars appear in default season order, which
# contradicts the "Sorted" section heading.
alt.Chart(tracts[['Season','Serial_Num']]).mark_bar().encode(
    y=alt.Y('Season:O').sort('-x'),
    x='distinct(Serial_Num):Q'
)

## Top 10 hurricane seasons

In [81]:
# Pipeline, in the order the transforms run:
#   1. count the distinct Serial_Num values in each season,
#   2. rank the seasons by that count (highest count gets rank 1),
#   3. keep only seasons ranked 10 or better,
# then draw the survivors as bars sorted by descending x value.
(
    alt.Chart(tracts)
    .transform_aggregate(
        groupby=['Season'],
        n_hurricanes='distinct(Serial_Num)',
    )
    .transform_window(
        sort=[alt.SortField(field='n_hurricanes', order='descending')],
        rank='rank(n_hurricanes)',
    )
    .transform_filter('datum.rank <= 10')
    .mark_bar()
    .encode(
        alt.X('n_hurricanes:Q'),
        alt.Y('Season:O').sort('-x'),
    )
)

## Hurricane Season

In [82]:
# Line chart of distinct Serial_Num values per calendar month of ISO_time,
# with all years pooled together by the `month` time unit.
alt.Chart(tracts).mark_line().encode(
    alt.X('ISO_time', timeUnit='month', type='temporal'),
    alt.Y('Serial_Num', aggregate='distinct', type='quantitative'),
)

## Hurricanes per year timeline

In [83]:
# Timeline of the number of distinct hurricanes (unique Serial_Num values)
# observed in each calendar year of ISO_time.
# The y channel counts storms rather than summing `Wind in knots`: a wind sum
# grows with the number of records logged per storm and does not show
# "hurricanes per year" as the section heading promises.
alt.Chart(tracts).mark_line().encode(
    x='year(ISO_time):T',
    y='distinct(Serial_Num):Q'
)

## Scatterplot

In [101]:
# One circle per (Serial_Num, Nature) pair:
#   x     = mean wind speed of the pair's records,
#   y     = mean pressure (axis not forced to start at zero),
#   size  = number of records behind the circle,
#   color = Nature.
# Records without a positive pressure reading are dropped before aggregating.
#
# The record count is computed inside transform_aggregate. An encoding-level
# `count()` is evaluated after the transforms, so it would count the already
# aggregated rows that share the same channel values (almost always 1), not
# the records that make up each storm.
alt.Chart(tracts).mark_circle(opacity=0.3).encode(
    x='mean_wind:Q',
    y=alt.Y('mean_pressure:Q').scale(zero=False),
    size=alt.Size('n_records:Q').title('Count of Records'),
    color='Nature:N'
).transform_filter(
    datum['Pressure in millibars'] > 0
).transform_aggregate(
    mean_wind='mean(Wind in knots)',
    mean_pressure='mean(Pressure in millibars)',
    n_records='count()',
    groupby=['Serial_Num','Nature']
)

In [100]:
# Tally how many records carry each Nature code (used as the color
# channel in the scatterplot and the map).
tracts['Nature'].value_counts()

TS    40518
ET     4318
DS      909
SS      816
NR      372
Name: Nature, dtype: int64

## Mapped tracts for 2005

In [86]:
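# Alternative if you have vega_datasets installed: take the states topology URL
# from the package instead of hard-coding it as the next cell does.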

# from vega_datasets import data
# states = alt.topo_feature(data.us_10m.url, feature='states')

In [99]:
# US state boundaries, referenced by URL as a TopoJSON feature set.
states = alt.topo_feature(
    'https://vega.github.io/vega-datasets/data/us-10m.json',
    feature='states',
)

# Base layer: light gray states with white borders.
background = (
    alt.Chart(states)
    .mark_geoshape(stroke='white', fill='lightgray')
    .properties(height=400, width=500)
    .project(type='albersUsa')
)

# Overlay: one circle per 2005 record, placed by longitude/latitude,
# sized by wind speed and colored by Nature; hovering shows the name.
# Projection and size match the base layer so the two layers line up.
points = (
    alt.Chart(tracts)
    .transform_filter(datum.Season == '2005')
    .mark_circle(opacity=0.6)
    .encode(
        alt.Longitude('Longitude:Q'),
        alt.Latitude('Latitude:Q'),
        alt.Size('Wind in knots:Q'),
        alt.Color('Nature:N'),
        tooltip=['Hurricane Name'],
    )
    .properties(height=400, width=500)
    .project(type='albersUsa')
)

# Layer the points on top of the state outlines.
background + points