# Hurricane tracts

This is a dataset I typically use for my Intro to Tableau workshop. I wanted to show how you could do the same visualizations in Altair that I cover there.

In [2]:
import pandas as pd
import altair as alt
from numpy import float64

# Avoid MaxRowsError when charting large DataFrames by enabling VegaFusion.
# The call echoes its active settings as the cell output.
# NOTE(review): the recorded run shows row_limit=10000 — confirm that limit
# is appropriate for the charts built in this notebook.
import vegafusion as vf
vf.enable()

vegafusion.enable(mimetype='html', row_limit=10000, embed_options=None)

## Read in hurricane tracts data

Specify that `Season` shouldn't be parsed as a number, even though it looks like one – keep it as a string.

In [4]:
# Load the tab-separated hurricane data. Season is forced to stay a string
# (object) instead of being inferred as a number, wind speed is read as a
# float, and ISO_time is parsed into datetimes while the file is read.
df = pd.read_csv(
    'data/hurricane_tracts.tsv',
    delimiter='\t',
    parse_dates=['ISO_time'],
    dtype={'Wind in knots': float64, 'Season': object},
)

## Change ISO_time to a true date and time data type

`ISO_time` was already parsed as a datetime by `parse_dates` when the file was read, so check the column types to confirm.

In [5]:
# List every column's dtype to verify that Season stayed a string (object)
# and that ISO_time was parsed as a datetime.
df.dtypes

Longitude                       float64
Latitude                        float64
Serial_Num                       object
Season                           object
Num                               int64
Basin                            object
Sub_basin                        object
Hurricane Name                   object
ISO_time                 datetime64[ns]
Nature                           object
Wind in knots                   float64
Pressure in millibars           float64
Wind Percentile                 float64
Pressure Percentile             float64
Track_type                       object
State                            object
SubRegion                        object
Region                           object
Country                          object
dtype: object

## Cumulative count of records per hurricane season

In [12]:
# Cumulative number of records per hurricane season.
#
# Season is just a column of strings denoting in which hurricane season the
# current row's measurement was taken. Feeding the chart only the column it
# needs speeds things up quite a bit in this case.
season_records = df[['Season']]

alt.Chart(season_records).mark_line().encode(
    # format='d' keeps the season labels free of thousands separators.
    x=alt.X('Season:Q', title='Season', axis=alt.Axis(format='d')),
    y=alt.Y('cumulative_count:Q', title='Cumulative number of records'),
).transform_aggregate(
    # Collapse to one row per season holding that season's record count.
    count='count()',
    groupby=['Season']
).transform_window(
    # Running total of the per-season counts.
    cumulative_count='sum(count)',
    # Sort explicitly: without it the window processes rows in whatever order
    # they arrive, so the running total would silently depend on the row
    # order of the input file.
    # NOTE(review): assumes Season values are same-width year strings, so
    # their sort order is chronological — confirm against the data.
    sort=[alt.SortField(field='Season', order='ascending')],
    # frame is [preceding, following] offsets from the current row; None
    # means unbounded, so [None, 0] covers all preceding rows plus the
    # current one.
    frame=[None, 0]
).properties(
    title='Cumulative count of records per hurricane season'
)