In [1]:
import altair as alt
import pandas as pd
import numpy as np

# Read the Phoenix monthly maximum-temperature table, indexed by year.
# The file marks missing readings with the letter 'M'; declaring it in
# `na_values` makes pandas parse those cells as NaN at load time, so no
# in-place replacement pass is needed and the cell is safe to re-run.
df6 = (
    pd.read_csv('../data/phoenix_maximum_daily_temps.csv', na_values='M')
    .set_index('Year')
    # Use the builtin `float`: the `np.float` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    .astype(float)
)
# Relabel the twelve columns with month numbers 1..12.
df6.columns = list(range(1, 13))
df6

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1919,74.0,74.0,86.0,97.0,101.0,113.0,110.0,110.0,106.0,90.0,84.0,76.0
1920,79.0,76.0,83.0,94.0,105.0,110.0,114.0,108.0,105.0,98.0,79.0,75.0
1921,77.0,92.0,95.0,96.0,101.0,110.0,110.0,107.0,105.0,100.0,89.0,76.0
1922,69.0,80.0,83.0,92.0,105.0,114.0,112.0,110.0,107.0,100.0,80.0,74.0
1923,84.0,82.0,84.0,92.0,104.0,112.0,111.0,105.0,105.0,92.0,80.0,71.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2015,81.0,84.0,97.0,99.0,107.0,115.0,110.0,117.0,107.0,106.0,88.0,79.0
2016,76.0,90.0,92.0,97.0,102.0,118.0,112.0,111.0,106.0,100.0,90.0,77.0
2017,76.0,83.0,96.0,99.0,108.0,119.0,118.0,111.0,110.0,99.0,89.0,84.0
2018,83.0,84.0,92.0,100.0,106.0,111.0,116.0,114.0,109.0,93.0,84.0,75.0


## Tidy up the data

In [2]:
# Reshape the wide year x month table into long ("tidy") form:
# one row per (year, month) pair with the temperature in `t_max`.
monthly_max = df6.stack()                  # month columns become the inner index level
monthly_max = monthly_max.rename('t_max')  # name the values before flattening
monthly_max = monthly_max.rename_axis(['year', 'month'])
data = monthly_max.reset_index()
data

Unnamed: 0,year,month,t_max
0,1919,1,74.0
1,1919,2,74.0
2,1919,3,86.0
3,1919,4,97.0
4,1919,5,101.0
...,...,...,...
1202,2019,3,87.0
1203,2019,4,100.0
1204,2019,5,100.0
1205,2019,6,112.0


## Look at what we have

In [3]:
# Quick look at the tidy data: t_max against month, drawn as lines and
# coloured by year.
alt.Chart(data).mark_line().encode(
    alt.X('month'),
    alt.Y('t_max'),
    alt.Color('year'),
)

## Plot the KDE (Density)

In [4]:
# Kernel density estimate of t_max, computed separately for each month and
# drawn as overlapping semi-transparent areas (one colour per month).
alt.Chart(data).transform_density(
    't_max',
    groupby=['month'],
    as_=['t_max', 'density'],
).mark_area(
    opacity=0.3,
).encode(
    alt.X('t_max:Q'),
    alt.Y('density:Q'),
    alt.Color('month:N'),
).properties(
    width=800,
    height=400,
)