In [1050]:
#Import pandas and numpy and altair
import pandas as pd
import numpy as np
import altair as alt

In [1051]:
#Read hurdat2.csv
df = pd.read_csv('./hurdat2.csv',parse_dates=['datetime'])
df

Unnamed: 0,identifier,name,num_pts,datetime,record_id,status,latitude,longitude,max_wind,min_pressure,...,sw34ktr,nw34ktr,ne50ktr,se50ktr,sw50ktr,nw50ktr,ne64ktr,se64ktr,sw64ktr,nw64ktr
0,AL011851,,14,1851-06-25 00:00:00,,HU,28.0,-94.8,80.0,,...,,,,,,,,,,
1,AL011851,,14,1851-06-25 06:00:00,,HU,28.0,-95.4,80.0,,...,,,,,,,,,,
2,AL011851,,14,1851-06-25 12:00:00,,HU,28.0,-96.0,80.0,,...,,,,,,,,,,
3,AL011851,,14,1851-06-25 18:00:00,,HU,28.1,-96.5,80.0,,...,,,,,,,,,,
4,AL011851,,14,1851-06-25 21:00:00,L,HU,28.2,-96.8,80.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50298,AL192017,RINA,21,2017-11-08 12:00:00,,TS,38.3,-48.8,45.0,994.0,...,0.0,90.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50299,AL192017,RINA,21,2017-11-08 18:00:00,,TS,40.1,-49.0,45.0,992.0,...,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50300,AL192017,RINA,21,2017-11-09 00:00:00,,TS,41.8,-48.8,45.0,991.0,...,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50301,AL192017,RINA,21,2017-11-09 06:00:00,,LO,43.6,-48.0,40.0,993.0,...,0.0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [1052]:
df_minmax = df.groupby(['identifier']).agg({'max_wind':'max','min_pressure':'min'},axis=1)
df_minmax

Unnamed: 0_level_0,max_wind,min_pressure
identifier,Unnamed: 1_level_1,Unnamed: 2_level_1
AL011851,80.0,
AL011852,100.0,961.0
AL011853,50.0,
AL011854,70.0,
AL011855,90.0,
...,...,...
AL282005,45.0,1002.0
AL291969,80.0,979.0
AL292005,60.0,980.0
AL302005,75.0,981.0


In [1053]:
df_hour = df.copy()
df_hour['hour'] = df['datetime'].dt.hour
df_hour = df_hour.groupby(['hour']).agg({'hour':'count'})
pd.DataFrame(df_hour['hour'].sort_values())

Unnamed: 0_level_0,hour
hour,Unnamed: 1_level_1
20,28
1,29
23,31
19,32
17,36
13,38
9,38
2,39
5,39
7,39


In [1054]:
def haversine(lon1,lat1,lon2,lat2,earth_radius=6367):
    lon1,lat1,lon2,lat2 = map(np.radians,[lon1,lat1,lon2,lat2])
    dlon = lon2-lon1
    dlat = lat2-lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a))
    km = earth_radius * c
    return km

In [1055]:
#Create time_diff(hours) column
df_speed = df
df_speed['time_diff(hours)'] = df.groupby('identifier')['datetime'].diff(periods=1).dt.total_seconds()/3600
#Create dist column
df_speed['dist'] = np.vectorize(haversine)(
    df_speed['longitude'],
    df_speed['latitude'],
    df_speed['longitude'].shift(1),
    df_speed['latitude'].shift(1)
)
#Calculate km/h
df_speed['speed'] = df_speed['dist']/(df_speed['time_diff(hours)'])
df_speed


Unnamed: 0,identifier,name,num_pts,datetime,record_id,status,latitude,longitude,max_wind,min_pressure,...,se50ktr,sw50ktr,nw50ktr,ne64ktr,se64ktr,sw64ktr,nw64ktr,time_diff(hours),dist,speed
0,AL011851,,14,1851-06-25 00:00:00,,HU,28.0,-94.8,80.0,,...,,,,,,,,,,
1,AL011851,,14,1851-06-25 06:00:00,,HU,28.0,-95.4,80.0,,...,,,,,,,,6.0,58.870532,9.811755
2,AL011851,,14,1851-06-25 12:00:00,,HU,28.0,-96.0,80.0,,...,,,,,,,,6.0,58.870532,9.811755
3,AL011851,,14,1851-06-25 18:00:00,,HU,28.1,-96.5,80.0,,...,,,,,,,,6.0,50.279389,8.379898
4,AL011851,,14,1851-06-25 21:00:00,L,HU,28.2,-96.8,80.0,,...,,,,,,,,3.0,31.424628,10.474876
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50298,AL192017,RINA,21,2017-11-08 12:00:00,,TS,38.3,-48.8,45.0,994.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,211.322388,35.220398
50299,AL192017,RINA,21,2017-11-08 18:00:00,,TS,40.1,-49.0,45.0,992.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,200.765152,33.460859
50300,AL192017,RINA,21,2017-11-09 00:00:00,,TS,41.8,-48.8,45.0,991.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,189.656830,31.609472
50301,AL192017,RINA,21,2017-11-09 06:00:00,,LO,43.6,-48.0,40.0,993.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,210.421792,35.070299


In [1056]:
df_maria = df[(df['name'] == 'MARIA') & (df['datetime'] > '2017-01-01')].copy()
df_maria

Unnamed: 0,identifier,name,num_pts,datetime,record_id,status,latitude,longitude,max_wind,min_pressure,...,se50ktr,sw50ktr,nw50ktr,ne64ktr,se64ktr,sw64ktr,nw64ktr,time_diff(hours),dist,speed
50128,AL152017,MARIA,68,2017-09-16 12:00:00,,TD,12.2,-49.7,30.0,1006.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,3870.474961,
50129,AL152017,MARIA,68,2017-09-16 18:00:00,,TS,12.2,-51.7,40.0,1004.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,217.230412,36.205069
50130,AL152017,MARIA,68,2017-09-17 00:00:00,,TS,12.4,-53.1,45.0,1002.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,153.619969,25.603328
50131,AL152017,MARIA,68,2017-09-17 06:00:00,,TS,12.8,-54.4,55.0,994.0,...,20.0,0.0,20.0,0.0,0.0,0.0,0.0,6.0,147.824307,24.637385
50132,AL152017,MARIA,68,2017-09-17 12:00:00,,TS,13.3,-55.7,60.0,990.0,...,20.0,0.0,20.0,0.0,0.0,0.0,0.0,6.0,151.302381,25.217063
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50191,AL152017,MARIA,68,2017-10-01 12:00:00,,EX,44.9,-35.5,45.0,999.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,352.766370,58.794395
50192,AL152017,MARIA,68,2017-10-01 18:00:00,,EX,46.5,-31.0,45.0,1003.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,391.822153,65.303692
50193,AL152017,MARIA,68,2017-10-02 00:00:00,,EX,47.5,-26.5,40.0,1005.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,358.627130,59.771188
50194,AL152017,MARIA,68,2017-10-02 06:00:00,,EX,48.0,-22.0,40.0,1012.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,340.734199,56.789033


In [1057]:
df_joaquin = df[(df['name'] == 'JOAQUIN') & (df['datetime'] > '2015-01-01')].copy()
df_joaquin['datetime'] = df_joaquin['datetime'] + pd.Timedelta(df_maria['datetime'].iloc[0] - df_joaquin['datetime'].iloc[0])
df_joaquin

Unnamed: 0,identifier,name,num_pts,datetime,record_id,status,latitude,longitude,max_wind,min_pressure,...,se50ktr,sw50ktr,nw50ktr,ne64ktr,se64ktr,sw64ktr,nw64ktr,time_diff(hours),dist,speed
49011,AL112015,JOAQUIN,76,2017-09-16 12:00:00,,LO,26.8,-68.7,20.0,1011.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1967.601501,
49012,AL112015,JOAQUIN,76,2017-09-16 18:00:00,,LO,26.9,-68.6,20.0,1011.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,14.892449,2.482075
49013,AL112015,JOAQUIN,76,2017-09-17 00:00:00,,LO,27.0,-68.5,20.0,1010.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,14.886609,2.481101
49014,AL112015,JOAQUIN,76,2017-09-17 06:00:00,,LO,27.1,-68.6,25.0,1009.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,14.880751,2.480125
49015,AL112015,JOAQUIN,76,2017-09-17 12:00:00,,LO,27.2,-68.8,30.0,1007.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,22.684445,3.780741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49082,AL112015,JOAQUIN,76,2017-10-03 18:00:00,,EX,36.0,-9.0,20.0,1011.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,67.273849,11.212308
49083,AL112015,JOAQUIN,76,2017-10-04 00:00:00,,EX,35.5,-8.7,20.0,1011.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,61.799722,10.299954
49084,AL112015,JOAQUIN,76,2017-10-04 06:00:00,,EX,35.1,-8.4,20.0,1011.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,52.115972,8.685995
49085,AL112015,JOAQUIN,76,2017-10-04 12:00:00,,EX,35.0,-8.0,15.0,1012.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,38.048004,6.341334


In [1058]:
maria = alt.Chart(df_maria).mark_line().encode(
    alt.X('datetime', scale=alt.Scale(zero=False)),
    alt.Y('speed', scale=alt.Scale(zero=False)),
    order='datetime',
    color='name'
)
joaquin = alt.Chart(df_joaquin).mark_line().encode(
    alt.X('datetime', scale=alt.Scale(zero=False)),
    alt.Y('speed', scale=alt.Scale(zero=False)),
    order='datetime',
    color='name'
    )
maria + joaquin


In [1059]:
min_len = min(len(df_maria), len(df_joaquin))
diff_df = pd.DataFrame({
    'datetime': df_maria['datetime'].reset_index(drop=True)[:min_len],
    'speed_diff': (df_joaquin['speed'].reset_index(drop=True)[:min_len].values -
                   df_maria['speed'].reset_index(drop=True)[:min_len].values
                   )
})
speed_diff_chart = alt.Chart(diff_df).mark_line().encode(
    alt.X('datetime', scale=alt.Scale(zero=False)),
    alt.Y('speed_diff', scale=alt.Scale(zero=False)),
    order='datetime'
)

speed_diff_chart

In [1060]:
df_maria = df_maria.set_index('datetime').groupby('identifier')['speed'].resample('3h').mean().interpolate()
df_joaquin = df_joaquin.set_index('datetime').groupby('identifier')['speed'].resample('3h').mean().interpolate()

In [None]:
min_len = min(len(df_maria), len(df_joaquin))
diff_df = pd.DataFrame({
    'datetime': df_maria.reset_index()['datetime'][:min_len],
    'speed_diff': (df_joaquin.reset_index(drop=True)[:min_len].values -
                   df_maria.reset_index(drop=True)[:min_len].values
                   )
})
speed_diff_chart = alt.Chart(diff_df).mark_line().encode(
    alt.X('datetime', scale=alt.Scale(zero=False)),
    alt.Y('speed_diff', scale=alt.Scale(zero=False)),
    order='datetime'
)

speed_diff_chart