In [99]:
import json
import pandas as pd

In [100]:
# Load the raw game records. JSON text is UTF-8, so state the encoding
# explicitly instead of relying on the platform's default.
with open('bos2019_with_locations.json', encoding='utf-8') as f:
    json_data = json.load(f)

In [101]:
# Tabulate the loaded game records.
df = pd.DataFrame(data=json_data)

In [102]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,game_date,team_abbr,opponent_abbr,team_points,opponent_points,game_lat,game_lon
0,2018-10-03,BOS,WSH,0,7,38.89806,-77.02083
1,2018-10-04,BOS,BUF,4,0,42.875,-78.87639
2,2018-10-08,BOS,OTT,6,3,42.3662019,-71.0643293
3,2018-10-11,BOS,EDM,4,1,42.3662019,-71.0643293
4,2018-10-13,BOS,DET,8,2,42.3662019,-71.0643293


In [103]:
%pip install H3

Note: you may need to restart the kernel to use updated packages.


In [104]:
import h3

In [105]:
# Take the first two game records as a small worked example.
game_0 = json_data[0]
game_1 = json_data[1]

In [106]:
def get_coords_from_game(game):
    """Return a game's location as a ``(latitude, longitude)`` tuple of floats.

    ``game`` is a mapping with ``'game_lat'`` and ``'game_lon'`` entries; each
    value is passed through ``float`` so numeric strings are accepted.
    """
    lat = float(game['game_lat'])
    lon = float(game['game_lon'])
    return lat, lon

In [107]:
# Convert both sample games to (lat, lon) float tuples.
game_0_coords, game_1_coords = (
    get_coords_from_game(game) for game in (game_0, game_1)
)

In [108]:
# Show the (lat, lon) tuple parsed from the first game record.
game_0_coords

(38.89806, -77.02083)

In [109]:
# Distance between the two sample games' locations, requested in kilometres.
distance = h3.point_dist(
    game_0_coords,
    game_1_coords,
    unit='km',
)

In [110]:
# Kilometres-to-miles multiplier.
miles_conv_factor = 0.621371
miles = miles_conv_factor * distance
miles

In [111]:
# Show the sample distance converted to miles.
miles

291.35217932482266

In [112]:
# Cast both coordinate columns to a numeric dtype.
# (An earlier draft packed lat/lon into a single `coords` tuple column and
# dropped the originals; the separate numeric columns are kept instead.)
for coord_col in ('game_lat', 'game_lon'):
    df[coord_col] = pd.to_numeric(df[coord_col])

In [113]:
# Display the DataFrame (pandas truncates the middle rows of long output).
df

Unnamed: 0,game_date,team_abbr,opponent_abbr,team_points,opponent_points,game_lat,game_lon
0,2018-10-03,BOS,WSH,0,7,38.898060,-77.020830
1,2018-10-04,BOS,BUF,4,0,42.875000,-78.876390
2,2018-10-08,BOS,OTT,6,3,42.366202,-71.064329
3,2018-10-11,BOS,EDM,4,1,42.366202,-71.064329
4,2018-10-13,BOS,DET,8,2,42.366202,-71.064329
...,...,...,...,...,...,...,...
77,2019-03-30,BOS,FLA,1,4,42.366202,-71.064329
78,2019-03-31,BOS,DET,3,6,42.325280,-83.051390
79,2019-04-02,BOS,CBJ,6,2,39.969283,-83.006111
80,2019-04-04,BOS,MIN,3,0,44.944720,-93.101110


In [120]:
def calc_miles_distance(origin_lat, origin_lon, destination_lat, destination_lon):
    """Return the distance in miles between two latitude/longitude points.

    The distance is obtained from ``h3.point_dist`` in kilometres and then
    converted to miles.

    Parameters
    ----------
    origin_lat, origin_lon : float
        Latitude and longitude of the starting point.
    destination_lat, destination_lon : float
        Latitude and longitude of the end point.

    Returns
    -------
    float or None
        The distance in miles, or ``None`` when any coordinate is missing
        (``None``, ``NaN`` or ``pd.NA``).
    """
    coords = (origin_lat, origin_lon, destination_lat, destination_lon)

    # Missing values coming out of a DataFrame (e.g. after ``Series.shift``)
    # are NaN rather than None, so an ``is None`` test alone would let them
    # through to h3; ``pd.isna`` catches both.
    if any(pd.isna(coord) for coord in coords):
        return None

    km_distance = h3.point_dist(
        (origin_lat, origin_lon),
        (destination_lat, destination_lon),
        unit='km',
    )
    miles_conversion_factor = 0.621371  # miles per kilometre

    return km_distance * miles_conversion_factor

In [115]:
# Pair every game with the coordinates of the row above it (the previous
# game). The first row has no predecessor, so its values come out as NaN.
df['prev_game_lat'] = df['game_lat'].shift(periods=1)
df['prev_game_lon'] = df['game_lon'].shift(periods=1)

In [116]:
# Preview the first five rows with the new prev_game_* columns.
df.head(n=5)

Unnamed: 0,game_date,team_abbr,opponent_abbr,team_points,opponent_points,game_lat,game_lon,prev_game_lat,prev_game_lon
0,2018-10-03,BOS,WSH,0,7,38.89806,-77.02083,,
1,2018-10-04,BOS,BUF,4,0,42.875,-78.87639,38.89806,-77.02083
2,2018-10-08,BOS,OTT,6,3,42.366202,-71.064329,42.875,-78.87639
3,2018-10-11,BOS,EDM,4,1,42.366202,-71.064329,42.366202,-71.064329
4,2018-10-13,BOS,DET,8,2,42.366202,-71.064329,42.366202,-71.064329


In [118]:
# Default to TD Garden for the first game of the year, which has no previous
# game to travel from.
TD_GARDEN_LAT = 42.3662019
TD_GARDEN_LON = -71.0643293

# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form warns in pandas 2.x and no
# longer updates `df` once Copy-on-Write is enabled.
df['prev_game_lat'] = df['prev_game_lat'].fillna(TD_GARDEN_LAT)
df['prev_game_lon'] = df['prev_game_lon'].fillna(TD_GARDEN_LON)

In [119]:
# Preview the first five rows after filling the missing previous coordinates.
df.head(n=5)

Unnamed: 0,game_date,team_abbr,opponent_abbr,team_points,opponent_points,game_lat,game_lon,prev_game_lat,prev_game_lon
0,2018-10-03,BOS,WSH,0,7,38.89806,-77.02083,42.366202,-71.064329
1,2018-10-04,BOS,BUF,4,0,42.875,-78.87639,38.89806,-77.02083
2,2018-10-08,BOS,OTT,6,3,42.366202,-71.064329,42.875,-78.87639
3,2018-10-11,BOS,EDM,4,1,42.366202,-71.064329,42.366202,-71.064329
4,2018-10-13,BOS,DET,8,2,42.366202,-71.064329,42.366202,-71.064329


In [122]:
# Row by row, measure the trip from the previous game's location to this
# game's location, in miles.
df['dist_traveled'] = df.apply(
    lambda row: calc_miles_distance(
        origin_lat=row['prev_game_lat'],
        origin_lon=row['prev_game_lon'],
        destination_lat=row['game_lat'],
        destination_lon=row['game_lon'],
    ),
    axis=1,
)

In [123]:
# Preview the first five rows with the new dist_traveled column.
df.head(n=5)

Unnamed: 0,game_date,team_abbr,opponent_abbr,team_points,opponent_points,game_lat,game_lon,prev_game_lat,prev_game_lon,dist_traveled
0,2018-10-03,BOS,WSH,0,7,38.89806,-77.02083,42.366202,-71.064329,393.495276
1,2018-10-04,BOS,BUF,4,0,42.875,-78.87639,38.89806,-77.02083,291.352179
2,2018-10-08,BOS,OTT,6,3,42.366202,-71.064329,42.875,-78.87639,398.593082
3,2018-10-11,BOS,EDM,4,1,42.366202,-71.064329,42.366202,-71.064329,0.0
4,2018-10-13,BOS,DET,8,2,42.366202,-71.064329,42.366202,-71.064329,0.0


In [124]:
# Display the DataFrame, now including dist_traveled (pandas truncates the
# middle rows of long output).
df

Unnamed: 0,game_date,team_abbr,opponent_abbr,team_points,opponent_points,game_lat,game_lon,prev_game_lat,prev_game_lon,dist_traveled
0,2018-10-03,BOS,WSH,0,7,38.898060,-77.020830,42.366202,-71.064329,393.495276
1,2018-10-04,BOS,BUF,4,0,42.875000,-78.876390,38.898060,-77.020830,291.352179
2,2018-10-08,BOS,OTT,6,3,42.366202,-71.064329,42.875000,-78.876390,398.593082
3,2018-10-11,BOS,EDM,4,1,42.366202,-71.064329,42.366202,-71.064329,0.000000
4,2018-10-13,BOS,DET,8,2,42.366202,-71.064329,42.366202,-71.064329,0.000000
...,...,...,...,...,...,...,...,...,...,...
77,2019-03-30,BOS,FLA,1,4,42.366202,-71.064329,42.366202,-71.064329,0.000000
78,2019-03-31,BOS,DET,3,6,42.325280,-83.051390,42.366202,-71.064329,611.635768
79,2019-04-02,BOS,CBJ,6,2,39.969283,-83.006111,42.325280,-83.051390,162.800815
80,2019-04-04,BOS,MIN,3,0,44.944720,-93.101110,39.969283,-83.006111,618.139490
