# Finding the closest arena to each player's birthplace

- I'm following the work done in this github repo: https://github.com/shakasom/NearestNeighbour-Analysis/blob/master/NNA.ipynb

# Imports 

In [6]:
import pandas as pd
from functions import *
%run functions.py
import folium
import geopandas as gpd
import shapely
from haversine import haversine, Unit # I could have used geopy.distance, too
# could have used OSRM API (http://project-osrm.org/docs/v5.5.1/api/#route-service) to get driving distance
from shapely.geometry import LineString, Point

## Data

In [7]:
player_df = pd.read_csv("data/all_players_w_locations.csv", index_col=0)
arena_df = pd.read_csv("data/arenas_w_locations.csv", index_col=0)

In [8]:
player_df.head(2)

Unnamed: 0,id,fullName,link,position,team_id,birthCity,birthStateProvince,birthCountry,nationality,team_name,city_province,coordinates,Lat,Lng
0,8479414,Nathan Bastian,/api/v1/people/8479414,RW,1,Kitchener,ON,CAN,CAN,New Jersey Devils,"Kitchener,ON","43.4186,-80.4728",43.4186,-80.4728
1,8481740,Tyce Thompson,/api/v1/people/8481740,RW,1,Calgary,AB,CAN,CAN,New Jersey Devils,"Calgary,AB","51.05,-114.0667",51.05,-114.0667


In [9]:
arena_df.head(2)

Unnamed: 0,Team_name,Arena_name,Coordinates,Canadian_team,Lat,Lng
0,New Jersey Devils,Prudential Center,"40.7336,-74.1711",False,40.7336,-74.1711
1,New York Islanders,Nassau Veterans Memorial Coliseum,"40.7233,-73.5907",False,40.7233,-73.5907


# Converting files to Geodataframes

In [10]:
player_gdf = create_gdf(player_df)
arena_gdf = create_gdf(arena_df)

  return _prepare_from_string(" ".join(pjargs))


# Finding Nearest Neighbors

In [11]:
player_gdf["nearest_geom"] = player_gdf.apply(calculate_nearest, destination=arena_gdf, val="geometry", axis=1)
player_gdf["nearest_team"] = player_gdf.apply(calculate_nearest, destination=arena_gdf, val="Team_name", axis=1)

## Calculating distance between birthplace and arena

In [12]:
player_gdf['haversine_distance'] = [haversine([player_gdf['geometry'][idx].x, player_gdf['geometry'][idx].y],
                                             [player_gdf['nearest_geom'][idx].x, player_gdf['nearest_geom'][idx].y],
                                             unit=Unit.MILES) for idx in range(len(player_gdf))]

In [25]:
player_gdf['close_to_any_team'] = [True if player_gdf['haversine_distance'][idx] < 60 else False 
                               for idx in range(len(player_gdf))]
print(f"About {round(player_gdf.close_to_any_team.sum() / player_gdf.shape[0], 2)}% of North American players are born close to an NHL team")

About 0.68% of North American players are born close to an NHL team


## Adding Lines to closest team

In [15]:
# You need to flip each coordinate for linestring
flipped_geom = [Point(coord.y, coord.x) for coord in player_gdf['geometry']]
flipped_nearest_geom = [Point(coord.y, coord.x) for coord in player_gdf['nearest_geom']]
line_2 = [LineString([flipped_geom[idx], flipped_nearest_geom[idx]]) for idx in range(len(flipped_geom))]
player_gdf['line'] = line_2

### Making new gdf for the lines

In [16]:
line_gdf = player_gdf[["fullName", 'city_province', "nearest_team", "line"]].set_geometry('line')
# Set the Coordinate reference
line_gdf.crs = crs={"init":"epsg:4326"}

  return _prepare_from_string(" ".join(pjargs))


# Mapping the data

In [17]:
center_coords_str = '47.1428,-99.7812'
center_lat = float(center_coords_str[0:7]) - 4
center_lng = float(center_coords_str[-8:]) 
my_map = folium.Map(location=[center_lat, center_lng], zoom_start=4)

In [19]:
# adding the data
# adding players (I can't differentiate the color of the dots)
for idx in range(len(player_gdf)):
    player_name = "<i>" + player_gdf['team_name'][idx] + "</i>"
    folium.CircleMarker(
    [player_gdf['Lat'][idx], player_gdf['Lng'][idx]], 
        radius=2, line_color='black', fill_color='gray', fill_opacity=0.1,
        popup=player_name, tooltip=player_gdf['fullName'][idx]
    ).add_to(my_map)
# adding arenas
for idx in range(len(arena_df)):
    arena_name = "<i>" + arena_df['Arena_name'][idx] + "</i>"
    folium.CircleMarker(
    [arena_df['Lat'][idx], arena_df['Lng'][idx]], 
        radius=15, line_color='red', fill_color='red', fill_opacity=0.4,
        popup=arena_name, tooltip=arena_df['Team_name'][idx]
    ).add_to(my_map)
# the lines 
folium.GeoJson(line_gdf).add_to(my_map)


<folium.features.GeoJson at 0x7fad4e5284d0>

In [20]:
my_map

## Saving the data 

In [21]:
# my_map.save("data/players_to_teams_w_lines.html")

# player_gdf.to_csv("data/player_gdf.csv")