# Plot and Analyze the Closest Team to each player

# Imports

In [1]:
import pandas as pd
from functions import *
%run functions.py
import folium 

## Data

In [2]:
# Load the player table, using the first CSV column as the index.
player_df = pd.read_csv("data/all_players_full_info.csv", index_col=0)

# Load the arena table and give its four columns consistent names.
arena_column_names = ['Team_name', 'Arena_name', 'Coordinates', 'Canadian_team']
arena_df = pd.read_csv("data/arena_df.csv")
arena_df.columns = arena_column_names

In [3]:
# Preview the first five player rows as a quick check of the loaded columns.
player_df.head(5)

Unnamed: 0,id,fullName,link,position,team_id,birthCity,birthStateProvince,birthCountry,nationality,team_name,city_province,coordinates
0,8479414,Nathan Bastian,/api/v1/people/8479414,RW,1,Kitchener,ON,CAN,CAN,New Jersey Devils,"Kitchener,ON","43.4186,-80.4728"
1,8481740,Tyce Thompson,/api/v1/people/8481740,RW,1,Calgary,AB,CAN,CAN,New Jersey Devils,"Calgary,AB","51.05,-114.0667"
2,8471233,Travis Zajac,/api/v1/people/8471233,C,1,Winnipeg,MB,CAN,CAN,New Jersey Devils,"Winnipeg,MB","49.8844,-97.1464"
3,8474056,P.K. Subban,/api/v1/people/8474056,D,1,Toronto,ON,CAN,CAN,New Jersey Devils,"Toronto,ON","43.7417,-79.3733"
4,8475809,Scott Wedgewood,/api/v1/people/8475809,G,1,Brampton,ON,CAN,CAN,New Jersey Devils,"Brampton,ON","43.6833,-79.7667"


### Adding Coordinates to each df

In [4]:
# Sanity check: a coordinates entry is one "lat,lng" string, so splitting on
# the comma yields the latitude and longitude parts.
first_player_coords = player_df['coordinates'][0]
first_player_coords.split(',')

['43.4186', '-80.4728']

In [5]:
# Split every "lat,lng" string once (instead of once per output column) and
# store the two parts as floats, so Lat/Lng are numeric columns rather than text.
player_lat_lng = player_df['coordinates'].str.split(',', expand=True)
player_df['Lat'] = player_lat_lng[0].astype(float)
player_df['Lng'] = player_lat_lng[1].astype(float)

In [6]:
# Split every arena "lat,lng" string once (instead of once per output column)
# and store the two parts as floats, so Lat/Lng are numeric columns rather than text.
arena_lat_lng = arena_df['Coordinates'].str.split(',', expand=True)
arena_df['Lat'] = arena_lat_lng[0].astype(float)
arena_df['Lng'] = arena_lat_lng[1].astype(float)

# EDA

## Ratio of players to teams in each country

In [7]:
# Which country has more home-born players per team based in that country?
# First separate the players by birth country...
born_in_canada = player_df['birthCountry'] == 'CAN'
born_in_usa = player_df['birthCountry'] == 'USA'
can_players = player_df[born_in_canada]
us_players = player_df[born_in_usa]

# ...then count the teams per country. Summing Canadian_team gives the number
# of Canadian teams (presumably a 0/1 or boolean flag -- verify against the CSV);
# every remaining arena row is counted as an American team.
count_of_can_teams = int(arena_df['Canadian_team'].sum())
count_of_amer_teams = len(arena_df) - count_of_can_teams

In [8]:
# Canadian-born players per Canadian team: roughly 51
len(can_players) / count_of_can_teams

50.857142857142854

In [9]:
# American-born players per American team: roughly 9
len(us_players) / count_of_amer_teams

9.416666666666666

## How many players per province/state?

In [10]:
# Number of players (non-null ids) born in each state/province.
players_grouped_by_birthplace = player_df.groupby('birthStateProvince')
players_by_prov_state = players_grouped_by_birthplace['id'].count()

In [11]:
# The five most common birth states/provinces; Canadian provinces take the top 4 spots.
players_by_prov_state.sort_values(ascending=False).head(5)

birthStateProvince
ON    164
QC     48
AB     43
BC     41
MN     34
Name: id, dtype: int64

In [12]:
# The five least common birth states/provinces (first five after an ascending sort).
players_by_prov_state.sort_values().head(5)

birthStateProvince
YT    1
AK    1
AL    1
UT    1
SC    1
Name: id, dtype: int64

In [13]:
# Number of distinct birth states/provinces in the data: 42 are represented!
players_by_prov_state.count()

42

# Plotting Birthplaces

In [14]:
# turn_coords_to_correct_format("47.1428° N, 99.7812° W")

In [15]:
# The center of the US and Canada is apparently in North Dakota (Robinson, N.D.)
# according to https://www.nytimes.com/2017/01/25/science/north-america-geographical-center-north-dakota.html
# That point is a little too high for this map, so the latitude is lowered by 4 degrees.
center_coords_str = '47.1428,-99.7812'
# Split on the comma instead of slicing fixed character positions, so this
# keeps working if the coordinate string is edited to a different length.
center_lat_str, center_lng_str = center_coords_str.split(',')
center_lat = float(center_lat_str) - 4
center_lng = float(center_lng_str)

In [16]:
# Create the base map, centered on the adjusted midpoint at zoom level 4.
map_center = [center_lat, center_lng]
map2 = folium.Map(location=map_center, zoom_start=4)

In [17]:
# adding players: one small gray dot per player at their Lat/Lng
# (tooltip = player name, popup = the team they play for).
# zip() walks the columns positionally, so the loop does not depend on the
# DataFrame index being exactly 0..n-1.
for team_name, full_name, lat, lng in zip(
    player_df['team_name'], player_df['fullName'], player_df['Lat'], player_df['Lng']
):
    folium.CircleMarker(
        [lat, lng],
        # `color` is the outline (stroke) option; `line_color` is not a folium
        # path option, so the intended outline colour was never applied.
        radius=2, color='black', fill_color='gray', fill_opacity=0.1,
        popup="<i>" + team_name + "</i>", tooltip=full_name,
    ).add_to(map2)

# adding arenas: one large red circle per arena
# (tooltip = team name, popup = arena name).
for arena_name, arena_team, lat, lng in zip(
    arena_df['Arena_name'], arena_df['Team_name'], arena_df['Lat'], arena_df['Lng']
):
    folium.CircleMarker(
        [lat, lng],
        radius=15, color='red', fill_color='red', fill_opacity=0.4,
        popup="<i>" + arena_name + "</i>", tooltip=arena_team,
    ).add_to(map2)

In [18]:
# Show the map with the player and arena markers as this cell's output.
map2

# Saving Data

In [19]:
# Write the map out as an HTML file.
map_output_path = "data/player_and_arena_locations.html"
map2.save(map_output_path)

In [20]:
# Save both tables, now including the Lat/Lng columns, to CSV.
frames_to_save = (
    (player_df, "data/all_players_w_locations.csv"),
    (arena_df, "data/arenas_w_locations.csv"),
)
for frame, csv_path in frames_to_save:
    frame.to_csv(csv_path)