import plotly as py
# ^^^ pyforest auto-imports - don't write above this line
# NHL Official API Calls

# Imports

In [1]:
import requests
import pandas as pd
from functions import *
import pickle
import wikipedia
%run functions.py

# Making calls

## Getting Team Info

In [2]:
# make_bare_api_call is not defined in this notebook; presumably it comes from
# functions.py, which is loaded in the imports cell.
all_teams = make_bare_api_call("teams")
# Map each team's name to its id, using the list under the 'teams' key.
team_ids = {team['name']: team['id'] for team in all_teams['teams']}

In [3]:
# Persist the team-name -> team-id mapping with pickle so it can be reloaded
# without calling the API again.
with open('data/team_id_dict.pickle', 'wb') as pickle_file:
    pickle.dump(team_ids, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

## Putting all player info into a DataFrame

In [4]:
# Build the player table. get_all_player_ids_into_df is not defined in this
# notebook (presumably it comes from functions.py — confirm there).
all_players_df = get_all_player_ids_into_df()

In [5]:
# Pass the player table through the helper below and keep the result as
# all_players_df2. Judging by its name the helper adds a birthplace column —
# TODO confirm in functions.py. NOTE(review): the call is spelled "bithplace";
# presumably that matches the helper's definition, so it must not be "fixed"
# here alone.
all_players_df2 = add_player_bithplace_to_df(all_players_df)

### Adding Team names to dataframe

In [6]:
# Team ids in the iteration order of team_ids (dicts preserve insertion order).
team_id_values = list(team_ids.values())
# Build the id -> position lookup once, so each player row costs one dict lookup
# instead of a linear list.index() scan. setdefault keeps the first position
# seen for an id, which is what list.index() returned.
position_by_team_id = {}
for position, team_id in enumerate(team_id_values):
    position_by_team_id.setdefault(team_id, position)
# One position per player row. A team id that is missing from team_ids raises
# KeyError here (list.index raised ValueError in the same situation).
team_id_index = [position_by_team_id[x] for x in all_players_df2.team_id]

In [7]:
# Iterating a dict yields its keys, in the same order as .values(), so a
# position taken from the values list selects the matching team name.
team_id_keys = list(team_ids)
all_players_df2['team_name'] = [team_id_keys[position] for position in team_id_index]

## Saving DataFrame 

In [8]:
# Write the player table (including the team_name column) to CSV. No `index`
# argument is passed, so pandas' default applies and the row index is written
# as the first column.
all_players_df2.to_csv("data/player_info_df.csv")

# Getting the location of each team's arena

In [9]:
# Map each team's name to the name of its venue, read from the nested
# 'venue' -> 'name' entry of every team record.
team_arenas = {team['name']: team['venue']['name'] for team in all_teams['teams']}

In [11]:
# Build one coordinate entry per title in wikipedia_titles:
#   * "lat,lng" (both rounded to 4 decimals) when the page exposes coordinates,
#   * the placeholder string 'KeyError' when the lookup raises KeyError,
#   * None when the title is the 'Google Search' marker (presumably arenas
#     whose coordinates have to be looked up by hand — confirm).
# NOTE(review): wikipedia_titles is not defined in any cell shown here; it
# presumably comes from a cell that is missing from this export or from
# functions.py.
coordinates_from_wikipedia = []
for title in wikipedia_titles:
    if title == 'Google Search':
        coordinates_from_wikipedia.append(None)
        continue
    try:
        coords = wikipedia.WikipediaPage(title=title).coordinates
        latitude = round(float(coords[0]), 4)
        longitude = round(float(coords[1]), 4)
        full_coords = f"{latitude},{longitude}"
    except KeyError:
        full_coords = 'KeyError'
    coordinates_from_wikipedia.append(full_coords)

In [12]:
# Collect the positions that still lack real coordinates:
#   * entries the Wikipedia lookup marked with the 'KeyError' placeholder, and
#   * titles flagged 'Google Search' (their coordinate entry is None).
keyerror_indices = [idx for idx, x in enumerate(coordinates_from_wikipedia) if x == 'KeyError']
none_indices = [idx for idx, x in enumerate(wikipedia_titles) if x == 'Google Search']
# Sort the combined list. Plain concatenation puts every KeyError index before
# every 'Google Search' index, which is not necessarily positional order. The
# manually entered coordinates further down are paired with these indices
# one-by-one in the order the arenas are listed (positional order), so an
# unsorted list would attach coordinates to the wrong arenas.
all_missing_indices = sorted(keyerror_indices + none_indices)
print(f"We are missing {len(all_missing_indices)} arena coordinates")
# Bare expression so the notebook displays the list of positions.
all_missing_indices

We are missing 3 arena coordinates


[1, 5, 27]

In [13]:
## Show the names of the arenas whose position is listed in all_missing_indices
for position, arena_name in enumerate(team_arenas.values()):
    if position not in all_missing_indices:
        continue
    print(arena_name)

Nassau Veterans Memorial Coliseum
TD Garden
Xcel Energy Center


In [14]:
# Convert a coordinate string copied from Google ("<lat>° N, <lng>° W") into the
# "lat,lng" format used in coordinates_from_wikipedia. This cell was re-run by
# hand once for each missing coordinate found online.
coords = turn_coords_to_correct_format("44.9446° N, 93.1002° W")
# Bare expression so the notebook displays the converted string.
coords

'44.9446,-93.1002'

In [16]:
# Manually looked-up coordinates in "lat,lng" form, listed in the same order
# in which the missing arenas were printed above.
arena_coord_list = [
    '40.7233,-73.5907',  # Nassau Veterans Memorial Coliseum
    '42.3662,-71.0621',  # TD Garden
    '44.9446,-93.1002',  # Xcel Energy Center
]

In [17]:
# Fill each missing slot with its manually looked-up coordinates.
# arena_coord_list[i] belongs to the arena at position all_missing_indices[i],
# so the two lists must have the same length and the same ordering.
if len(arena_coord_list) != len(all_missing_indices):
    # Fail before touching the list instead of silently leaving placeholders
    # behind (too few coordinates) or hitting an IndexError half-way through
    # (too many coordinates).
    raise ValueError(
        f"Expected {len(all_missing_indices)} manual coordinates, "
        f"got {len(arena_coord_list)}"
    )
for idx_to_replace, manual_coords in zip(all_missing_indices, arena_coord_list):
    coordinates_from_wikipedia[idx_to_replace] = manual_coords

In [18]:
# One row per team: with orient='index' the dict keys (team names) become the
# row index and the dict values (arena names) fill the single column.
arena_df = pd.DataFrame.from_dict(team_arenas, orient='index')

In [19]:
# Name the arena column, then attach the coordinates.
# Only the first column label is replaced (instead of overwriting
# arena_df.columns with a fixed two-name list) so this cell can be re-run: on a
# second run the frame already has three columns and a two-name assignment
# would raise ValueError.
arena_df.columns = ['Arena_name', *arena_df.columns[1:]]
arena_df['Coordinates'] = coordinates_from_wikipedia
# adding "canadian_arenas" feature: True when the team's arena name appears in
# the list below
canadian_arenas = ["Bell Centre", "Canadian Tire Centre", "Scotiabank Arena", "Scotiabank Saddledome",
                  "Rogers Place", "Rogers Arena", "Bell MTS Place"]
# A membership test already yields a bool, so no `True if ... else False`.
canadian_teams = [arena in canadian_arenas for arena in arena_df['Arena_name']]
arena_df['Canadian_team'] = canadian_teams

## Saving Arena Info

In [21]:
# Write the arena table (arena name, coordinates, Canadian flag) to CSV. No
# `index` argument is passed, so pandas' default applies and the row index
# (the team names) is written as the first column.
arena_df.to_csv("data/arena_df.csv")