<h1>NHL Player Data and Statistics</h1>

To make better comparisons between players, pull each player's on-ice statistics and "vitals" (height, weight, birth date, etc.) for the past two seasons (2017-2018 and 2018-2019).

In [2]:
import numpy as np
import pandas as pd
import os
import io
import seaborn as sns
import requests
import json

import matplotlib.pyplot as plt
import pymc3 as pm

import scipy
import scipy.stats as stats
import scipy.optimize as opt
#import statsmodels.api as sm

# Notebook display configuration: let pandas render up to 500 columns and 500
# rows before truncating (each option only needs to be set once).
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
# NOTE(review): newer matplotlib releases renamed the bundled seaborn styles
# (e.g. 'seaborn-v0_8-darkgrid') -- confirm this name against the installed version.
plt.style.use('seaborn-darkgrid')
%matplotlib inline 

In [3]:
# Load the combined multi-season dataset from a path relative to the notebook.
# NOTE(review): the 'Unnamed: 0' column in the listing below suggests the CSV
# was saved with its index; reading with index_col=0 may be intended -- confirm.
data = pd.read_csv("data/combined_seasons.csv")

In [4]:
# Show the available columns; the cells below use Player_1_Name, Player_1_ID and Season.
data.columns

Index(['Unnamed: 0', 'Assist1', 'Assist1_ID', 'Assist2', 'Assist2_ID',
       'Assists2_ID', 'Away', 'Away_Score', 'EmptyNet', 'Event', 'EventID',
       'Event_Description', 'Event_Team', 'Event_Type_ID', 'Final_Away_Score',
       'Final_Home_Score', 'GameDate', 'GameID', 'GameState', 'GameType',
       'Goalie', 'Goalie_ID', 'Home', 'Home_Score', 'Period', 'Period_Type',
       'Player_1_ID', 'Player_1_Name', 'Player_2_ID', 'Player_2_Name',
       'Scorer', 'Scorer_ID', 'Shooter', 'Shooter_ID', 'ShotType',
       'Time_Remaining', 'Time_in_Period', 'Venue', 'x', 'y', 'Side', 'Season',
       'y_stdz', 'x_stdz', 'y_normal', 'x_normal', 'Event_Team_Adj', 'y_same',
       'x_same', 'zone_x', 'zone_y', 'shot_zone', 'Opposition Team',
       'Shifted_Event_Type_ID', 'Shifted_shot_zone', 'Shifted_Time_Remaining',
       'Event_Time_Difference', 'Shot_Context'],
      dtype='object')

In [5]:
# Build the roster of distinct (name, id, season) combinations. Grouping on
# every selected column leaves no value columns to count, so after
# reset_index() only the group keys remain: one row per combination.
players = (
    data[['Player_1_Name', 'Player_1_ID', 'Season']]
    .groupby(['Player_1_Name', 'Player_1_ID', 'Season'])
    .count()
    .reset_index()
)

In [9]:
# Function for fetching player "vitals" (biographical / roster fields) by season

# Output column -> path of keys inside one entry of the API's 'people' list.
# The order here is the column order of the returned DataFrame.
_VITALS_FIELDS = (
    ('Name', ('fullName',)),
    ('PrimaryNumber', ('primaryNumber',)),
    ('BirthDate', ('birthDate',)),
    ('BirthCity', ('birthCity',)),
    ('BirthState', ('birthStateProvince',)),
    ('BirthCountry', ('birthCountry',)),
    ('Nationality', ('nationality',)),
    ('Height', ('height',)),
    ('Weight', ('weight',)),
    ('Active', ('active',)),
    ('AlternateCaptain', ('alternateCaptain',)),
    ('Captain', ('captain',)),
    ('Rookie', ('rookie',)),
    ('Shoots_Catches', ('shootsCatches',)),
    ('RosterStatus', ('rosterStatus',)),
    ('CurrentTeam', ('currentTeam', 'name')),
    ('Position', ('primaryPosition', 'abbreviation')),
)


def _extract_field(record, path, default='NA'):
    """Follow `path` (a sequence of keys/indices) into `record`.

    Returns `default` as soon as a step is missing or the current value
    cannot be indexed, instead of raising.
    """
    value = record
    for key in path:
        try:
            value = value[key]
        except (KeyError, IndexError, TypeError):
            return default
    return value


def get_player_vitals(season, players):
    """Fetch biographical and roster details for every player seen in `season`.

    One HTTP request is issued per player id, so the call time grows with the
    number of players.
    NOTE(review): statsapi.web.nhl.com is a legacy endpoint -- confirm it is
    still served before re-running.

    Parameters
    ----------
    season : str
        Season label as stored in ``players['Season']``, e.g. ``'2017-2018'``.
        The dash is stripped to build the API's ``season`` query value.
    players : pandas.DataFrame
        Must contain the columns ``'Season'`` and ``'Player_1_ID'``. Rows whose
        id is the literal string ``'None'`` are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per requested player, in iteration order, with the columns
        listed in ``_VITALS_FIELDS``. Any field absent from the response is
        filled with the string ``'NA'``; a response without a ``'people'``
        entry yields a row of ``'NA'`` values rather than aborting the run.
        The frame carries no player-id column.

    Raises
    ------
    Network errors from ``requests.get`` and JSON decoding errors from
    ``Response.json`` are not caught.
    """
    season_label = str(season)
    season_num = season_label.replace('-', '')
    columns = {column: [] for column, _ in _VITALS_FIELDS}

    season_ids = players.loc[players['Season'] == season_label, 'Player_1_ID']
    for player_id in season_ids:
        if player_id == 'None':
            continue

        # str.format (rather than '+') so non-string ids do not raise TypeError.
        url = 'https://statsapi.web.nhl.com/api/v1/people/{}?season={}'.format(
            player_id, season_num)
        payload = requests.get(url).json()

        # An error payload has no 'people' list; fall back to an empty record
        # so every field below resolves to 'NA'.
        person = _extract_field(payload, ('people', 0), default={})
        for column, path in _VITALS_FIELDS:
            columns[column].append(_extract_field(person, path))

    player_vitals = pd.DataFrame(columns)

    return player_vitals

In [10]:
# Fetch vitals separately for each season. get_player_vitals issues one HTTP
# request per player, so this cell can take a while to run.
player_vitals_20172018 = get_player_vitals('2017-2018', players)
player_vitals_20182019 = get_player_vitals('2018-2019', players)

In [12]:
def get_player_stats_shooters(season, players, template_player_id='8478421'):
    """Fetch single-season statistics for every player seen in `season`.

    The set of stat columns is taken from the response for
    `template_player_id`; every other player is then queried individually and
    the same stat names are read from their response.
    NOTE(review): statsapi.web.nhl.com is a legacy endpoint -- confirm it is
    still served before re-running.

    Parameters
    ----------
    season : str
        Season label as stored in ``players['Season']``, e.g. ``'2017-2018'``.
        The dash is stripped to build the API's ``season`` query value.
    players : pandas.DataFrame
        Must contain the columns ``'Season'``, ``'Player_1_ID'`` and
        ``'Player_1_Name'``. Rows whose id is the literal string ``'None'``
        are skipped, matching ``get_player_vitals``.
    template_player_id : str, optional
        Player whose response defines which stat names become columns.
        Defaults to the id that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per requested player with one column per template stat name,
        then ``'PlayerID'``, then the ``'Player_1_ID'`` and ``'Player_1_Name'``
        columns brought in by a left merge against the distinct id/name pairs
        of `players` (all seasons). A stat missing from a player's response is
        filled with the string ``'NA'``.

    Raises
    ------
    KeyError, IndexError
        If the template player's response has no stat split for `season`.
    Network errors from ``requests.get`` and JSON decoding errors from
    ``Response.json`` are not caught.
    """
    season_label = str(season)
    season_num = season_label.replace('-', '')
    # str.format (rather than '+') so non-string ids do not raise TypeError.
    url_template = ('https://statsapi.web.nhl.com/api/v1/people/{}'
                    '/stats?stats=statsSingleSeason&season={}')

    # The template response decides the stat columns and their order.
    template_payload = requests.get(
        url_template.format(template_player_id, season_num)).json()
    stat_names = list(template_payload['stats'][0]['splits'][0]['stat'])
    stats_dict = {stat_name: [] for stat_name in stat_names}
    stats_dict['PlayerID'] = []

    season_ids = players.loc[players['Season'] == season_label, 'Player_1_ID']
    for player_id in season_ids:
        if player_id == 'None':
            # Placeholder id: there is nothing to request for it.
            continue

        payload = requests.get(url_template.format(player_id, season_num)).json()

        # Pull the stat mapping out once; a player without a split for this
        # season (or an error payload) gets 'NA' in every stat column.
        try:
            season_stats = payload['stats'][0]['splits'][0]['stat']
        except (KeyError, IndexError, TypeError):
            season_stats = {}
        if not isinstance(season_stats, dict):
            season_stats = {}

        for stat_name in stat_names:
            stats_dict[stat_name].append(season_stats.get(stat_name, 'NA'))
        stats_dict['PlayerID'].append(player_id)

    player_stats_shooters = pd.DataFrame(stats_dict)

    # Attach player names; how='left' keeps every fetched stats row.
    player_stats_shooters = player_stats_shooters.merge(
        players[['Player_1_ID', 'Player_1_Name']].drop_duplicates(),
        left_on=['PlayerID'], right_on=['Player_1_ID'],
        how='left')
    return player_stats_shooters

In [13]:
# Fetch single-season stats separately for each season. Each call issues one
# HTTP request per player (plus one for the template player), so this cell can
# take a while to run.
shooter_stats_20172018 = get_player_stats_shooters('2017-2018', players)
shooter_stats_20182019 = get_player_stats_shooters('2018-2019', players)

In [16]:
# Write the per-season stats and vitals tables to CSV under data/, omitting the
# DataFrame index so no extra index column appears when the files are re-read.
# NOTE(review): missing values were stored as the literal string 'NA'; pandas'
# read_csv defaults treat that text as missing -- confirm that is intended.
shooter_stats_20172018.to_csv('data/shooter_stats_20172018.csv', index=False)
shooter_stats_20182019.to_csv('data/shooter_stats_20182019.csv', index=False)
player_vitals_20172018.to_csv('data/player_vitals_20172018.csv', index=False)
player_vitals_20182019.to_csv('data/player_vitals_20182019.csv', index=False)