In [None]:
import requests
import json
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots

In [None]:
def prospect_dob(playerId):
    '''
    prospect_dob gets the prospects API data and returns the date of birth.

    :param playerId: is the id of the prospect; '' or None means the pick
        has no prospect attached
    :return: the 'birthDate' value of the first prospect record, or '' when
        there is no prospect id or the response holds no birth date
    :raises requests.HTTPError: when the API answers with an error status
    '''
    # nothing to look up for picks without a prospect id
    if playerId is None or playerId == '':
        return ''

    # the timeout keeps a stalled connection from hanging the whole collection run
    r = requests.get(
        'https://statsapi.web.nhl.com/api/v1/draft/prospects/{}'.format(playerId),
        timeout=30,
    )
    # surface HTTP errors instead of trying to read an error body as prospect data
    r.raise_for_status()
    jason = json.loads(r.text)

    # navigate to the DOB of the prospect/player; an absent record or an
    # absent/empty birthDate gives the same '' sentinel as a missing id
    prospects = jason.get('prospects') or [{}]
    return prospects[0].get('birthDate') or ''

In [None]:
# configuration: the reference year and how many drafts before it are analyzed
current_year = 2023
past_years_to_analyze = 30

# draft years from (current_year - past_years_to_analyze) up to, but excluding, current_year
first_draft = current_year - past_years_to_analyze
analyzed_drafts = [*range(first_draft, current_year)]

# print oldest and most recent draft class to be analyzed
oldest_draft, newest_draft = analyzed_drafts[0], analyzed_drafts[-1]
print('The range of analyzed NHL drafts is: ' + str(oldest_draft) + ' - ' + str(newest_draft) + '.')

In [None]:
# collect one dataframe per draft year; they are combined once after the loop
# (DataFrame.append, which grew the frame step by step, was removed in pandas 2.0)
draft_frames = []

for yr in analyzed_drafts:

    print('Requesting data for draft ' + str(yr) + '.')
    # request the draft data for the draft year and turn data to JSON;
    # the timeout keeps a stalled connection from hanging the run and
    # raise_for_status surfaces HTTP errors instead of parsing an error body
    r = requests.get('https://statsapi.web.nhl.com/api/v1/draft/{}'.format(yr), timeout=30)
    r.raise_for_status()
    jason = json.loads(r.text)

    # create dataframe from data
    df = pd.DataFrame.from_records(jason['drafts'][0])

    # unpack the first ('rounds') level, concat and drop
    df_add = df['rounds'].apply(pd.Series)
    df = pd.concat([df, df_add], axis=1).drop('rounds', axis=1)

    # the picks column is a list of dictionaries, so explode
    df = df.explode('picks')

    # unpack the next level ('picks'), concat and drop
    df_add = df['picks'].apply(pd.Series)
    df = pd.concat([df, df_add], axis=1).drop('picks', axis=1)

    # unpack the 'team' level
    df_add = df['team'].apply(pd.Series)
    # rename columns to make them unique, change the dtype of some of them
    df_add = df_add.rename(columns={'id': 'teamId', 'name': 'teamName', 'link': 'teamLink'})
    df_add['teamId'] = df_add['teamId'].astype(int)
    # concat and drop
    df = pd.concat([df, df_add], axis=1).drop('team', axis=1)

    # unpack the 'prospect' level
    df_add = df['prospect'].apply(pd.Series)
    # rename columns to make them unique to players
    df_add = df_add.rename(columns={'link': 'playerLink', 'id': 'playerId'})
    # draft picks are sometimes void or forfeited and then have no player id,
    # so a missing id becomes '' (which prospect_dob answers without a request)
    df_add['playerId'] = df_add['playerId'].apply(lambda x: '' if pd.isnull(x) else str(int(x)))
    # concat and drop
    df = pd.concat([df, df_add], axis=1).drop('prospect', axis=1)

    # drop unnecessary columns ('round' exists at both the rounds and the
    # picks level; dropping the label removes every column carrying it)
    df = df.drop(columns=['round', 'year', 'teamLink', 'playerLink'])

    print('Requesting date of birth data for ' + str(yr) + ' draft class.')
    df['playerBirthDate'] = df['playerId'].apply(prospect_dob)
    df['playerBirthDate'] = pd.to_datetime(df['playerBirthDate'])

    # get the month from the date
    df['playerBirthDateMonth'] = df['playerBirthDate'].dt.month

    draft_frames.append(df)

# single concat instead of growing the frame inside the loop; the per-draft
# index is kept as-is, and an empty year list still yields an empty dataframe
final_df = pd.concat(draft_frames) if draft_frames else pd.DataFrame()

print('Data collected, done!')
final_df.head()

In [None]:
# write the collected draft dataframe (including its index) to a CSV file
final_df.to_csv(path_or_buf='nhl_draft_dobs.csv')

In [None]:
# reload the dataset from the CSV file so the JSON downloads need not be repeated
final_df = pd.read_csv(filepath_or_buffer='nhl_draft_dobs.csv')

## Data Quality Stats

In [None]:
# size of the dataset as (number of rows, number of columns)
n_rows, n_cols = final_df.shape
(n_rows, n_cols)

In [None]:
# exclude all picks which are void
non_void_picks = final_df.loc[final_df['fullName'] != 'Void']

# per draft year: count the non-null names and the non-null birth dates
dq_df = (
    non_void_picks[['draftYear', 'fullName', 'playerBirthDate']]
    .groupby(by=['draftYear'], as_index=False)
    .count()
    .rename(columns={'fullName': 'countOfPicks', 'playerBirthDate': 'countOfPlayerBirthDates'})
)

# birth dates per pick, as a percentage rounded to one decimal
dob_share = dq_df['countOfPlayerBirthDates'] / dq_df['countOfPicks']
dq_df['dobPercentage'] = (dob_share * 100).round(1)

# produce the data quality statistics chart: one bar per draft year
dq_bars = go.Bar(x=dq_df['draftYear'], y=dq_df['dobPercentage'])
fig = go.Figure([dq_bars])

# label the axes and reverse the direction of the x axis
fig.update_layout(
    xaxis_title="Draft Years",
    yaxis_title="Percentage of obtained DOBs",
    xaxis={'autorange': "reversed"},
)

fig.show()

### Produce Single Chart

In [None]:
# prepare the dataframe for plotting: for every (draft year, birth month)
# pair, count the non-null fullName values
picks_by_month = final_df[['draftYear', 'playerBirthDateMonth', 'fullName']]
plot_df = (
    picks_by_month
    .groupby(by=['draftYear', 'playerBirthDateMonth'], as_index=False)
    .count()
    .rename(columns={'fullName': 'count'})
)
plot_df.head(20)

In [None]:
# months used for labels for angular axis
months = ['Jan', 'Feb', 'March', 'April', 'May', 'June', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']

fig = go.Figure()

# produce the polar chart with one trace per draft class year
for yr in analyzed_drafts:

    filt = plot_df['draftYear'] == yr  # filter for plot dataframe
    yr_df = plot_df[filt]

    # Look each count up by its month number and use 0 for months that have
    # no row, so r always holds 12 values lined up with the labels in months.
    # Taking the counts positionally would shift every later bar onto the
    # wrong month as soon as one month is absent for a draft year.
    count_by_month = dict(zip(yr_df['playerBirthDateMonth'], yr_df['count']))
    r = [count_by_month.get(mth, 0) for mth in range(1, 13)]  # radial coordinates

    fig.add_trace(go.Barpolar(
        r=r,
        theta=months,
        name=yr,
    ))

# resize the final plot
fig.update_layout(
    autosize=False,
    width=800,
    height=800,)

# update the layout of the chart
fig.update_layout(
    showlegend = True,
    legend={'title':'Draft Class'},
    font_size=16,
    legend_font_size=16,
    polar=dict(
        angularaxis=dict(
            direction='clockwise', #set the orientation of the traces
            rotation=75, #introduce the offset to the orientation
        ))
)
fig.show()

In [None]:
# stats on the findings

# sum the counts of all rows in plot_df per birth month
month_counts_only = plot_df[['playerBirthDateMonth', 'count']]
stat_df = month_counts_only.groupby(by=['playerBirthDateMonth'], as_index=False).sum()

# each month's share of the overall count, in percent
overall_count = stat_df['count'].sum()
stat_df['mthPerc'] = (stat_df['count'] / overall_count) * 100

# running sum of mthPerc over a window of three consecutive rows
stat_df['qtMthPerc'] = stat_df['mthPerc'].rolling(3).sum()
stat_df

### Print Master Chart Without Years

In [None]:
# months used for labels for angular axis
months = ['Jan', 'Feb', 'March', 'April', 'May', 'June', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']

fig = go.Figure()

# Look each total up by its month number and use 0 for a month without a row,
# so r always holds 12 values lined up with the labels in months instead of
# relying on stat_df containing exactly one row per month in calendar order.
count_by_month = dict(zip(stat_df['playerBirthDateMonth'], stat_df['count']))
r = [count_by_month.get(mth, 0) for mth in range(1, 13)]

fig.add_trace(go.Barpolar(
    r=r,
    theta=months,
))

# resize the final plot
fig.update_layout(
    autosize=False,
    width=800,
    height=800,)

# update the layout of the chart
fig.update_layout(
    title='NHL Drafted Prospects Month Of Birth',
    legend={'title':'Draft Class'},
    font_size=16,
    legend_font_size=16,
    polar=dict(
        angularaxis=dict(
            direction='clockwise', #set the orientation of the traces
            rotation=75, #introduce the offset to the orientation
        ))
)
fig.show()

### Produce Chart for Each Draft Class

In [None]:
# cols variable for how many polar bars should be in the row
cols = 5
# ceiling division: just enough rows to hold every draft class (at least one);
# `len // cols + 1` added an empty extra row whenever the number of drafts
# was an exact multiple of cols
rows = max(1, -(-len(analyzed_drafts) // cols))

# newest draft first, so the charts run left to right and top to bottom
# starting with the most recent draft class
drafts_newest_first = list(reversed(analyzed_drafts))

fig = make_subplots(
    rows=rows, cols=cols,
    # every cell of the grid is declared as a polar subplot; specs has to have
    # the same dimensions (rows, columns) as the subplot grid
    specs=[[{"type": "polar"} for _ in range(cols)] for _ in range(rows)],
    subplot_titles=drafts_newest_first)

# produce the polar charts for the subplot
for idx, yr in enumerate(drafts_newest_first):
    cl = idx % cols + 1   # column position (1-based)
    rw = idx // cols + 1  # row position (1-based)

    filt = plot_df['draftYear'] == yr  # filter for plotting the dataframe
    yr_df = plot_df[filt]

    # Look each count up by its month number and use 0 for months that have
    # no row, so r always holds 12 values lined up with the labels in months.
    # Taking the counts positionally would shift every later bar onto the
    # wrong month as soon as one month is absent for a draft year.
    count_by_month = dict(zip(yr_df['playerBirthDateMonth'], yr_df['count']))
    r = [count_by_month.get(mth, 0) for mth in range(1, 13)]  # radial coordinates

    fig.add_trace(go.Barpolar(
        r=r,
        theta=months,
        name=yr
    ), row=rw, col=cl)


fig.update_layout(
    showlegend = False,
    )

fig.update_layout(
    autosize=False,
    width=800,
    height=1800,)

# build the layout keys ['polar', 'polar2', 'polar3', ...], one per draft class;
# in a subplot figure each polar chart has to be referenced separately
polar_list = ['polar' if i == 1 else 'polar{}'.format(i)
              for i in range(1, len(analyzed_drafts) + 1)]

# fig's layout properties can be accessed like a dict and updated with .update
for plr in polar_list:
    fig['layout'][plr].update(dict(
        angularaxis = dict(rotation=75, direction='clockwise', showticklabels=False),
        radialaxis = dict(showticklabels=False),
    ))


fig.show()

## Produce Chart of First Overalls

In [None]:
# filter for just first overall
filt = final_df['pickOverall'] == 1
first_overall = final_df.loc[filt, ['draftYear', 'playerBirthDateMonth']]

# count the first overall picks per birth month
first_overall_counts = (
    first_overall
    .groupby(by=['playerBirthDateMonth'], as_index=False)
    .count()
    .rename(columns={'draftYear': 'count'})
)

# reference frame holding the month numbers 1 to 12
ref_df = pd.DataFrame({'month': list(range(1, 13))})

# the left join keeps one row for each of the 12 months; months without a
# first overall pick come out of the join as NaN and are filled with 0
plot2_df = (
    pd.merge(ref_df, first_overall_counts, how='left', left_on='month', right_on='playerBirthDateMonth')
    .drop(columns='playerBirthDateMonth')
    .fillna(0)
)

plot2_df.head(30)

### Chart First Overall Data

In [None]:
# labels for the angular axis, one per month
months = ['Jan', 'Feb', 'March', 'April', 'May', 'June', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']

# radial coordinates: the 'count' column of plot2_df
r = plot2_df['count'].to_list()

fig = go.Figure()
first_overall_trace = go.Barpolar(r=r, theta=months)
fig.add_trace(first_overall_trace)

# size, title, fonts and polar axis orientation of the chart in one layout update
fig.update_layout(
    autosize=False,
    width=800,
    height=800,
    title='First Overall Picks Month Of Birth',
    legend={'title':'Draft Class'},
    font_size=16,
    legend_font_size=16,
    polar={
        'angularaxis': {
            'direction': 'clockwise',  # orientation of the traces
            'rotation': 75,            # offset applied to the orientation
        },
    },
)
fig.show()