## 1. Data Ingestion

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import cvxpy as cp
import random

from google.colab import files
import warnings
warnings.filterwarnings('ignore')

# Direct-download link for the prospect-pool CSV. `url` is also read as a
# global by prospect_ranker, so keep this name bound to this link.
url = 'https://docs.google.com/uc?export=download&id=1FeFY3WlHKSRj4thuyyariyhodBf7z1Zn'

# Read the data from the CSV file (one row per player; see the preview below)
df1 = pd.read_csv(url)

In [None]:
# Preview the prospect table that was just loaded
df1

Unnamed: 0,Team,Type,POS,Name,Specific POS,AGE,SH,ACQUIRED,GP,G,A,P,GAA,SV%,League,Notes,NHL eP,Goalie Equivalency
0,COL,Majors,RD,"Byram, Bowen",LD,22,L,Drafted (4 - 2019),42,10.0,14.0,24.0,,,NHL,,24,0.0
1,COL,Majors,F,"Foudy, Jean-Luc",C,21,R,Drafted (75 - 2020),46,11.0,25.0,36.0,,,AHL,,25,0.0
2,COL,Minors,F,"Olausson, Oskar",RW,20,L,Drafted (28 - 2021),63,11.0,9.0,20.0,,,AHL,,10,0.0
3,COL,Minors,F,"Beaucage, Alex",RW,22,R,Drafted (78 - 2019),63,8.0,12.0,20.0,,,AHL,,10,0.0
4,COL,Minors,F,"Pavel, Ondrej",C,23,L,Signed to ELC,39,6.0,9.0,15.0,,,NCAA,,6,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
915,MIN,Prospects,F,"Guskov, Matvei",C,22,L,Drafted (149 - 2019),37,3.0,6.0,9.0,,,KHL,,15,0.0
916,MIN,Prospects,F,"Petrovský, Servác",C,19,L,Drafted (185 - 2022),62,24.0,31.0,55.0,,,OHL,,10,0.0
917,MIN,Prospects,D,"Peart, Jack",LD,20,L,Drafted (54 - 2021),39,3.0,21.0,24.0,,,NCAA,,10,0.0
918,MIN,Prospects,D,"Healey, Ryan",RD,19,R,Drafted (121 - 2022),34,2.0,6.0,8.0,,,NCAA,,4,0.0


### Input Draft Order

In [None]:
# Direct-download link for the draft-order CSV
url2 = 'https://docs.google.com/uc?export=download&id=10eslT5W0JmCHegnvHcbbeHYPjDB4lB-t'

# Full franchise name -> three-letter team code
replacement_mapping = {
    'Colorado Avalanche': 'COL',
    'Chicago Blackhawks': 'CHI',
    'St. Louis Blues': 'STL',
    'Columbus Blue Jackets': 'CBJ',
    'Boston Bruins': 'BOS',
    'Montreal Canadiens': 'MTL',
    'Vancouver Canucks': 'VAN',
    'Washington Capitals': 'WSH',
    'Arizona Coyotes': 'ARI',
    'New Jersey Devils': 'NJD',
    'Anaheim Ducks': 'ANA',
    'Calgary Flames': 'CGY',
    'Philadelphia Flyers': 'PHI',
    'Carolina Hurricanes': 'CAR',
    'New York Islanders': 'NYI',
    'Winnipeg Jets': 'WPG',
    'Los Angeles Kings': 'LAK',
    'Vegas Golden Knights': 'VGK',
    'Seattle Kraken': 'SEA',
    'Toronto Maple Leafs': 'TOR',
    'Tampa Bay Lightning': 'TBL',
    'Edmonton Oilers': 'EDM',
    'Florida Panthers': 'FLA',
    'Pittsburgh Penguins': 'PIT',
    'Nashville Predators': 'NSH',
    'New York Rangers': 'NYR',
    'Detroit Red Wings': 'DET',
    'Buffalo Sabres': 'BUF',
    'Ottawa Senators': 'OTT',
    'San Jose Sharks': 'SJS',
    'Dallas Stars': 'DAL',
    'Minnesota Wild': 'MIN',
}

# Load the draft order and keep only the team column, swapping every known
# franchise name for its code (names missing from the mapping stay as they are)
df_draft = pd.read_csv(url2)['TEAM_NAME'].replace(replacement_mapping)
df_draft.head()

0    CHI
1    ANA
2    CBJ
3    SJS
4    MTL
Name: TEAM_NAME, dtype: object

### Input Full 2023 Draft Prospects

In [None]:
# Direct-download link for the 2023 draft-class CSV. `url3` is also read as a
# global by prospect_ranker, so keep this name bound to this link.
url3 = 'https://docs.google.com/uc?export=download&id=1trJT6rZE0RrqfWXK2AEwHMqPVMt24Ebd'

# Read the data from the CSV file; per the preview below it has the same
# columns as the prospect pool plus a PLAYER_ID column
df_2023 = pd.read_csv(url3)
df_2023.head()

Unnamed: 0,Team,Type,POS,Name,Specific POS,AGE,SH,ACQUIRED,GP,G,A,P,GAA,SV%,League,Notes,NHL eP,Goalie Equivalency,PLAYER_ID
0,COL,Prospects,F,Calum Ritchie,C,18,R,Drafted (27 - 2023),59,24.0,35.0,59.0,,,OHL,,12,0.0,23
1,COL,Prospects,F,Maros Jedlicka,C,20,L,Drafted (219 - 2023),39,17.0,18.0,35.0,,,Slovak Extraliga,,22,0.0,276
2,COL,Prospects,D,Mikhail Gulyayev,LD,18,L,Drafted (31 - 2023),22,2.0,23.0,25.0,,,MHL,,13,0.0,34
3,COL,Prospects,D,Nikita Ishimnikov,RD,18,R,Drafted (155 - 2023),41,11.0,7.0,18.0,,,MHL,,5,0.0,209
4,COL,Prospects,D,Jeremy Hanzel,LD,20,L,Drafted (187 - 2023),66,13.0,35.0,48.0,,,WHL,,8,0.0,179


## 3. General Forwards Group and Ranking

In [None]:
def rank_pre_processing(df):
  '''
  Creates a dictionary with a dataframe filled with players in the league prospect pool for each position and age group.
  Wingers and centres (LW, C, RW) are pooled into a single forward group 'F'.
  Players under the age of 19 are grouped with the 19 year old dataframes; players older than 23 are not placed in any group.

  The input dataframe is left untouched: relabelling happens on a copy and every
  returned dataframe is an independent copy that can be modified safely.

  Input: Prospect pool dataframe with 'Specific POS' and 'AGE' columns
  Output: Dictionary keyed 'df_<position>-<age>' (e.g. 'df_F-19') with one prospect pool dataframe for each age and position
  '''
  # Work on a copy so the caller keeps its original LW/C/RW labels
  df = df.copy()
  df.loc[df['Specific POS'].isin(['LW', 'C', 'RW']), 'Specific POS'] = 'F'

  ages = [19,20,21,22,23]
  positions = ['F','LD','RD','G']
  players = {}

  for position in positions:
    in_position = df['Specific POS'] == position
    for age in ages:
      # The youngest bucket also absorbs everyone below that age
      in_age_group = (df['AGE'] <= age) if age == ages[0] else (df['AGE'] == age)
      players['df_' + str(position) + '-' + str(age)] = df[in_position & in_age_group].copy()

  return players

#### Group and Rank

In [None]:
def fill_missing_teams(df_dict, key, col):
    """
    Make sure every team appears at least once in df_dict[key].

    Teams with no row in that dataframe get one placeholder row whose `col`
    value is one below the current minimum of `col`, so they sit underneath
    every real player. All other columns of the placeholder rows are left
    empty (NaN) by the concatenation.

    Input: dictionary of dataframes, the key of the dataframe to complete, and
           the name of the score column to fill
    Output: the same dictionary object, with df_dict[key] replaced by the
            completed dataframe when any team was missing
    """
    all_teams = ['COL','CHI','CBJ','STL','BOS','MTL','VAN','WSH','ARI','NJD','ANA',
        'CGY','PHI','CAR','NYI','WPG','LAK','VGK','SEA','TOR','TBL','EDM','FLA',
        'PIT','NSH','NYR','DET','BUF','OTT','SJS','DAL','MIN']

    frame = df_dict[key]
    teams_present = set(frame['Team'])
    floor_value = frame[col].min()

    # Teams that have no player in this group, in league-list order
    absent_teams = [team for team in all_teams if team not in teams_present]

    if len(absent_teams) > 0:
        placeholder_rows = pd.DataFrame({
            col: [(floor_value-1)]*len(absent_teams),
            'Team': absent_teams,
        })
        df_dict[key] = pd.concat([frame, placeholder_rows])

    return df_dict

In [None]:
def score_and_rank(df_dict):
  '''
  Scores every player inside its position/age group and turns the scores into a per-team percentile rank.

  For each group:
    1. z-score the group's metric ('Goalie Equivalency' for goalie groups, 'NHL eP' otherwise).
       A group with a single player, or with no spread in the metric, gets a z-score of 0 for everyone.
    2. add a placeholder row (one below the minimum z-score) for every team with no player in the group.
    3. sum exp(z-score) per team and rank the teams by that sum as a percentile.

  The input dictionary and its dataframes are not modified; all work happens on copies.

  Input: Dictionary of prospect pool dataframes keyed 'df_<position>-<age>'
  Output: Dictionary with the same keys; each value is a dataframe indexed by team with a 'rank' column
  '''
  score_columns = ['AGE','GP','G','A','P','GAA','SV%','Goalie Equivalency','NHL eP','exp_zscore','zscore']

  # Copy every frame so the caller's dataframes do not pick up the helper columns
  working = {key: frame.copy() for key, frame in df_dict.items()}
  df_dict_out = {}

  for key in list(working):
    group = working[key]

    # Key looks like 'df_G-19'; compare the position token instead of searching for the letter 'G'
    position = key.split('-')[0].split('_')[-1]
    metric = 'Goalie Equivalency' if position == 'G' else 'NHL eP'

    spread = group[metric].std() if len(group) > 1 else 0
    if spread > 0:
      group['zscore'] = (group[metric] - group[metric].mean())/spread
    else:
      # One player, or no variation (std is 0 or NaN): everyone sits at the mean.
      # Dividing by a zero spread would turn the whole group into NaN scores.
      group['zscore'] = 0

    working = fill_missing_teams(working, key, 'zscore')
    group = working[key]
    group['exp_zscore'] = np.exp(group['zscore'])

    # Sum the numeric columns per team; only 'exp_zscore' is used for the ranking
    team_totals = group.groupby('Team').sum(numeric_only=True)
    team_totals['rank'] = team_totals['exp_zscore'].rank(pct=True)
    df_dict_out[key] = team_totals.drop(columns=score_columns)

  return df_dict_out

In [None]:
def group_ranker(rank_dfs, weights):
  '''
  Collapses the per-age rankings into one ranking per position.

  Each dataframe's 'rank' column is scaled by the weight of its age group and
  the scaled ranks are added up per position. The totals are converted to
  percentiles and flipped (1 - percentile) for the optimization model.

  Input: Dictionary of rank dataframes keyed '<position>-<age>' and a dictionary mapping age (int) to weight
  Output: Dataframe with one column per position holding the flipped percentiles
  '''
  combined = pd.DataFrame()

  for label, frame in rank_dfs.items():
    position, age = label.split('-')
    age_weight = weights[int(age)]
    weighted_rank = frame['rank'] * age_weight

    if position not in combined:
      combined[position] = weighted_rank  # first age group seen for this position
    else:
      combined[position] += weighted_rank

  for position in combined:
    # percentile of the weighted total, flipped around
    combined[position] = 1 - combined[position].rank(pct=True)

  return combined

## 4. Adjusting Rankings for Player Selection

In [None]:
def prospect_ranker(player_IDs):
  '''
  Re-ranks every team's prospect pool after a sequence of draft selections.

  The i-th entry of player_IDs is assigned to the team holding the i-th slot of
  the draft order, the selected players are appended to the league prospect
  pool, and the pool is grouped, scored and ranked again.

  Reads the module-level links `url` (prospect pool), `url2` (draft order) and
  `url3` (2023 draft class) and downloads all three CSVs on every call.

  Input: List of PLAYER_ID values in the order the players were drafted (may be empty)
  Output: Dataframe indexed by team with one flipped-percentile column per position group
  Raises: IndexError when more players are given than the draft order has slots
  '''
  df_weight = {19: 0.85, 20: 0.7, 21: 0.55, 22: 0.4, 23: 0.25}

  df_draft = pd.read_csv(url2)
  df_2023 = pd.read_csv(url3)
  df1 = pd.read_csv(url)

  # Define the replacement mapping
  replacement_mapping = {
    'Colorado Avalanche': 'COL', 'Chicago Blackhawks': 'CHI', 'St. Louis Blues': 'STL', 'Columbus Blue Jackets': 'CBJ', 'Boston Bruins':'BOS', 'Montreal Canadiens':'MTL',
    'Vancouver Canucks': 'VAN', 'Washington Capitals': 'WSH', 'Arizona Coyotes': 'ARI', 'New Jersey Devils': 'NJD', 'Anaheim Ducks': 'ANA', 'Calgary Flames': 'CGY', 'Philadelphia Flyers': 'PHI',
    'Carolina Hurricanes': 'CAR', 'New York Islanders': 'NYI', 'Winnipeg Jets': 'WPG', 'Los Angeles Kings': 'LAK', 'Vegas Golden Knights': 'VGK', 'Seattle Kraken': 'SEA',
    'Toronto Maple Leafs': 'TOR', 'Tampa Bay Lightning': 'TBL', 'Edmonton Oilers': 'EDM', 'Florida Panthers': 'FLA', 'Pittsburgh Penguins': 'PIT', 'Nashville Predators': 'NSH',
    'New York Rangers': 'NYR', 'Detroit Red Wings': 'DET', 'Buffalo Sabres': 'BUF', 'Ottawa Senators': 'OTT', 'San Jose Sharks': 'SJS', 'Dallas Stars': 'DAL', 'Minnesota Wild': 'MIN'
  }

  # Team codes in draft order; names missing from the mapping are kept as they are
  team_codes = df_draft['TEAM_NAME'].replace(replacement_mapping)
  draft_order = team_codes.iloc[:len(player_IDs)].tolist()

  if len(player_IDs) > len(draft_order):
    raise IndexError(
      f'{len(player_IDs)} players were given but the draft order only has {len(draft_order)} slots'
    )

  # Build each drafted player's row on a fresh frame (no writes into a filtered
  # slice of df_2023) and append them all with a single concat
  drafted_rows = []
  for team, player_id in zip(draft_order, player_IDs):
    selected_row = df_2023[df_2023['PLAYER_ID'] == player_id]
    drafted_rows.append(selected_row.drop(columns='PLAYER_ID').assign(Team=team))

  if drafted_rows:
    df1 = pd.concat([df1] + drafted_rows, ignore_index = True)

  rank_pre_proc = rank_pre_processing(df1)
  indiv_ranks = score_and_rank(rank_pre_proc)
  ranks_new = group_ranker(indiv_ranks, df_weight)

  return ranks_new

In [None]:
# Rankings after the first two draft slots take the prospects with PLAYER_ID 23 and 276
prospect_ranker([23, 276])

Unnamed: 0_level_0,df_F,df_LD,df_RD,df_G
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ANA,0.21875,0.125,0.03125,0.625
ARI,0.0625,0.4375,0.46875,0.875
BOS,0.5625,0.59375,0.71875,0.71875
BUF,0.0,0.28125,0.6875,0.65625
CAR,0.125,0.3125,0.1875,0.15625
CBJ,0.09375,0.09375,0.15625,0.25
CGY,0.75,0.9375,0.96875,0.46875
CHI,0.3125,0.03125,0.3125,0.84375
COL,0.9375,0.78125,0.9375,0.59375
DAL,0.46875,0.75,0.25,0.3125


In [None]:
# Baseline rankings with no draft selections applied
prospect_ranker([])

Unnamed: 0_level_0,df_F,df_LD,df_RD,df_G
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ANA,0.21875,0.125,0.03125,0.625
ARI,0.0625,0.4375,0.46875,0.875
BOS,0.5625,0.59375,0.71875,0.71875
BUF,0.0,0.28125,0.6875,0.65625
CAR,0.125,0.3125,0.1875,0.15625
CBJ,0.09375,0.09375,0.15625,0.25
CGY,0.75,0.9375,0.96875,0.46875
CHI,0.4375,0.03125,0.3125,0.84375
COL,0.9375,0.78125,0.9375,0.59375
DAL,0.46875,0.75,0.25,0.3125


## 6. Objective Function

In [None]:
# Use a dedicated name for this link. `url2` is the draft-order link that
# prospect_ranker reads every time it is called; rebinding it here would make
# any later prospect_ranker call load this file instead of the draft order.
players_url = 'https://docs.google.com/uc?export=download&id=1DAUvQv-EiUHWUPRBJEiXSy4jcZuoMf1Y'

# Read the data from the CSV file
players_df = pd.read_csv(players_url)
players_df.head()

In [None]:
# Inputs from Pita's front end

# Team the picks are made for; passed to objective() and used by get_value()
# as the row label of the `result` table.
# NOTE(review): the ranking tables printed earlier in this notebook are indexed
# by abbreviations such as 'CHI' - confirm that `result` is indexed by this label.
Team = 'Blackhawks'

# Maximum number of players that may still be picked at each position
lw = 0
rw = 1
c = 1
ld = 1
rd = 1
g = 0

# Keyed by 'Specific POS' value; objective() uses each entry as an upper bound
# and the draft loop lowers the entry of the position it just picked
pos_constraint = {'LW': lw, 'RW': rw, 'C': c, 'LD': ld, 'RD': rd, 'G': g}

In [None]:
def get_value(row, team):
    """
    Look up the team-need score that applies to one player row.

    Forwards (LW, RW, C) share the 'df_F' column, left and right defence use
    'df_LD' and 'df_RD', and any other position falls back to 'df_G'.

    Reads the module-level table `result`, which must be indexed by the same
    team labels that are passed in as `team`.

    Input: a player row with a 'Specific POS' entry, and the team label
    Output: the value stored in `result` for that team and position group
    """
    column_by_position = {
        'LW': 'df_F',
        'RW': 'df_F',
        'C': 'df_F',
        'LD': 'df_LD',
        'RD': 'df_RD',
    }
    need_column = column_by_position.get(row['Specific POS'], 'df_G')
    return result.loc[team, need_column]

#players_df['Team Need'] = players_df.apply(get_value, axis=1, team=Team)

In [None]:
def objective(df, pos_const, selected_team):
  '''
  Picks the single best available player for a team with a small 0/1 program.

  Every player gets a binary decision variable. The model maximizes
  'Value' + 'Team Need' of the chosen player, subject to picking exactly one
  player and not exceeding the remaining allowance of any position.

  Side effect: writes (or overwrites) the 'Team Need' column of df.

  Input: dataframe of available players ('Specific POS' and 'Value' columns),
         dictionary mapping position to the maximum number of picks still allowed,
         and the team label handed to get_value
  Output: tuple (optimal objective value, array with the row position(s) of the chosen player);
          the positions are meant for df.iloc, not df.loc
  Raises: ValueError when there are no players left or no pick satisfies the constraints
  '''
  if len(df.index) == 0:
    raise ValueError('No players left to choose from')

  df['Team Need'] = df.apply(get_value, axis=1, team=selected_team)

  # Hand cvxpy plain arrays: after rows are dropped the dataframe index is no
  # longer 0..n-1, and the decision variable is purely positional
  player_scores = df['Value'].to_numpy(dtype=float) + df['Team Need'].to_numpy(dtype=float)
  player_positions = df['Specific POS'].to_numpy()

  # Define variables
  x = cp.Variable(len(df.index), boolean=True)

  # Define objective
  obj_lp = cp.Maximize(x @ player_scores)

  # Define constraints
  cons_lp = []  # Initialize constraint list

  for position, max_players in pos_const.items():
    # 0/1 indicator vector instead of boolean indexing, so a position with no
    # remaining players simply yields the trivial constraint 0 <= max_players
    plays_position = (player_positions == position).astype(float)
    cons_lp.append(x @ plays_position <= max_players)
  cons_lp.append(cp.sum(x) == 1)

  prob_lp = cp.Problem(obj_lp, cons_lp)
  sol = prob_lp.solve()

  if x.value is None:
    raise ValueError(
      f'No feasible pick for {selected_team} (solver status: {prob_lp.status})'
    )

  # The solver returns floats that are only approximately 0 or 1, so threshold
  # instead of testing for exact equality with 1
  selected = np.flatnonzero(np.asarray(x.value, dtype=float) > 0.5)

  return sol, selected


In [None]:
# Draft slots (1-based) that belong to our team; every other slot is simulated
# by removing a random player from the pool
our_picks = [1, 2, 4, 7]

# NOTE: this cell consumes players_df and pos_constraint in place, so reload
# both before running it a second time
for pick in range(1,9):
  print(f'pick is {pick}')
  if pick in our_picks:
    obj, draft = objective(players_df, pos_constraint, Team)
    chosen = players_df.iloc[draft[0]]
    print(chosen)
    print(f'The objective is {obj}')
    # Read the position by column name rather than by a hard-coded column number
    pos_constraint[chosen['Specific POS']] = pos_constraint[chosen['Specific POS']] - 1
    players_df.drop(players_df.index[draft[0]], inplace = True)

  else:
    # Draw a random row POSITION. Index labels stop matching positions as soon
    # as a row has been dropped, so a random label must not be used with iloc.
    random_index = random.randrange(len(players_df))
    print(f'random index is {random_index}')
    print(players_df.iloc[random_index])
    players_df.drop(players_df.index[random_index], inplace = True)


In [None]:
# Look up one player by name in the current pool
# NOTE(review): presumably a check of whether he is still available after the simulated picks - confirm
players_df[players_df['Name']=='Gulyayev, Mikhail']