# Part 1 - Libraries

In [293]:
#pip install yfinance
#pip install matplotlib pendulum
# pip.exe install selenium in anaconda terminal

import time
start_time = time.time() # Start time of script

# Pulling Data
import yfinance as yf
import pandas as pd
import pendulum
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
import datetime
from urllib.request import urlopen
import re
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.select import Select
from selenium.webdriver.common import keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
from time import sleep
import string
import unidecode

headers = requests.utils.default_headers()
headers.update({
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
})

chrome_options = Options()
chrome_options.add_argument("--headless")  # servers don't provide the visulazation
chrome_options.add_argument("--no-sandbox")  # operate at the highest authority
chrome_options.add_argument(
    "--disable-dev-shm-usage"
)  # increase the RAM of chrome to load the page


path = "chromedriver.exe"

# Math
import math
import numpy as np
import warnings

# Data Management
import gc

# Wrapping
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split

# Modeling
from sklearn.metrics import r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
import xgboost as xgb

# For business days
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
US_BUSINESS_DAY = CustomBusinessDay(calendar=USFederalHolidayCalendar())


warnings.filterwarnings("ignore")

# Part 2 - Get Data 

## Part 2.1 - Historic and Current Stats

In [294]:
stats_df = pd.read_csv(r'C:\Users\casey\OneDrive\Desktop\2k23 Fantasy\Ouput\v1_historic_and_current_stats.csv').drop(['Unnamed: 0'], axis = 1)
stats_df['Name'] = [i.replace('*', '') for i in stats_df['Name']]
stats_df

Unnamed: 0,Season,Team,Name,Points,Rebounds,Assists,Steals,Blocks
0,2002,ATL,Shareef Abdur-Rahim,21.20,9.00,3.10,1.30,1.10
1,2002,WAS,Courtney Alexander,9.80,2.60,1.50,0.60,0.10
2,2002,DET,Victor Alexander,2.70,1.90,0.40,0.00,0.10
3,2002,MIA,Malik Allen,4.30,3.20,0.40,0.30,0.70
4,2002,MIL,Ray Allen,21.80,4.50,3.90,1.30,0.30
...,...,...,...,...,...,...,...,...
9361,2023,DAL,Christian Wood,17.49,9.54,2.27,0.80,0.85
9362,2023,WAS,Delon Wright,5.89,2.99,2.83,1.09,0.26
9363,2023,TOR,Thaddeus Young,3.39,4.03,2.33,0.83,0.33
9364,2023,ATL,Trae Young,28.73,3.65,7.91,0.91,0.21


## Part 2.2 - Historic and Current Standings

In [295]:
standings_df = pd.read_csv(r'C:\Users\casey\OneDrive\Desktop\2k23 Fantasy\Ouput\v1_alltime_team_predictions.csv').drop(['Unnamed: 0'], axis = 1)
standings_df

Unnamed: 0,Season,Team,Record
0,2003,SAS,0.732
1,2003,DAL,0.732
2,2003,SAC,0.720
3,2003,MIN,0.622
4,2003,DET,0.610
...,...,...,...
623,2023,CHA,0.451
624,2023,WAS,0.390
625,2023,IND,0.317
626,2023,ORL,0.293


## Part 2.3 - All-Stars

In [296]:
all_star_years = []
all_star_names = []
all_stars = []


for year in range(2003, 2023):

    url = 'https://basketball.realgm.com/nba/allstar/game/rosters/' + str(year)
    page = requests.get(url, headers=headers)
    soup = BeautifulSoup(page.content,"lxml")
    conference_tables = soup.find_all('tbody')
    conference_1_table = conference_tables[0].find_all('tr')
    conference_2_table = conference_tables[1].find_all('tr')

    for player in conference_1_table:
        all_star_years.append(year)
        name = player.find('td').get_text()
        all_star_names.append(name)
        all_stars.append(1)
    for player in conference_2_table:
        all_star_years.append(year)
        name = player.find('td').get_text()
        all_star_names.append(name)
        all_stars.append(1)

all_star_df = pd.DataFrame({'Season': all_star_years, 'Name': all_star_names, 'Allstar': all_stars})
all_star_df

Unnamed: 0,Season,Name,Allstar
0,2003,Vince Carter,1
1,2003,Zydrunas Ilgauskas,1
2,2003,Allen Iverson,1
3,2003,Michael Jordan,1
4,2003,Jason Kidd,1
...,...,...,...
515,2022,Dejounte Murray,1
516,2022,Jayson Tatum,1
517,2022,Karl-Anthony Towns,1
518,2022,Andrew Wiggins,1


## Part 2.4 - MVP

In [297]:
mvps = []
seasons = []

url = 'https://www.basketball-reference.com/awards/mvp.html'
page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content,"lxml")
mvp_table = soup.find_all('tbody')[0]
for year_row in mvp_table.find_all('tr'):
    season_text = year_row.find('a').get_text()
    season = '20' + season_text[-2:]
    name = unidecode.unidecode(year_row.find_all('td')[1].get_text())
    if (int(season) >= 2003) & (int(season) < 2023):
        mvps.append(name)
        seasons.append(int(season))
mvp_df = pd.DataFrame({'Season': seasons, 'Name': mvps, 'MVP': 1})
mvp_df

Unnamed: 0,Season,Name,MVP
0,2022,Nikola Jokic,1
1,2021,Nikola Jokic,1
2,2020,Giannis Antetokounmpo,1
3,2019,Giannis Antetokounmpo,1
4,2018,James Harden,1
5,2017,Russell Westbrook,1
6,2016,Stephen Curry,1
7,2015,Stephen Curry,1
8,2014,Kevin Durant,1
9,2013,LeBron James,1


## 2.5 - DPOY

In [298]:
dpoys = []
seasons = []

url = 'https://www.basketball-reference.com/awards/dpoy.html'
page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content,"lxml")
dpoy_table = soup.find_all('tbody')[0]
for year_row in dpoy_table.find_all('tr'):
    season_text = year_row.find('a').get_text()
    season = '20' + season_text[-2:]
    name = unidecode.unidecode(year_row.find_all('td')[1].get_text())
    if (int(season) >= 2003) & (int(season) < 2023):
        dpoys.append(name)
        seasons.append(int(season))
dpoy_df = pd.DataFrame({'Season': seasons, 'Name': dpoys, 'DPOY': 1})
dpoy_df

Unnamed: 0,Season,Name,DPOY
0,2022,Marcus Smart,1
1,2021,Rudy Gobert,1
2,2020,Giannis Antetokounmpo,1
3,2019,Rudy Gobert,1
4,2018,Rudy Gobert,1
5,2017,Draymond Green,1
6,2016,Kawhi Leonard,1
7,2015,Kawhi Leonard,1
8,2014,Joakim Noah,1
9,2013,Marc Gasol,1


## 2.6 - ROY

In [299]:
roys = []
seasons = []

url = 'https://www.basketball-reference.com/awards/roy.html'
page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content,"lxml")
roy_table = soup.find_all('tbody')[0]
for year_row in roy_table.find_all('tr'):
    season_text = year_row.find('a').get_text()
    season = '20' + season_text[-2:]
    name = unidecode.unidecode(year_row.find_all('td')[1].get_text())
    if (int(season) >= 2003) & (int(season) < 2023):
        roys.append(name)
        seasons.append(int(season))
roy_df = pd.DataFrame({'Season': seasons, 'Name': roys, 'ROY': 1})
roy_df

Unnamed: 0,Season,Name,ROY
0,2022,Scottie Barnes,1
1,2021,LaMelo Ball,1
2,2020,Ja Morant,1
3,2019,Luka Doncic,1
4,2018,Ben Simmons,1
5,2017,Malcolm Brogdon,1
6,2016,Karl-Anthony Towns,1
7,2015,Andrew Wiggins,1
8,2014,Michael Carter-Williams,1
9,2013,Damian Lillard,1


## Part 2.7 - Merge DataFrames Together

In [300]:
award_raw_data = stats_df.merge(standings_df, on = ['Season', 'Team'])\
.merge(all_star_df, on = ['Season', 'Name'], how = 'left')\
.merge(mvp_df, on = ['Season', 'Name'], how = 'left')\
.merge(dpoy_df, on = ['Season', 'Name'], how = 'left')\
.merge(roy_df, on = ['Season', 'Name'], how = 'left')
award_raw_data['Allstar'].fillna(0, inplace=True)
award_raw_data['MVP'].fillna(0, inplace=True)
award_raw_data['DPOY'].fillna(0, inplace=True)
award_raw_data['ROY'].fillna(0, inplace=True)
award_raw_data

Unnamed: 0,Season,Team,Name,Points,Rebounds,Assists,Steals,Blocks,Record,Allstar,MVP,DPOY,ROY
0,2003,DAL,Tariq Abdul-Wahad,4.10,2.90,1.50,0.40,0.20,0.732,0.0,0.0,0.0,0.0
1,2003,DAL,Raja Bell,3.10,1.90,0.80,0.70,0.10,0.732,0.0,0.0,0.0,0.0
2,2003,DAL,Shawn Bradley,6.70,5.90,0.70,0.80,2.10,0.732,0.0,0.0,0.0,0.0
3,2003,DAL,Evan Eschmeyer,1.00,1.70,0.40,0.60,0.40,0.732,0.0,0.0,0.0,0.0
4,2003,DAL,Michael Finley,19.30,5.80,3.00,1.10,0.30,0.732,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8951,2023,MIL,Joe Ingles,5.60,3.09,3.12,0.55,0.16,0.622,0.0,0.0,0.0,0.0
8952,2023,MIL,Brook Lopez,11.10,4.03,0.58,0.53,0.86,0.622,0.0,0.0,0.0,0.0
8953,2023,MIL,Wesley Matthews,4.54,1.79,1.19,0.48,0.22,0.622,0.0,0.0,0.0,0.0
8954,2023,MIL,Khris Middleton,19.43,4.55,4.94,1.00,0.27,0.622,0.0,0.0,0.0,0.0


# Part 3 - Modeling

## 3.1 - All-Star

In [301]:
# Separate Data
training = award_raw_data[award_raw_data['Season'] < 2023]
testing = award_raw_data[award_raw_data['Season'] == 2023]

# Determine which columns to use
training_columns = list(training.columns)[3:9]
output_column = 'Allstar'

In [302]:
y = list(training[output_column])
x = training.loc[:,training_columns]

x_pred = testing.loc[:,training_columns]



# Random Forest
rf_allstar = RandomForestClassifier(n_estimators=200, criterion='gini', \
                                max_depth=20, min_samples_split=2, min_samples_leaf=1, \
                                min_weight_fraction_leaf=0.0, max_features=1.0, \
                                max_leaf_nodes=None, min_impurity_decrease=0.0, \
                                bootstrap=True, oob_score=False, n_jobs=-1, \
                                random_state=None, verbose=0, warm_start=False, \
                                ccp_alpha=0.0, max_samples=None)

rf_allstar.fit(x, y)

rf_allstar_results_train = rf_allstar.predict_proba(x)
training['Allstar_Probability_RF'] = [i[1] for i in rf_allstar_results_train]

rf_allstar_results_test = rf_allstar.predict_proba(x_pred)
testing['Allstar_Probability_RF'] = [i[1] for i in rf_allstar_results_test]

# XGBoost
xgb_allstar = xgb.XGBClassifier(gamma=10, learning_rate=0.1, max_delta_step=0, max_depth=50, objective='binary:logistic')

xgb_allstar.fit(x, y)

xgb_allstar_results_train = xgb_allstar.predict_proba(x)
training['Allstar_Probability_XGB'] = [i[1] for i in xgb_allstar_results_train]

xgb_allstar_results_test = xgb_allstar.predict_proba(x_pred)
testing['Allstar_Probability_XGB'] = [i[1] for i in xgb_allstar_results_test]

# Logistic
logistic_allstar = LogisticRegression()

logistic_allstar.fit(x, y)

logistic_allstar_results_train = logistic_allstar.predict_proba(x)
training['Allstar_Probability_Logistic'] = [i[1] for i in logistic_allstar_results_train]

logistic_allstar_results_test = logistic_allstar.predict_proba(x_pred)
testing['Allstar_Probability_Logistic'] = [i[1] for i in logistic_allstar_results_test]

testing['AllStar Probability'] = round((testing['Allstar_Probability_RF'] + testing['Allstar_Probability_XGB'] + \
testing['Allstar_Probability_Logistic']) / 3, 3)


## 3.2 - MVP

In [303]:
output_column = 'MVP'

In [304]:
y = list(training[output_column])
x = training.loc[:,training_columns]

x_pred = testing.loc[:,training_columns]



# Random Forest
rf_mvp = RandomForestClassifier(n_estimators=200, criterion='gini', \
                                max_depth=20, min_samples_split=2, min_samples_leaf=1, \
                                min_weight_fraction_leaf=0.0, max_features=1.0, \
                                max_leaf_nodes=None, min_impurity_decrease=0.0, \
                                bootstrap=True, oob_score=False, n_jobs=-1, \
                                random_state=None, verbose=0, warm_start=False, \
                                ccp_alpha=0.0, max_samples=None)

rf_mvp.fit(x, y)

rf_mvp_results_train = rf_mvp.predict_proba(x)
training['MVP_Probability_RF'] = [i[1] for i in rf_mvp_results_train]

rf_mvp_results_test = rf_mvp.predict_proba(x_pred)
testing['MVP_Probability_RF'] = [i[1] for i in rf_mvp_results_test]

# XGBoost
xgb_mvp = xgb.XGBClassifier(gamma=10, learning_rate=0.1, max_delta_step=0, max_depth=50, objective='binary:logistic')

xgb_mvp.fit(x, y)

xgb_mvp_results_train = xgb_mvp.predict_proba(x)
training['MVP_Probability_XGB'] = [i[1] for i in xgb_mvp_results_train]

xgb_mvp_results_test = xgb_mvp.predict_proba(x_pred)
testing['MVP_Probability_XGB'] = [i[1] for i in xgb_mvp_results_test]

# Logistic
logistic_mvp = LogisticRegression()

logistic_mvp.fit(x, y)

logistic_mvp_results_train = logistic_mvp.predict_proba(x)
training['MVP_Probability_Logistic'] = [i[1] for i in logistic_mvp_results_train]

logistic_mvp_results_test = logistic_mvp.predict_proba(x_pred)
testing['MVP_Probability_Logistic'] = [i[1] for i in logistic_mvp_results_test]



testing['MVP Probability'] = (testing['MVP_Probability_RF'] + testing['MVP_Probability_XGB'] + \
testing['MVP_Probability_Logistic']) / 3


testing['MVP Percentile'] = [round((i - testing['MVP Probability'].min()) / \
                                       (testing['MVP Probability'].max() - testing['MVP Probability'].min()), 3) \
                                       for i in testing['MVP Probability']]


## 3.3 - DPOY

In [305]:
output_column = 'DPOY'
dpoy_cols = ['Steals', 'Blocks', 'Record']

In [306]:
y = list(training[output_column])
x = training.loc[:,dpoy_cols]

x_pred = testing.loc[:,dpoy_cols]



# Random Forest
rf_dpoy = RandomForestClassifier(n_estimators=200, criterion='gini', \
                                max_depth=20, min_samples_split=2, min_samples_leaf=1, \
                                min_weight_fraction_leaf=0.0, max_features=1.0, \
                                max_leaf_nodes=None, min_impurity_decrease=0.0, \
                                bootstrap=True, oob_score=False, n_jobs=-1, \
                                random_state=None, verbose=0, warm_start=False, \
                                ccp_alpha=0.0, max_samples=None)

rf_dpoy.fit(x, y)

rf_dpoy_results_train = rf_dpoy.predict_proba(x)
training['DPOY_Probability_RF'] = [i[1] for i in rf_dpoy_results_train]

rf_dpoy_results_test = rf_dpoy.predict_proba(x_pred)
testing['DPOY_Probability_RF'] = [i[1] for i in rf_dpoy_results_test]

# XGBoost
xgb_dpoy = xgb.XGBClassifier(gamma=10, learning_rate=0.1, max_delta_step=0, max_depth=50, objective='binary:logistic')

xgb_dpoy.fit(x, y)

xgb_dpoy_results_train = xgb_dpoy.predict_proba(x)
training['DPOY_Probability_XGB'] = [i[1] for i in xgb_dpoy_results_train]

xgb_dpoy_results_test = xgb_dpoy.predict_proba(x_pred)
testing['DPOY_Probability_XGB'] = [i[1] for i in xgb_dpoy_results_test]

# Logistic
logistic_dpoy = LogisticRegression()

logistic_dpoy.fit(x, y)

logistic_dpoy_results_train = logistic_dpoy.predict_proba(x)
training['DPOY_Probability_Logistic'] = [i[1] for i in logistic_dpoy_results_train]

logistic_dpoy_results_test = logistic_dpoy.predict_proba(x_pred)
testing['DPOY_Probability_Logistic'] = [i[1] for i in logistic_dpoy_results_test]



testing['DPOY Probability'] = (testing['DPOY_Probability_RF'] + testing['DPOY_Probability_XGB'] + \
testing['DPOY_Probability_Logistic']) / 3


testing['DPOY Percentile'] = [min(round((i - testing['DPOY Probability'].min()) / \
                                       (testing['DPOY Probability'].max() - testing['DPOY Probability'].min()), 3),1) \
                                       for i in testing['DPOY Probability']]


## 3.4 - ROY

In [307]:
current_players = list(testing.Name)
past_players = list(training.Name.unique())
candidates = [i if i not in past_players else '' for i in current_players]
candidates = [x for x in candidates if x != '']
output_column = 'ROY'
rookie_columns = training_columns.copy()
rookie_columns.remove('Record')

In [308]:
y = list(training[output_column])
x = training.loc[:,rookie_columns]

x_pred = testing.loc[:,rookie_columns]



# Random Forest
rf_roy = RandomForestClassifier(n_estimators=200, criterion='gini', \
                                max_depth=20, min_samples_split=2, min_samples_leaf=1, \
                                min_weight_fraction_leaf=0.0, max_features=1.0, \
                                max_leaf_nodes=None, min_impurity_decrease=0.0, \
                                bootstrap=True, oob_score=False, n_jobs=-1, \
                                random_state=None, verbose=0, warm_start=False, \
                                ccp_alpha=0.0, max_samples=None)

rf_roy.fit(x, y)

rf_roy_results_train = rf_roy.predict_proba(x)
training['ROY_Probability_RF'] = [i[1] for i in rf_roy_results_train]

rf_roy_results_test = rf_roy.predict_proba(x_pred)
testing['ROY_Probability_RF'] = [i[1] for i in rf_roy_results_test]

# XGBoost
xgb_roy = xgb.XGBClassifier(gamma=10, learning_rate=0.1, max_delta_step=0, max_depth=50, objective='binary:logistic')

xgb_roy.fit(x, y)

xgb_roy_results_train = xgb_roy.predict_proba(x)
training['ROY_Probability_XGB'] = [i[1] for i in xgb_roy_results_train]

xgb_roy_results_test = xgb_roy.predict_proba(x_pred)
testing['ROY_Probability_XGB'] = [i[1] for i in xgb_roy_results_test]

# Logistic
logistic_roy = LogisticRegression()

logistic_roy.fit(x, y)

logistic_roy_results_train = logistic_roy.predict_proba(x)
training['ROY_Probability_Logistic'] = [i[1] for i in logistic_roy_results_train]

logistic_roy_results_test = logistic_roy.predict_proba(x_pred)
testing['ROY_Probability_Logistic'] = [i[1] for i in logistic_roy_results_test]



testing['ROY Probability'] = (testing['ROY_Probability_RF'] + testing['ROY_Probability_XGB'] + \
testing['ROY_Probability_Logistic']) / 3


testing['ROY Percentile'] = [min(round((i - testing['ROY Probability'].min()) / \
                                       (testing['ROY Probability'].max() \
                                        - testing['ROY Probability'].min()), 3),1) \
                                       for i in testing['ROY Probability']]


# Part 4 - Exporting Datsets

In [309]:
conferences = {
 'PHX': 'West',
 'GSW': 'West',
 'MEM': 'West',
 'DAL': 'West',
 'DEN': 'West',
 'LAC': 'West',
 'MIN': 'West',
 'NOP': 'West',
    
 'LAL': 'West',
 'SAC': 'West',
    
 'POR': 'West',
 'OKC': 'West',
 'HOU': 'West',
 'SAS': 'West',
 'UTA': 'West',
    
 'BOS': 'East',
 'PHI': 'East',
 'MIA': 'East',
 'MIL': 'East',
 'ATL': 'East',
 'BKN': 'East',
 'CLE': 'East',
 'CHI': 'East',
    
 'TOR': 'East',
 'NYK': 'East',
    
 'CHA': 'East',
 'WAS': 'East',
 'IND': 'East',
 'ORL': 'East',
 'DET': 'East'
}


testing['Conference'] = [conferences[i] for i in testing['Team']]

## 4.1 - Allstars

In [310]:
testing = testing.sort_values('AllStar Probability', ascending = False)

In [311]:
west_counter = 0
western_guards = 0
east_counter = 0
selected = []

for i in range(len(testing)):
    conference = testing.iloc[i, -1]
    if conference == 'East':
        east_counter += 1
        if east_counter <= 20:
            selected.append(True)
        else:
            selected.append(False)
    else:
        west_counter += 1
        if west_counter <= 20:
            selected.append(True)
        else:
            selected.append(False)
testing['Predicted AllStar'] = selected

In [312]:
western_allstars = testing[(testing['Conference'] == 'West') & (testing['Predicted AllStar'] == True)][['Conference', 'Team', 'Name', 'AllStar Probability']]
eastern_allstars = testing[(testing['Conference'] == 'East') & (testing['Predicted AllStar'] == True)][['Conference', 'Team', 'Name', 'AllStar Probability']]

In [313]:
western_allstars.to_csv('v1_west_allstar_predictions.csv')
eastern_allstars.to_csv('v1_east_allstar_predictions.csv')

In [314]:
western_allstars

Unnamed: 0,Conference,Team,Name,AllStar Probability
8786,West,DAL,Luka Doncic,0.978
8675,West,DEN,Nikola Jokic,0.977
8928,West,PHX,Devin Booker,0.958
8695,West,MEM,Ja Morant,0.954
8773,West,LAL,LeBron James,0.932
8821,West,LAC,Paul George,0.928
8824,West,LAC,Kawhi Leonard,0.916
8737,West,GSW,Stephen Curry,0.908
8715,West,MIN,Karl-Anthony Towns,0.888
8881,West,POR,Damian Lillard,0.771


In [315]:
eastern_allstars

Unnamed: 0,Conference,Team,Name,AllStar Probability
8943,East,MIL,Giannis Antetokounmpo,0.982
8892,East,PHI,Joel Embiid,0.981
8863,East,BOS,Jayson Tatum,0.966
8913,East,BKN,Kevin Durant,0.953
8893,East,PHI,James Harden,0.945
8719,East,MIA,Bam Adebayo,0.894
8842,East,CLE,Donovan Mitchell,0.891
8915,East,BKN,Kyrie Irving,0.886
8649,East,ATL,Trae Young,0.869
8853,East,BOS,Jaylen Brown,0.862


## 4.2 - MVP

In [316]:
mvp_df = testing.sort_values('MVP Percentile', ascending = False)[:10][['Conference', 'Team', 'Name', 'MVP Percentile']]
mvp_df['Ranking'] = mvp_df['MVP Percentile'].rank(method='min', ascending = False).astype(int)
mvp_df = mvp_df[['Conference', 'Team', 'Name', 'Ranking']]
mvp_df.to_csv('v1_mvp_predictions.csv')
mvp_df

Unnamed: 0,Conference,Team,Name,Ranking
8675,West,DEN,Nikola Jokic,1
8786,West,DAL,Luka Doncic,2
8943,East,MIL,Giannis Antetokounmpo,3
8928,West,PHX,Devin Booker,4
8892,East,PHI,Joel Embiid,5
8649,East,ATL,Trae Young,6
8695,West,MEM,Ja Morant,7
8913,East,BKN,Kevin Durant,8
8893,East,PHI,James Harden,9
8773,West,LAL,LeBron James,10


## 4.3 - DPOY

In [317]:
dpoy_df = testing.sort_values('DPOY Percentile', ascending = False)[:10][['Conference', 'Team', 'Name', 'DPOY Percentile']]
dpoy_df['Ranking'] = dpoy_top_candidates['DPOY Percentile'].rank(method='min', ascending = False).astype(int)
dpoy_df = dpoy_df[['Conference', 'Team', 'Name', 'Ranking']]
dpoy_df.to_csv('v1_dpoy_predictions.csv')
dpoy_df

Unnamed: 0,Conference,Team,Name,Ranking
8832,East,CLE,Evan Mobley,1
8692,West,MEM,Jaren Jackson Jr.,1
8869,East,BOS,Robert Williams III,3
8943,East,MIL,Giannis Antetokounmpo,4
8771,West,LAL,Anthony Davis,7
8549,East,IND,Myles Turner,5
8904,East,PHI,Matisse Thybulle,6
8452,East,ORL,Jonathan Isaac,8
8719,East,MIA,Bam Adebayo,9
8892,East,PHI,Joel Embiid,10


## 4.3 - ROY

In [318]:
rookie_df = testing[testing['Name'].isin(candidates)].sort_values('ROY Percentile', ascending = False)[:10][['Conference', 'Team', 'Name', 'ROY Percentile']]
rookie_df['Ranking'] = rookie_df['ROY Percentile'].rank(method='min', ascending = False).astype(int)
rookie_df = rookie_df[['Conference', 'Team', 'Name', 'Ranking']]
rookie_df.to_csv('v1_roy_predictions.csv')
rookie_df

Unnamed: 0,Conference,Team,Name,Ranking
8550,West,NOP,Dyson Daniels,1
8477,West,HOU,Jabari Smith Jr.,2
8439,East,ORL,Paolo Banchero,3
8457,West,OKC,Chet Holmgren,3
8514,East,DET,Jaden Ivey,5
8459,West,OKC,Jalen Williams,6
8496,West,SAC,Keegan Murray,6
8873,West,POR,Keon Johnson,8
8530,East,IND,Bennedict Mathurin,8
8531,East,IND,Andrew Nembhard,10
