In [None]:
!pip install nba_api
!pip install requests

In [None]:
import pandas as pd
import requests

## Data Collection

#### Get player stats by season

In [None]:
from nba_api.stats.endpoints import teamplayerdashboard
from nba_api.stats.endpoints import leaguedashteamstats
from nba_api.stats.endpoints import playerdashboardbyyearoveryear
import random
import numpy as np

def get_teams(start_year):
    """Return the TEAM_ID column for the teams listed in one season.

    start_year is the calendar year the season begins in; the season label
    sent to the endpoint is '<start_year>-<last two digits of start_year + 1>'
    (e.g. 2020 -> '2020-21').
    """
    end_year = (start_year + 1) % 100
    season = f'{start_year}-{end_year:02}'
    response = leaguedashteamstats.LeagueDashTeamStats(season=season)
    # Only the first result frame of the endpoint is used.
    team_table = response.get_data_frames()[0]
    return team_table['TEAM_ID']

def get_players(team_id, start_year, min_minutes=1000, min_points=600):
    """Return PLAYER_ID / PLAYER_NAME for a team's regular contributors in a season.

    A player is kept only when both 'MIN' > min_minutes and 'PTS' > min_points.
    The defaults (1000 minutes and 600 points in total) are roughly 12 mpg and
    7 ppg, which filters out players that don't get much playtime.

    Parameters
    ----------
    team_id : team identifier, passed straight to TeamPlayerDashboard.
    start_year : calendar year the season begins in (2020 -> '2020-21').
    min_minutes : exclusive lower bound on the 'MIN' column.
    min_points : exclusive lower bound on the 'PTS' column.

    Returns
    -------
    DataFrame with the columns ['PLAYER_ID', 'PLAYER_NAME'] (possibly empty).
    """
    end_year = (start_year + 1) % 100
    teamplayers = teamplayerdashboard.TeamPlayerDashboard(team_id, season=f'{start_year}-{end_year:02}')
    # The second result frame holds one row per player.
    # NOTE(review): presumably season totals, since the thresholds are totals - confirm.
    players = teamplayers.get_data_frames()[1]
    enough_minutes = players['MIN'] > min_minutes
    enough_points = players['PTS'] > min_points
    good_players = players[enough_minutes & enough_points]
    return good_players.loc[:, ['PLAYER_ID', 'PLAYER_NAME']]

def get_player_stats(player_id, start_year):
    """Return a player's per-game stat row(s) for a single season.

    The year-over-year dashboard is requested in 'PerGame' mode, its second
    result frame is narrowed to the rows whose GROUP_VALUE equals the season
    label (2020 -> '2020-21'), and the bookkeeping columns are removed.
    """
    end_year = (start_year + 1) % 100
    season = f'{start_year}-{end_year:02}'
    dashboard = playerdashboardbyyearoveryear.PlayerDashboardByYearOverYear(
        player_id,
        per_mode_detailed='PerGame',
    )
    by_year = dashboard.get_data_frames()[1]
    in_season = by_year['GROUP_VALUE'] == season
    unwanted = ['GROUP_SET', 'TEAM_ID', 'MAX_GAME_DATE', 'CFID', 'CFPARAMS']
    return by_year[in_season].drop(columns=unwanted)

# Smoke test: pull the stats of one random qualifying player from one random 2020-21 team.
teams_2020 = get_teams(2020)
# random.randint is inclusive on both ends, so hard-coded upper bounds can index past
# the end of the data; randrange(len(...)) always yields a valid position.
rand_team = get_players(teams_2020.iat[random.randrange(len(teams_2020))], 2020)
print(rand_team.shape)
rand_player = rand_team.iloc[random.randrange(len(rand_team)), :]
print(rand_player)
# rand_player is a row with (PLAYER_ID, PLAYER_NAME); position 0 is the id.
stats = get_player_stats(rand_player.iat[0], 2020)
print(stats)
print(stats.columns)

(5, 2)
PLAYER_ID                 1626157
PLAYER_NAME    Karl-Anthony Towns
Name: 12, dtype: object
  GROUP_VALUE TEAM_ABBREVIATION  GP   W   L  W_PCT   MIN  FGM   FGA  FG_PCT  \
1     2020-21               MIN  50  19  31   0.38  33.8  8.5  17.5   0.486   

   ...  STL_RANK  BLK_RANK  BLKA_RANK  PF_RANK  PFD_RANK  PTS_RANK  \
1  ...         5         7          4        2         3         3   

   PLUS_MINUS_RANK  NBA_FANTASY_PTS_RANK  DD2_RANK  TD3_RANK  
1                4                     4         5         2  

[1 rows x 60 columns]
Index(['GROUP_VALUE', 'TEAM_ABBREVIATION', 'GP', 'W', 'L', 'W_PCT', 'MIN',
       'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
       'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3',
       'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK',
       'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK',
   

In [None]:
import pandas as pd
import time
from nba_api.stats.endpoints import teamplayerdashboard
from nba_api.stats.endpoints import leaguedashteamstats

# Iterate through each season and save data to csv
# For every season starting in 2012..2021: collect the per-game stats of each
# qualifying player on each team and write them to '<season>.csv'.
for start_year in range(2012, 2022):
    end_year = (start_year + 1) % 100
    season_label = f'{start_year}-{end_year:02}'
    print(season_label)

    data = []
    leagueteams = get_teams(start_year)
    for team_id in leagueteams:
        players = get_players(team_id, start_year)
        for player in players.itertuples():
            print(player)
            # Pause before each per-player request to space out calls to the API.
            time.sleep(1)
            stats = get_player_stats(player.PLAYER_ID, start_year)
            # Put the player's name in the first column so rows stay identifiable.
            stats.insert(0, "PLAYER_NAME", player.PLAYER_NAME)
            data.append(stats)

    # One frame per season, built with a single concat after the loops.
    big_data = pd.concat(data)
    big_data.to_csv(f'{start_year}-{end_year:02}.csv')

2012-13
Pandas(Index=0, PLAYER_ID=201143, PLAYER_NAME='Al Horford')
Pandas(Index=8, PLAYER_ID=201952, PLAYER_NAME='Jeff Teague')
Pandas(Index=12, PLAYER_ID=2746, PLAYER_NAME='Josh Smith')
Pandas(Index=13, PLAYER_ID=2594, PLAYER_NAME='Kyle Korver')
Pandas(Index=1, PLAYER_ID=101138, PLAYER_NAME='Brandon Bass')
Pandas(Index=3, PLAYER_ID=201584, PLAYER_NAME='Courtney Lee')
Pandas(Index=10, PLAYER_ID=1891, PLAYER_NAME='Jason Terry')
Pandas(Index=11, PLAYER_ID=201145, PLAYER_NAME='Jeff Green')
Pandas(Index=13, PLAYER_ID=708, PLAYER_NAME='Kevin Garnett')
Pandas(Index=16, PLAYER_ID=1718, PLAYER_NAME='Paul Pierce')
Pandas(Index=0, PLAYER_ID=101154, PLAYER_NAME='Andray Blatche')
Pandas(Index=1, PLAYER_ID=201572, PLAYER_NAME='Brook Lopez')
Pandas(Index=4, PLAYER_ID=101114, PLAYER_NAME='Deron Williams')
Pandas(Index=7, PLAYER_ID=2207, PLAYER_NAME='Joe Johnson')
Pandas(Index=0, PLAYER_ID=2732, PLAYER_NAME='Ben Gordon')
Pandas(Index=6, PLAYER_ID=201945, PLAYER_NAME='Gerald Henderson')
Pandas(Index=1

Pandas(Index=8, PLAYER_ID=101162, PLAYER_NAME='Marcin Gortat')
Pandas(Index=10, PLAYER_ID=202693, PLAYER_NAME='Markieff Morris')
Pandas(Index=11, PLAYER_ID=201563, PLAYER_NAME='Michael Beasley')
Pandas(Index=14, PLAYER_ID=200769, PLAYER_NAME='Shannon Brown')
Pandas(Index=0, PLAYER_ID=203081, PLAYER_NAME='Damian Lillard')
Pandas(Index=2, PLAYER_ID=201581, PLAYER_NAME='JJ Hickson')
Pandas(Index=5, PLAYER_ID=200746, PLAYER_NAME='LaMarcus Aldridge')
Pandas(Index=8, PLAYER_ID=201587, PLAYER_NAME='Nicolas Batum')
Pandas(Index=13, PLAYER_ID=202083, PLAYER_NAME='Wesley Matthews')
Pandas(Index=3, PLAYER_ID=202326, PLAYER_NAME='DeMarcus Cousins')
Pandas(Index=5, PLAYER_ID=202738, PLAYER_NAME='Isaiah Thomas')
Pandas(Index=7, PLAYER_ID=201574, PLAYER_NAME='Jason Thompson')
Pandas(Index=9, PLAYER_ID=2422, PLAYER_NAME='John Salmons')
Pandas(Index=10, PLAYER_ID=201977, PLAYER_NAME='Marcus Thornton')
Pandas(Index=16, PLAYER_ID=201936, PLAYER_NAME='Tyreke Evans')
Pandas(Index=3, PLAYER_ID=201980, PLAYE

Pandas(Index=13, PLAYER_ID=201937, PLAYER_NAME='Ricky Rubio')
Pandas(Index=2, PLAYER_ID=203076, PLAYER_NAME='Anthony Davis')
Pandas(Index=3, PLAYER_ID=201627, PLAYER_NAME='Anthony Morrow')
Pandas(Index=6, PLAYER_ID=203148, PLAYER_NAME='Brian Roberts')
Pandas(Index=8, PLAYER_ID=201569, PLAYER_NAME='Eric Gordon')
Pandas(Index=20, PLAYER_ID=201936, PLAYER_NAME='Tyreke Evans')
Pandas(Index=0, PLAYER_ID=2405, PLAYER_NAME="Amar'e Stoudemire")
Pandas(Index=3, PLAYER_ID=2546, PLAYER_NAME='Carmelo Anthony')
Pandas(Index=8, PLAYER_ID=2747, PLAYER_NAME='JR Smith')
Pandas(Index=13, PLAYER_ID=101109, PLAYER_NAME='Raymond Felton')
Pandas(Index=15, PLAYER_ID=203501, PLAYER_NAME='Tim Hardaway Jr.')
Pandas(Index=4, PLAYER_ID=203087, PLAYER_NAME='Jeremy Lamb')
Pandas(Index=6, PLAYER_ID=201142, PLAYER_NAME='Kevin Durant')
Pandas(Index=10, PLAYER_ID=202704, PLAYER_NAME='Reggie Jackson')
Pandas(Index=13, PLAYER_ID=201566, PLAYER_NAME='Russell Westbrook')
Pandas(Index=15, PLAYER_ID=201586, PLAYER_NAME='Serg

Pandas(Index=5, PLAYER_ID=2561, PLAYER_NAME='David West')
Pandas(Index=7, PLAYER_ID=201588, PLAYER_NAME='George Hill')
Pandas(Index=10, PLAYER_ID=2449, PLAYER_NAME='Luis Scola')
Pandas(Index=12, PLAYER_ID=201155, PLAYER_NAME='Rodney Stuckey')
Pandas(Index=13, PLAYER_ID=201579, PLAYER_NAME='Roy Hibbert')
Pandas(Index=15, PLAYER_ID=203524, PLAYER_NAME='Solomon Hill')
Pandas(Index=1, PLAYER_ID=201933, PLAYER_NAME='Blake Griffin')
Pandas(Index=4, PLAYER_ID=101108, PLAYER_NAME='Chris Paul')
Pandas(Index=6, PLAYER_ID=201599, PLAYER_NAME='DeAndre Jordan')
Pandas(Index=10, PLAYER_ID=200755, PLAYER_NAME='JJ Redick')
Pandas(Index=11, PLAYER_ID=2037, PLAYER_NAME='Jamal Crawford')
Pandas(Index=16, PLAYER_ID=2440, PLAYER_NAME='Matt Barnes')
Pandas(Index=0, PLAYER_ID=2430, PLAYER_NAME='Carlos Boozer')
Pandas(Index=2, PLAYER_ID=202334, PLAYER_NAME='Ed Davis')
Pandas(Index=4, PLAYER_ID=202391, PLAYER_NAME='Jeremy Lin')
Pandas(Index=5, PLAYER_ID=203903, PLAYER_NAME='Jordan Clarkson')
Pandas(Index=6, PL

Pandas(Index=9, PLAYER_ID=201567, PLAYER_NAME='Kevin Love')
Pandas(Index=10, PLAYER_ID=202681, PLAYER_NAME='Kyrie Irving')
Pandas(Index=11, PLAYER_ID=2544, PLAYER_NAME='LeBron James')
Pandas(Index=17, PLAYER_ID=202684, PLAYER_NAME='Tristan Thompson')
Pandas(Index=0, PLAYER_ID=202718, PLAYER_NAME='Chandler Parsons')
Pandas(Index=3, PLAYER_ID=101114, PLAYER_NAME='Deron Williams')
Pandas(Index=5, PLAYER_ID=1717, PLAYER_NAME='Dirk Nowitzki')
Pandas(Index=7, PLAYER_ID=200826, PLAYER_NAME='J.J. Barea')
Pandas(Index=12, PLAYER_ID=101109, PLAYER_NAME='Raymond Felton')
Pandas(Index=14, PLAYER_ID=202083, PLAYER_NAME='Wesley Matthews')
Pandas(Index=15, PLAYER_ID=2585, PLAYER_NAME='Zaza Pachulia')
Pandas(Index=2, PLAYER_ID=201568, PLAYER_NAME='Danilo Gallinari')
Pandas(Index=4, PLAYER_ID=1626144, PLAYER_NAME='Emmanuel Mudiay')
Pandas(Index=6, PLAYER_ID=203914, PLAYER_NAME='Gary Harris')
Pandas(Index=12, PLAYER_ID=202702, PLAYER_NAME='Kenneth Faried')
Pandas(Index=15, PLAYER_ID=203999, PLAYER_NAME=

Pandas(Index=1, PLAYER_ID=203078, PLAYER_NAME='Bradley Beal')
Pandas(Index=7, PLAYER_ID=201162, PLAYER_NAME='Jared Dudley')
Pandas(Index=9, PLAYER_ID=202322, PLAYER_NAME='John Wall')
Pandas(Index=12, PLAYER_ID=101162, PLAYER_NAME='Marcin Gortat')
Pandas(Index=16, PLAYER_ID=203490, PLAYER_NAME='Otto Porter Jr.')
Pandas(Index=17, PLAYER_ID=201196, PLAYER_NAME='Ramon Sessions')
2016-17
Pandas(Index=1, PLAYER_ID=203471, PLAYER_NAME='Dennis Schroder')
Pandas(Index=2, PLAYER_ID=2730, PLAYER_NAME='Dwight Howard')
Pandas(Index=7, PLAYER_ID=203145, PLAYER_NAME='Kent Bazemore')
Pandas(Index=15, PLAYER_ID=200794, PLAYER_NAME='Paul Millsap')
Pandas(Index=19, PLAYER_ID=203501, PLAYER_NAME='Tim Hardaway Jr.')
Pandas(Index=0, PLAYER_ID=201143, PLAYER_NAME='Al Horford')
Pandas(Index=2, PLAYER_ID=202340, PLAYER_NAME='Avery Bradley')
Pandas(Index=5, PLAYER_ID=202738, PLAYER_NAME='Isaiah Thomas')
Pandas(Index=6, PLAYER_ID=203109, PLAYER_NAME='Jae Crowder')
Pandas(Index=11, PLAYER_ID=203482, PLAYER_NAME='

Pandas(Index=2, PLAYER_ID=203967, PLAYER_NAME='Dario Saric')
Pandas(Index=3, PLAYER_ID=101141, PLAYER_NAME='Ersan Ilyasova')
Pandas(Index=4, PLAYER_ID=201945, PLAYER_NAME='Gerald Henderson')
Pandas(Index=13, PLAYER_ID=203917, PLAYER_NAME='Nik Stauskas')
Pandas(Index=15, PLAYER_ID=203496, PLAYER_NAME='Robert Covington')
Pandas(Index=1, PLAYER_ID=203458, PLAYER_NAME='Alex Len')
Pandas(Index=4, PLAYER_ID=1626164, PLAYER_NAME='Devin Booker')
Pandas(Index=7, PLAYER_ID=202339, PLAYER_NAME='Eric Bledsoe')
Pandas(Index=12, PLAYER_ID=1627737, PLAYER_NAME='Marquese Chriss')
Pandas(Index=15, PLAYER_ID=203933, PLAYER_NAME='T.J. Warren')
Pandas(Index=1, PLAYER_ID=203459, PLAYER_NAME='Allen Crabbe')
Pandas(Index=2, PLAYER_ID=203468, PLAYER_NAME='CJ McCollum')
Pandas(Index=3, PLAYER_ID=203081, PLAYER_NAME='Damian Lillard')
Pandas(Index=9, PLAYER_ID=203090, PLAYER_NAME='Maurice Harkless')
Pandas(Index=4, PLAYER_ID=201954, PLAYER_NAME='Darren Collison')
Pandas(Index=5, PLAYER_ID=202326, PLAYER_NAME='De

Pandas(Index=5, PLAYER_ID=201609, PLAYER_NAME='Goran Dragic')
Pandas(Index=6, PLAYER_ID=202355, PLAYER_NAME='Hassan Whiteside')
Pandas(Index=7, PLAYER_ID=201949, PLAYER_NAME='James Johnson')
Pandas(Index=9, PLAYER_ID=1626196, PLAYER_NAME='Josh Richardson')
Pandas(Index=11, PLAYER_ID=203482, PLAYER_NAME='Kelly Olynyk')
Pandas(Index=16, PLAYER_ID=204020, PLAYER_NAME='Tyler Johnson')
Pandas(Index=18, PLAYER_ID=201961, PLAYER_NAME='Wayne Ellington')
Pandas(Index=3, PLAYER_ID=202339, PLAYER_NAME='Eric Bledsoe')
Pandas(Index=5, PLAYER_ID=203507, PLAYER_NAME='Giannis Antetokounmpo')
Pandas(Index=10, PLAYER_ID=203089, PLAYER_NAME='John Henson')
Pandas(Index=11, PLAYER_ID=203114, PLAYER_NAME='Khris Middleton')
Pandas(Index=12, PLAYER_ID=1627763, PLAYER_NAME='Malcolm Brogdon')
Pandas(Index=1, PLAYER_ID=203952, PLAYER_NAME='Andrew Wiggins')
Pandas(Index=6, PLAYER_ID=2037, PLAYER_NAME='Jamal Crawford')
Pandas(Index=7, PLAYER_ID=201952, PLAYER_NAME='Jeff Teague')
Pandas(Index=8, PLAYER_ID=202710, P

Pandas(Index=1, PLAYER_ID=201933, PLAYER_NAME='Blake Griffin')
Pandas(Index=10, PLAYER_ID=204038, PLAYER_NAME='Langston Galloway')
Pandas(Index=11, PLAYER_ID=1628379, PLAYER_NAME='Luke Kennard')
Pandas(Index=13, PLAYER_ID=202704, PLAYER_NAME='Reggie Jackson')
Pandas(Index=10, PLAYER_ID=201142, PLAYER_NAME='Kevin Durant')
Pandas(Index=12, PLAYER_ID=202691, PLAYER_NAME='Klay Thompson')
Pandas(Index=16, PLAYER_ID=201939, PLAYER_NAME='Stephen Curry')
Pandas(Index=4, PLAYER_ID=101108, PLAYER_NAME='Chris Paul')
Pandas(Index=5, PLAYER_ID=203991, PLAYER_NAME='Clint Capela')
Pandas(Index=7, PLAYER_ID=201569, PLAYER_NAME='Eric Gordon')
Pandas(Index=9, PLAYER_ID=101123, PLAYER_NAME='Gerald Green')
Pandas(Index=13, PLAYER_ID=201935, PLAYER_NAME='James Harden')
Pandas(Index=19, PLAYER_ID=200782, PLAYER_NAME='P.J. Tucker')
Pandas(Index=2, PLAYER_ID=202711, PLAYER_NAME='Bojan Bogdanovic')
Pandas(Index=4, PLAYER_ID=201954, PLAYER_NAME='Darren Collison')
Pandas(Index=6, PLAYER_ID=1627734, PLAYER_NAME='

Pandas(Index=10, PLAYER_ID=1628386, PLAYER_NAME='Jarrett Allen')
Pandas(Index=12, PLAYER_ID=203925, PLAYER_NAME='Joe Harris')
Pandas(Index=18, PLAYER_ID=203915, PLAYER_NAME='Spencer Dinwiddie')
Pandas(Index=19, PLAYER_ID=1627752, PLAYER_NAME='Taurean Prince')
Pandas(Index=3, PLAYER_ID=203469, PLAYER_NAME='Cody Zeller')
Pandas(Index=4, PLAYER_ID=1628984, PLAYER_NAME="Devonte' Graham")
Pandas(Index=11, PLAYER_ID=1628970, PLAYER_NAME='Miles Bridges')
Pandas(Index=13, PLAYER_ID=1629023, PLAYER_NAME='P.J. Washington')
Pandas(Index=14, PLAYER_ID=1626179, PLAYER_NAME='Terry Rozier')
Pandas(Index=2, PLAYER_ID=1629632, PLAYER_NAME='Coby White')
Pandas(Index=7, PLAYER_ID=1628374, PLAYER_NAME='Lauri Markkanen')
Pandas(Index=13, PLAYER_ID=201152, PLAYER_NAME='Thaddeus Young')
Pandas(Index=14, PLAYER_ID=203107, PLAYER_NAME='Tomas Satoransky')
Pandas(Index=16, PLAYER_ID=203897, PLAYER_NAME='Zach LaVine')
Pandas(Index=4, PLAYER_ID=1626224, PLAYER_NAME='Cedi Osman')
Pandas(Index=5, PLAYER_ID=1629012, 

Pandas(Index=10, PLAYER_ID=1627783, PLAYER_NAME='Pascal Siakam')
Pandas(Index=14, PLAYER_ID=201586, PLAYER_NAME='Serge Ibaka')
Pandas(Index=0, PLAYER_ID=202711, PLAYER_NAME='Bojan Bogdanovic')
Pandas(Index=2, PLAYER_ID=1628378, PLAYER_NAME='Donovan Mitchell')
Pandas(Index=8, PLAYER_ID=204060, PLAYER_NAME='Joe Ingles')
Pandas(Index=9, PLAYER_ID=203903, PLAYER_NAME='Jordan Clarkson')
Pandas(Index=12, PLAYER_ID=201144, PLAYER_NAME='Mike Conley')
Pandas(Index=17, PLAYER_ID=203497, PLAYER_NAME='Rudy Gobert')
Pandas(Index=2, PLAYER_ID=203078, PLAYER_NAME='Bradley Beal')
Pandas(Index=5, PLAYER_ID=202722, PLAYER_NAME='Davis Bertans')
Pandas(Index=11, PLAYER_ID=202397, PLAYER_NAME='Ish Smith')
Pandas(Index=19, PLAYER_ID=1629060, PLAYER_NAME='Rui Hachimura')
Pandas(Index=21, PLAYER_ID=1628418, PLAYER_NAME='Thomas Bryant')
Pandas(Index=22, PLAYER_ID=1628972, PLAYER_NAME='Troy Brown Jr.')
2020-21
Pandas(Index=0, PLAYER_ID=203992, PLAYER_NAME='Bogdan Bogdanovic')
Pandas(Index=4, PLAYER_ID=203991, P

Pandas(Index=6, PLAYER_ID=1629028, PLAYER_NAME='Deandre Ayton')
Pandas(Index=7, PLAYER_ID=1626164, PLAYER_NAME='Devin Booker')
Pandas(Index=10, PLAYER_ID=203109, PLAYER_NAME='Jae Crowder')
Pandas(Index=14, PLAYER_ID=1628969, PLAYER_NAME='Mikal Bridges')
Pandas(Index=2, PLAYER_ID=203468, PLAYER_NAME='CJ McCollum')
Pandas(Index=3, PLAYER_ID=2546, PLAYER_NAME='Carmelo Anthony')
Pandas(Index=4, PLAYER_ID=203081, PLAYER_NAME='Damian Lillard')
Pandas(Index=6, PLAYER_ID=202683, PLAYER_NAME='Enes Freedom')
Pandas(Index=7, PLAYER_ID=1629018, PLAYER_NAME='Gary Trent Jr.')
Pandas(Index=0, PLAYER_ID=1627741, PLAYER_NAME='Buddy Hield')
Pandas(Index=6, PLAYER_ID=1628368, PLAYER_NAME="De'Aaron Fox")
Pandas(Index=9, PLAYER_ID=203084, PLAYER_NAME='Harrison Barnes')
Pandas(Index=16, PLAYER_ID=1628963, PLAYER_NAME='Marvin Bagley III')
Pandas(Index=20, PLAYER_ID=1626158, PLAYER_NAME='Richaun Holmes')
Pandas(Index=23, PLAYER_ID=1630169, PLAYER_NAME='Tyrese Haliburton')
Pandas(Index=1, PLAYER_ID=201942, PLA

#### Get list of MVPs

In [None]:
from bs4 import BeautifulSoup

URL = "https://www.nba.com/news/history-mvp-award-winners"
# Without a timeout requests can wait indefinitely on an unresponsive server.
page = requests.get(URL, timeout=30)
# Fail here on an HTTP error instead of parsing an error page further down.
page.raise_for_status()

#webscrape NBA.com's mvp page for MVP names and corresponding years
def _parse_mvp_line(text):
  """Turn the text of one award paragraph into [season, name, team].

  The first whitespace-separated token is the season and the second token is
  treated as a separator and ignored. If the remainder contains a comma it is
  split there into name and team, so names and teams of any length survive
  intact. Without a comma the original positional rule is used: two tokens
  for the name followed by two tokens for the team.

  Raises ValueError when the text fits neither layout.
  """
  tokens = text.split()
  if len(tokens) >= 3:
    name, comma, team = " ".join(tokens[2:]).partition(",")
    name, team = name.strip(), team.strip()
    if comma and name and team:
      return [tokens[0], name, team]
  if len(tokens) >= 6:
    return [tokens[0], tokens[2] + " " + tokens[3], tokens[4] + " " + tokens[5]]
  raise ValueError(f"Unrecognised MVP line: {text!r}")

def get_MVP_List():
  """Parse the downloaded MVP page into a list of [season, name, team] rows.

  Reads the module-level `page` response. Every <p> inside the article
  container(s) is collected; the first two are skipped (presumably
  introductory text - confirm against the live page) and blank paragraphs
  are ignored.
  """
  soup = BeautifulSoup(page.content, "html.parser")
  results = soup.find(id = "__next")
  # NOTE(review): the class suffix looks build-generated and may change when the site is redeployed.
  players = results.find_all("div", class_="Article_article__2Ue3h")
  paragraphs = []
  for sample in players:
    paragraphs.extend(sample.find_all("p"))

  mvp_list = []
  for mvp in paragraphs[2:]:
    # get_text drops the <p> wrapper and any nested markup, unlike slicing str(tag).
    text = mvp.get_text(" ", strip=True)
    if text:
      mvp_list.append(_parse_mvp_line(text))

  return mvp_list

# Scrape the rows, label the three fields, and save them without the index column.
mvp_data = get_MVP_List()

mvp_columns = ["season", "Name", "Team"]
mvp_dataframe = pd.DataFrame(data = mvp_data, columns = mvp_columns)
mvp_dataframe.to_csv("MVP_List.csv", index = False)

## Split Years Using K-Fold

In [None]:
from sklearn.model_selection import KFold

# Fixed seed so the hold-out split and the folds are the same on every run.
SPLIT_SEED = 42

kf = KFold(n_splits = 5, shuffle = True, random_state = SPLIT_SEED)
mvp_list = pd.read_csv (r'/content/drive/MyDrive/MVP_List.csv')
# Plain string comparison; this relies on season labels sharing the 'YYYY-YY' layout.
samples = mvp_list.loc[mvp_list["season"] >= "1996-97"]
years = samples["season"]

# 80% of the seasons are used for training; the remaining rows are held out.
trainingSamples = years.sample(frac = 0.8, random_state = SPLIT_SEED)
# NOTE(review): testSamples keeps every column of `samples`, whereas trainingSamples
# holds only the season labels - confirm this asymmetry is intended.
testSamples = samples.drop(trainingSamples.index)

# Each fold is [training seasons, validation seasons], both as plain lists.
folds = []
for train, valid in kf.split(trainingSamples):
  folds.append([trainingSamples.iloc[train].values.tolist(), trainingSamples.iloc[valid].values.tolist()])
  