In [1]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

pd.set_option('display.max_columns', None)

The API that used information from stats.nba.com (NBA_API) contains countless team and individual stats from every game of every season in the NBA's history, we will use this as the primary source of statistics in our prediction model, our first step will be to install this api.

In [None]:
#pip install nba_api

Our API has many endpoints, table of contents found here (https://github.com/swar/nba_api/blob/master/docs/table_of_contents.md) we will use a few of these to help gather box score statistics from all regular season games over the last 5 seasons

In [2]:
from nba_api.stats.endpoints import leaguegamefinder

gamefinder = leaguegamefinder.LeagueGameFinder()
games = gamefinder.get_data_frames()[0]
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22021,1610612762,UTA,Utah Jazz,22100054,2021-10-26,UTA vs. DEN,W,240,122,44,85,0.518,12,39,0.308,22,29,0.759,8,33,41,20,5,6,13,22,12.0
1,22021,1610612755,PHI,Philadelphia 76ers,22100050,2021-10-26,PHI @ NYK,L,239,99,36,82,0.439,12,41,0.293,15,17,0.882,12,28,40,24,9,4,14,14,-13.0
2,22021,1610612752,NYK,New York Knicks,22100050,2021-10-26,NYK vs. PHI,W,239,112,41,87,0.471,16,37,0.432,14,15,0.933,13,32,45,24,7,5,12,18,13.0
3,22021,1610612744,GSW,Golden State Warriors,22100051,2021-10-26,GSW @ OKC,W,240,106,39,83,0.47,14,40,0.35,14,18,0.778,7,39,46,30,9,8,15,20,8.0
4,22021,1610612759,SAS,San Antonio Spurs,22100053,2021-10-26,SAS vs. LAL,L,263,121,48,108,0.444,13,36,0.361,12,22,0.545,16,43,59,33,10,6,15,22,-4.0


The API contains game information of WNBA, G-League, Summer League and even the NBA's Gaming League, lets limit our data to just regular season NBA games since the 2016-17 season, creating a seperate dataframe for each season with only the regular season games

In [14]:
nba_teams = ['Golden State Warriors','Boston Celtics','Toronto Raptors','Houston Rockets','Cleveland Cavaliers','Miami Heat','San Antonio Spurs','Milwaukee Bucks','Portland Trail Blazers','Utah Jazz','Atlanta Hawks','Los Angeles Lakers','Philadelphia 76ers','Memphis Grizzlies','Denver Nuggets','Washington Wizards','Dallas Mavericks','LA Clippers','Oklahoma City Thunder','Brooklyn Nets','Indiana Pacers','Phoenix Suns','New Orleans Pelicans','Chicago Bulls','Sacramento Kings','Orlando Magic','Charlotte Hornets','Detroit Pistons','Minnesota Timberwolves','New York Knicks']

In [15]:
games_nba = games.loc[games['TEAM_NAME'].apply(lambda x: x in (nba_teams))]

In [16]:
season21 = games_nba.loc[games_nba.SEASON_ID == '22020'].reset_index(drop=True)
season20 = games_nba.loc[games_nba.SEASON_ID == '22019'].reset_index(drop=True)
season19 = games_nba.loc[games_nba.SEASON_ID == '22018'].head(2460).reset_index(drop=True)
season18 = games_nba.loc[games_nba.SEASON_ID == '22017'].head(2460).reset_index(drop=True)
season17 = games_nba.loc[games_nba.SEASON_ID == '22016'].head(2460).reset_index(drop=True)

season20 = season20.loc[season20.GAME_DATE >= '2019-10-22']
season20 = season20.reset_index(drop=True)

At the moment our season data frames contain two rows for each game (one for each team) using this handy combine_team_games function, we can merge the dataframe so there is only one row per game, and the home and away teams and statistics are labeled

In [3]:
def combine_team_games(df, keep_method='home'):
    '''Combine a TEAM_ID-GAME_ID unique table into rows by game. Slow.

        Parameters
        ----------
        df : Input DataFrame.
        keep_method : {'home', 'away', 'winner', 'loser', ``None``}, default 'home'
            - 'home' : Keep rows where TEAM_A is the home team.
            - 'away' : Keep rows where TEAM_A is the away team.
            - 'winner' : Keep rows where TEAM_A is the losing team.
            - 'loser' : Keep rows where TEAM_A is the winning team.
            - ``None`` : Keep all rows. Will result in an output DataFrame the same
                length as the input DataFrame.
                
        Returns
        -------
        result : DataFrame
    '''
    # Join every row to all others with the same game ID.
    joined = pd.merge(df, df, suffixes=['_Home', '_Away'],
                      on=['SEASON_ID', 'GAME_ID', 'GAME_DATE'])
    # Filter out any row that is joined to itself.
    result = joined[joined.TEAM_ID_Home != joined.TEAM_ID_Away]
    # Take action based on the keep_method flag.
    if keep_method is None:
        # Return all the rows.
        pass
    elif keep_method.lower() == 'home':
        # Keep rows where TEAM_A is the home team.
        result = result[result.MATCHUP_Home.str.contains(' vs. ')]
    elif keep_method.lower() == 'away':
        # Keep rows where TEAM_A is the away team.
        result = result[result.MATCHUP_A.str.contains(' @ ')]
    elif keep_method.lower() == 'winner':
        result = result[result.WL_A == 'W']
    elif keep_method.lower() == 'loser':
        result = result[result.WL_A == 'L']
    else:
        raise ValueError(f'Invalid keep_method: {keep_method}')
    return result

In [17]:
loop_season21 = combine_team_games(season21)
loop_season20 = combine_team_games(season20)
loop_season19 = combine_team_games(season19)
loop_season18 = combine_team_games(season18)
loop_season17 = combine_team_games(season17)

In [18]:
loop_season21.reset_index(inplace=True, drop=True)
loop_season20.reset_index(inplace=True, drop=True)
loop_season19.reset_index(inplace=True, drop=True)
loop_season18.reset_index(inplace=True, drop=True)
loop_season17.reset_index(inplace=True, drop=True)

Using our 'loop_season' dataframes we will create dataframes that contain all advanced statistics and four factors statistics for each game in our datasets, take a peak at what the first game from the 2021 advanced and four factors dataframe looks like

In [20]:
from nba_api.stats.endpoints import boxscoreadvancedv2
#Advanced Stats by gameID
boxscore21 = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season21['GAME_ID'][0])
bs_df21 = boxscore21.get_data_frames()[1]
bs_df21

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,E_NET_RATING,NET_RATING,AST_PCT,AST_TOV,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,E_TM_TOV_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,22001066,1610612745,Rockets,HOU,Houston,240:00,89.8,93.1,119.9,120.4,-30.1,-27.3,0.806,1.56,17.6,0.25,0.702,0.441,15.123,15.7,0.435,0.471,1.0,0.199,104.62,102.5,85.42,102,0.322
1,22001066,1610612737,Hawks,ATL,Atlanta,240:00,119.9,120.4,89.8,93.1,30.1,27.3,0.653,2.46,21.9,0.298,0.75,0.559,12.568,12.6,0.583,0.611,1.0,0.199,104.62,102.5,85.42,103,0.678


In [None]:
for n in range(1, len(loop_season21)):
    dfn = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = (loop_season21['GAME_ID'][n]))
    dfn2 = dfn.get_data_frames()[1]
    bs_df21 = bs_df21.append(dfn2)


In [21]:
from nba_api.stats.endpoints import boxscorefourfactorsv2
#FourFactors
ffboxscore21 = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id = (loop_season21['GAME_ID'][0]))
ff_df21 = ffboxscore21.get_data_frames()[1]
ff_df21

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,EFG_PCT,FTA_RATE,TM_TOV_PCT,OREB_PCT,OPP_EFG_PCT,OPP_FTA_RATE,OPP_TOV_PCT,OPP_OREB_PCT
0,22001066,1610612745,Rockets,HOU,Houston,240:00,0.435,0.217,0.151,0.172,0.583,0.289,0.126,0.298
1,22001066,1610612737,Hawks,ATL,Atlanta,240:00,0.583,0.289,0.126,0.234,0.435,0.217,0.151,0.25


In [None]:
for n in range(1, len(loop_season21)):
    dfn = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id = (loop_season21['GAME_ID'][n]))
    dfn2 = dfn.get_data_frames()[1]
    ff_df21 = ff_df21.append(dfn2)

Now we will repeat this process for the rest of the seasons

In [None]:
boxscore20 = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season20['GAME_ID'][0])
boxscore19 = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season19['GAME_ID'][0])
boxscore18 = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season18['GAME_ID'][0])
boxscore17 = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season17['GAME_ID'][0])
bs_df20 = boxscore20.get_data_frames()[1]
bs_df19 = boxscore19.get_data_frames()[1]
bs_df18 = boxscore18.get_data_frames()[1]
bs_df17 = boxscore17.get_data_frames()[1]

In [None]:
ffboxscore20 = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id = (loop_season20['GAME_ID'][0]))
ffboxscore19 = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id = (loop_season19['GAME_ID'][0]))
ffboxscore18 = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id = (loop_season18['GAME_ID'][0]))
ffboxscore17 = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id = (loop_season17['GAME_ID'][0]))
ff_df20 = ffboxscore20.get_data_frames()[1]
ff_df19 = ffboxscore19.get_data_frames()[1]
ff_df18 = ffboxscore.get_data_frames()[1]
ff_df17 = ffboxscore.get_data_frames()[1]

Using this combine_dfs function we can combine the three dataframes from each year into one, and drop the columns that are in common

In [None]:
def combine_dfs(season_df, bs_df, ff_df):
    merge1 = pd.merge(season_df, bs_df, on=['GAME_ID', 'TEAM_ID']).drop(labels=['TEAM_NAME_y', 'TEAM_ABBREVIATION_y', 'MIN_y', 'TEAM_CITY'], axis=1)
    merge2 = pd.merge(merge1, ff_df, on = ['GAME_ID', 'TEAM_ID']).drop(labels=['TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN','EFG_PCT_x', 'TM_TOV_PCT_x'], axis=1)
    merge2.sort_values(by=["GAME_DATE", 'GAME_ID'], ascending=[1,1], inplace=True)
    merge2.reset_index(inplace=True, drop=True)
    return merge2

In [None]:
splitdf21 = combine_dfs(season21, bs21, ff21)
splitdf20 = combine_dfs(season20, bs20, ff20)
splitdf19 = combine_dfs(season19, bs19, ff19)
splitdf18 = combine_dfs(season18, bs18, ff18)
splitdf17 = combine_dfs(season17, bs17, ff17)

Now we can create dataframes for each game's player statistics, this will help the model be able to capture when top players are injured or resting for a given game

In [None]:
from nba_api.stats.endpoints import boxscoreadvancedv2

bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season21['GAME_ID'][0])
bsa = bsa.get_data_frames()[0]
starters21 = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]

for n in range(1, (len(loop_season21))):
    bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season21['GAME_ID'][n])
    bsa = bsa.get_data_frames()[0]
    bsa = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]
    starters21 = starters21.append(bsa)

In [None]:
bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season20['GAME_ID'][0])
bsa = bsa.get_data_frames()[0]
starters20 = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]

for n in range(1, (len(loop_season20))):
    bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season20['GAME_ID'][n])
    bsa = bsa.get_data_frames()[0]
    bsa = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]
    starters20 = starters20.append(bsa)

In [None]:
bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season19['GAME_ID'][0])
bsa = bsa.get_data_frames()[0]
starters19 = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]

for n in range(1, (len(loop_seasonf19))):
    bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season19['GAME_ID'][n])
    bsa = bsa.get_data_frames()[0]
    bsa = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]
    starters19 = starters19.append(bsa)

In [None]:
bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season18['GAME_ID'][0])
bsa = bsa.get_data_frames()[0]
starters18 = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]

for n in range(1, (len(testdf18))):
    bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season18['GAME_ID'][n])
    bsa = bsa.get_data_frames()[0]
    bsa = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]
    starters18 = starters18.append(bsa)

In [None]:
bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season17['GAME_ID'][0])
bsa = bsa.get_data_frames()[0]
starters17 = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]

for n in range(1, (len(testdf17))):
    bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id = loop_season17['GAME_ID'][n])
    bsa = bsa.get_data_frames()[0]
    bsa = bsa.loc[bsa['START_POSITION'].isin(['F', 'C', 'G'])]
    starters17 = starters17.append(bsa)

Lets upload data containing the betting statistics from each game, these files were obtained from https://www.sportsbookreviewsonline.com/scoresoddsarchives/nba/nbaoddsarchives.htm. 

The downloaded versions contain the MoneyLine, Spread and Over-Under for each game. We can use these to test if our model can produce a profitable betting strategy for future NBA games

In [4]:
odds21 = pd.read_excel('Odds-Data-Clean/nba odds 2020-21.xlsx')
odds20 = pd.read_excel('Odds-Data-Clean/nba odds 2019-20.xlsx')
odds19 = pd.read_excel('Odds-Data-Clean/nba odds 2018-19.xlsx')
odds18 = pd.read_excel('Odds-Data-Clean/nba odds 2017-18.xlsx')
odds17 = pd.read_excel('Odds-Data-Clean/nba odds 2016-17.xlsx')

In [5]:
odds21.head()

Unnamed: 0,Date,Rot,VH,Team,1st,2nd,3rd,4th,Final,Open,Close,ML,2H
0,1222,501,V,GoldenState,25,20,26,28,99,228.0,234.5,265,1.0
1,1222,502,H,Brooklyn,40,23,36,26,125,5.5,7.5,-320,115.0
2,1222,503,V,LAClippers,39,17,33,27,116,223.0,219.0,115,108.5
3,1222,504,H,LALakers,19,35,24,31,109,2.5,2.0,-135,3.5
4,1223,551,V,Charlotte,23,21,36,34,114,215.0,3.0,-140,7.0


Lets drop all non-regular season games

In [None]:
odds21 = odds21[:2160]
odds20 = odds20[:2118]
odds19 = odds19[:2460]
odds18 = odds18[:2460]
odds17 = odds17[:2460]

These dataframes use the phrase 'pk' to describe a game that the sportsbook do not put a spread on, meaning the teams are believed to be even, lets change this to a 0, so our computer can comprehend it easier.

In [6]:
for ind in odds21.index:
    if odds21['Close'][ind] == 'pk' or odds21['Close'][ind] == 'PK':
        odds21['Close'][ind] = 0
for ind in odds20.index:
    if odds20['Close'][ind] == 'pk' or odds20['Close'][ind] == 'PK':
        odds20['Close'][ind] = 0
for ind in odds19.index:
    if odds19['Close'][ind] == 'pk' or odds19['Close'][ind] == 'PK':
        odds19['Close'][ind] = 0
for ind in odds18.index:
    if odds18['Close'][ind] == 'pk' or odds18['Close'][ind] == 'PK':
        odds18['Close'][ind] = 0
for ind in odds17.index:
    if odds17['Close'][ind] == 'pk' or odds17['Close'][ind] == 'PK':
        odds17['Close'][ind] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  odds21['Close'][ind] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  odds20['Close'][ind] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  odds19['Close'][ind] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  odds18['Close'][ind] = 0
A value is trying to be set on a copy of a slice from a DataFrame

S

Using the convert_to_datetime function below to change the date column so the computer will recognize it as a time variable, this will also make it easier to merge this data with the game data

In [7]:
from datetime import datetime

def convert_to_datetime(x, year):
    y = str(x)
    if x < 1000:
        date = datetime(year=year, month=int(y[:-2]), day=int(y[-2:]))
        return date.strftime("%Y-%m-%d")
    else:
        date = datetime(year=year-1, month=int(y[:-2]), day=int(y[-2:]))
        return date.strftime("%Y-%m-%d")


odds21['Date'] = odds21['Date'].apply(lambda x: convert_to_datetime(x, 2021))
odds20['Date'] = odds20['Date'].apply(lambda x: convert_to_datetime(x, 2020))
odds19['Date'] = odds19['Date'].apply(lambda x: convert_to_datetime(x, 2019))
odds18['Date'] = odds18['Date'].apply(lambda x: convert_to_datetime(x, 2018))
odds17['Date'] = odds17['Date'].apply(lambda x: convert_to_datetime(x, 2017))

With the function below, we can create a more organized dataframe, where there are specific columns for the spread and over/under.

In [8]:
def make_spreads_overs(df):
    spread_col = []
    o_u_col = []
    ind = 0
    while ind < len(df) - 1:
        pair = (float(df['Close'][ind]), float(df['Close'][ind + 1]))
        spread_index = 0 if pair[0] < 120 else 1
        spread = pair[spread_index]
        o_u_index = 0 if pair[0] > 120 else 1
        o_u = pair[o_u_index]
    
        if spread_index == 0:
            spread_col.append(-spread)
            spread_col.append(spread)
            o_u_col.append(o_u)
            o_u_col.append(o_u)
        else:
            spread_col.append(spread)
            spread_col.append(-spread)
            o_u_col.append(o_u)
            o_u_col.append(o_u)
        ind += 2
    df['Spread'] = spread_col
    df['Over/Under'] = o_u_col   
    
make_spreads_overs(odds21)
make_spreads_overs(odds20)
make_spreads_overs(odds19)
make_spreads_overs(odds18)
make_spreads_overs(odds17)

In [9]:
odds21.head()

Unnamed: 0,Date,Rot,VH,Team,1st,2nd,3rd,4th,Final,Open,Close,ML,2H,Spread,Over/Under
0,2020-12-22,501,V,GoldenState,25,20,26,28,99,228.0,234.5,265,1.0,7.5,234.5
1,2020-12-22,502,H,Brooklyn,40,23,36,26,125,5.5,7.5,-320,115.0,-7.5,234.5
2,2020-12-22,503,V,LAClippers,39,17,33,27,116,223.0,219.0,115,108.5,2.0,219.0
3,2020-12-22,504,H,LALakers,19,35,24,31,109,2.5,2.0,-135,3.5,-2.0,219.0
4,2020-12-23,551,V,Charlotte,23,21,36,34,114,215.0,3.0,-140,7.0,-3.0,217.0


Let's get rid of the columns we don't need and change the team names so they match the dataframes we made earlier

In [10]:
odds21 = odds21[['Date', 'Team', 'Over/Under', 'Spread', 'ML']]
odds20 = odds20[['Date', 'Team', 'Over/Under', 'Spread', 'ML']]
odds19 = odds19[['Date', 'Team', 'Over/Under', 'Spread', 'ML']]
odds18 = odds18[['Date', 'Team', 'Over/Under', 'Spread', 'ML']]
odds17 = odds17[['Date', 'Team', 'Over/Under', 'Spread', 'ML']]

In [11]:
team_dict = {'Atlanta':'Atlanta Hawks', 'Boston':'Boston Celtics', 'Brooklyn':'Brooklyn Nets','Charlotte':'Charlotte Hornets',
             'Chicago':'Chicago Bulls','Cleveland':'Cleveland Cavaliers','Dallas':'Dallas Mavericks', 'Denver':'Denver Nuggets',
             'Detroit':'Detroit Pistons','GoldenState':'Golden State Warriors', 'Golden State': 'Golden State Warriors', 'Houston':'Houston Rockets', 
             'Indiana':'Indiana Pacers','LAClippers':'LA Clippers', 'LALakers':'Los Angeles Lakers', 'LA Lakers': 'Los Angeles Lakers', 'Memphis':'Memphis Grizzlies',
             'Miami':'Miami Heat','Milwaukee':'Milwaukee Bucks','Minnesota':'Minnesota Timberwolves','NewOrleans':'New Orleans Pelicans', 
             'New Orleans': 'New Orleans Pelicans', 'NewYork':'New York Knicks','New York':'New York Knicks','OklahomaCity':'Oklahoma City Thunder','Oklahoma City':'Oklahoma City Thunder',
             'Orlando':'Orlando Magic','Philadelphia':'Philadelphia 76ers','Phoenix':'Phoenix Suns','Portland':'Portland Trail Blazers','Sacramento':'Sacramento Kings',
             'SanAntonio':'San Antonio Spurs', 'San Antonio':'San Antonio Spurs','Toronto':'Toronto Raptors','Utah':'Utah Jazz','Washington':'Washington Wizards'}

odds21['Team'] = odds21['Team'].map(team_dict)
odds20['Team'] = odds20['Team'].map(team_dict)
odds19['Team'] = odds19['Team'].map(team_dict)
odds18['Team'] = odds18['Team'].map(team_dict)
odds17['Team'] = odds17['Team'].map(team_dict)




In [22]:
#Mistake from Dataframe
odds21['Date'][76] = '2020-12-27'
odds21['Date'][77] = '2020-12-27'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  odds21['Date'][76] = '2020-12-27'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  odds21['Date'][77] = '2020-12-27'


Let's see how it looks now

In [12]:
odds21.head()

Unnamed: 0,Date,Team,Over/Under,Spread,ML
0,2020-12-22,Golden State Warriors,234.5,7.5,265
1,2020-12-22,Brooklyn Nets,234.5,-7.5,-320
2,2020-12-22,LA Clippers,219.0,2.0,115
3,2020-12-22,Los Angeles Lakers,219.0,-2.0,-135
4,2020-12-23,Charlotte Hornets,217.0,-3.0,-140
