In [6]:
import pandas as pd
import numpy as np
from functools import reduce
from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.static import teams 
import helper_functions as hf

In [23]:
# Silence pandas' chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'
teams_list = teams.get_teams()

# Per-team PER value keyed by team id; initialised to 0 and overwritten
# game by game in the feature-building loop further down.
teams_per = dict()

for team in teams_list:
    team_id = team['id']
    teams_per[team_id] = 0

seasons_teams = []
seasons_players = []
first_season = 2017
last_season = 2019  # exclusive: range() below stops at last_season - 1
first_season_id = 20000 + first_season

print("Getting NBA Seasons Information...")
for i in range(first_season,last_season):
    # One request for the team-level log, one for the player-level log ('P').
    season_i_teams = leaguegamelog.LeagueGameLog(season = str(i)).get_data_frames()[0]
    season_i_players = leaguegamelog.LeagueGameLog(season = str(i), player_or_team_abbreviation = 'P').get_data_frames()[0]
    seasons_teams.append(season_i_teams)
    seasons_players.append(season_i_players)
    print("{}/{}".format(i,last_season))


dfs = []

print("Cleaning the data...")

# Stack the per-season logs in fetch order. The previous
# reduce(pd.merge(..., how='outer')) was only acting as a concatenation, and an
# outer merge is allowed to sort the join keys, which would scramble the row
# order that the positional (index // 2) game pairing relies on.
season_games = pd.concat(seasons_teams, ignore_index=True)
season_games_plyrs = pd.concat(seasons_players, ignore_index=True)

# Drop every game that has a missing shooting percentage on either row.
# Removing only the offending row would leave a half-game behind and punch a
# hole in the index, breaking the two-rows-per-game pairing downstream.
pct_columns = ['FG_PCT','FT_PCT','FG3_PCT']
incomplete_game_ids = season_games.loc[season_games[pct_columns].isna().any(axis=1), 'GAME_ID'].unique()
season_games = season_games.loc[~season_games['GAME_ID'].isin(incomplete_game_ids)].reset_index(drop=True)

# Normalise key dtypes so ids and dates compare consistently across both logs.
season_games_plyrs['GAME_ID'] = pd.to_numeric(season_games_plyrs['GAME_ID'])
season_games['GAME_ID'] = pd.to_numeric(season_games['GAME_ID'])
season_games['GAME_DATE'] = pd.to_datetime(season_games['GAME_DATE'])
season_games_plyrs['GAME_DATE'] = pd.to_datetime(season_games_plyrs['GAME_DATE'])

print('size', len(season_games.index))

Getting NBA Seasons Information...
2017/2019
2018/2019
Cleaning the data...
size 4920


In [24]:
print("Initializing ELOs...")

# Every team begins with the same baseline rating of 1500, keyed by team id.
elo_dic = {team['id']: 1500 for team in teams_list}

# Accumulators filled by the per-game loop: one row per game, and one row per
# team-game for the LSTM dataset.
matches_organized, matches_organized_lstm = [], []

# Window sizes for the "recent form" features.
n_last_games, n_last_specific_games = 10, 5

# Empty string means "no season seen yet" for the loop's season-change check.
season_id = ''
print('Getting historical odds...')
odds = hf.load_bets_csv()

Initializing ELOs...
Getting historical odds...


In [25]:
print("Creating CSV file of all games...")
total_games = len(season_games.index) // 2

# Rows are paired positionally: rows 2k and 2k+1 are treated as the two team
# entries of one game (assumes the log lists both teams of a game back to back).
for i, g in season_games.groupby(season_games.index // 2):
    print("{}/{}".format(i, total_games))

    # Look the first row up once instead of re-slicing the group for every field.
    row_a = g.iloc[0]

    # A missing W/L flag marks a game with no result yet; stop processing there.
    if row_a['WL'] is None:
        break

    # A one-row group means the partner row is missing (e.g. it was dropped during
    # cleaning); skip it explicitly rather than failing with an IndexError below.
    if len(g.index) < 2:
        print("Skipping incomplete game.")
        continue

    row_b = g.iloc[1]

    # On a season change, let the helper adjust the ratings before this game is handled.
    if season_id != '' and season_id != row_a['SEASON_ID']:
        hf.reset_season_elo(season_id, g, elo_dic)

    season_id = row_a['SEASON_ID']

    game_id = row_a['GAME_ID']
    game_date = row_a['GAME_DATE']

    team_a_id = row_a['TEAM_ID']
    team_b_id = row_b['TEAM_ID']

    team_a_abbv = row_a['TEAM_ABBREVIATION']
    team_b_abbv = row_b['TEAM_ABBREVIATION']

    winner = 'A' if row_a['WL'] == 'W' else 'B'

    # Ratings are read *before* this game is applied; the ELO update happens
    # only after the feature rows have been built.
    team_a_pts = row_a['PTS']
    team_b_pts = row_b['PTS']
    elo_a = elo_dic[team_a_id]
    elo_b = elo_dic[team_b_id]

    # An '@' in team A's MATCHUP means team B is passed first to the odds lookup
    # and listed first in the output row.
    team_a_listed_second = '@' in row_a['MATCHUP']

    if team_a_listed_second:
        team_b_odds, team_a_odds = hf.get_teams_odds(team_b_id, team_a_id, game_date, odds)
    else:
        team_a_odds, team_b_odds = hf.get_teams_odds(team_a_id, team_b_id, game_date, odds)

    # Only games strictly before this one are used, so no information from the
    # current game enters the features.
    team_a_previous_games = season_games.loc[(season_games['TEAM_ID'] == team_a_id) & (season_games['GAME_DATE'] < game_date)]
    team_b_previous_games = season_games.loc[(season_games['TEAM_ID'] == team_b_id) & (season_games['GAME_DATE'] < game_date)]
    team_a_season_games = team_a_previous_games.loc[team_a_previous_games['SEASON_ID'] == season_id]
    team_b_season_games = team_b_previous_games.loc[team_b_previous_games['SEASON_ID'] == season_id]

    # Getting teams last 10 games
    team_a_previous_n_games = team_a_season_games.iloc[-n_last_games:,:]
    team_b_previous_n_games = team_b_season_games.iloc[-n_last_games:,:]

    # Safety stop: the current game must never appear among its own "previous" games.
    if len(team_a_previous_games.index) > 0:
        if team_a_previous_games.iloc[-1]['GAME_ID'] == game_id:
            break

    # A game yields a training row only when it belongs to a tracked season, both
    # teams already have at least 5 games this season, and odds exist for both sides.
    has_enough_context = (
        int(season_id) >= first_season_id
        and len(team_a_previous_n_games.index) >= 5
        and len(team_b_previous_n_games.index) >= 5
        and team_a_odds is not None
        and team_b_odds is not None
    )
    if not has_enough_context:
        print("Not enough games.")
        # The result still counts towards the ratings even though no row is emitted.
        hf.update_elo(winner, elo_a, elo_b, elo_dic, team_a_id, team_b_id, team_a_pts, team_b_pts)
        continue

    # Getting player information
    teams_per[team_a_id] = hf.get_team_per_mean(team_a_id, game_id, game_date, season_id, season_games_plyrs)
    teams_per[team_b_id] = hf.get_team_per_mean(team_b_id, game_id, game_date, season_id, season_games_plyrs)

    # Season Win Percentage
    team_a_season_pct = hf.get_wl_pct(team_a_season_games)[0]
    team_b_season_pct = hf.get_wl_pct(team_b_season_games)[0]

    # Points conceded
    team_a_previous_games_pts_conceded = hf.team_points_conceded(team_a_previous_n_games, season_games)
    team_b_previous_games_pts_conceded = hf.team_points_conceded(team_b_previous_n_games, season_games)

    stats_team_a = hf.get_team_stats (team_a_previous_n_games, team_a_previous_games_pts_conceded, team_a_season_pct, elo_a, teams_per[team_a_id], team_a_odds)
    stats_team_b = hf.get_team_stats (team_b_previous_n_games, team_b_previous_games_pts_conceded, team_b_season_pct, elo_b, teams_per[team_b_id], team_b_odds)

    # One row per game; the label is 1 when the first-listed team won.
    if team_a_listed_second:
        matches_organized.append([season_id, game_date, team_b_abbv, team_a_abbv] + stats_team_b + stats_team_a + [1 if winner == 'B' else 0])
    else:
        matches_organized.append([season_id, game_date, team_a_abbv, team_b_abbv] + stats_team_a + stats_team_b + [1 if winner == 'A' else 0])

    # Two rows per game for the LSTM dataset, one from each team's point of view,
    # carrying that team's box-score values from this game.
    matches_organized_lstm.append([team_a_abbv, team_a_id, game_date, team_a_pts, team_b_pts, row_a['FG_PCT'], row_a['FG3_PCT'],
                    row_a['FT_PCT'], row_a['REB'], row_a['TOV'],
                    team_a_season_pct, elo_a, elo_b,
                     teams_per[team_a_id], team_a_odds, team_b_odds, 1 if winner == 'A' else 0])

    matches_organized_lstm.append([team_b_abbv, team_b_id, game_date, team_b_pts, team_a_pts, row_b['FG_PCT'], row_b['FG3_PCT'],
                    row_b['FT_PCT'], row_b['REB'], row_b['TOV'],
                    team_b_season_pct, elo_b, elo_a,
                     teams_per[team_b_id], team_b_odds, team_a_odds, 1 if winner == 'B' else 0])


    hf.update_elo(winner, elo_a, elo_b, elo_dic, team_a_id, team_b_id, team_a_pts, team_b_pts)

Creating CSV file of all games...
0/2460
Not enough games.
1/2460
Not enough games.
2/2460
Not enough games.
3/2460
Not enough games.
4/2460
Not enough games.
5/2460
Not enough games.
6/2460
Not enough games.
7/2460
Not enough games.
8/2460
Not enough games.
9/2460
Not enough games.
10/2460
Not enough games.
11/2460
Not enough games.
12/2460
Not enough games.
13/2460
Not enough games.
14/2460
Not enough games.
15/2460
Not enough games.
16/2460
Not enough games.
17/2460
Not enough games.
18/2460
Not enough games.
19/2460
Not enough games.
20/2460
Not enough games.
21/2460
Not enough games.
22/2460
Not enough games.
23/2460
Not enough games.
24/2460
Not enough games.
25/2460
Not enough games.
26/2460
Not enough games.
27/2460
Not enough games.
28/2460
Not enough games.
29/2460
Not enough games.
30/2460
Not enough games.
31/2460
Not enough games.
32/2460
Not enough games.
33/2460
Not enough games.
34/2460
Not enough games.
35/2460
Not enough games.
36/2460
Not enough games.
37/2460
Not en

758/2460
759/2460
760/2460
761/2460
762/2460
763/2460
764/2460
765/2460
766/2460
767/2460
768/2460
769/2460
770/2460
771/2460
772/2460
773/2460
774/2460
775/2460
776/2460
777/2460
778/2460
779/2460
780/2460
781/2460
782/2460
783/2460
784/2460
785/2460
786/2460
787/2460
788/2460
789/2460
790/2460
791/2460
792/2460
793/2460
794/2460
795/2460
796/2460
797/2460
798/2460
799/2460
800/2460
801/2460
802/2460
803/2460
804/2460
805/2460
806/2460
807/2460
808/2460
809/2460
810/2460
811/2460
812/2460
813/2460
814/2460
815/2460
816/2460
817/2460
818/2460
819/2460
820/2460
821/2460
822/2460
823/2460
824/2460
825/2460
826/2460
827/2460
828/2460
829/2460
830/2460
831/2460
832/2460
833/2460
834/2460
835/2460
836/2460
837/2460
838/2460
839/2460
840/2460
841/2460
842/2460
843/2460
844/2460
845/2460
846/2460
847/2460
848/2460
849/2460
850/2460
851/2460
852/2460
853/2460
854/2460
855/2460
856/2460
857/2460
858/2460
859/2460
860/2460
861/2460
862/2460
863/2460
864/2460
865/2460
866/2460
867/2460
868/2460
8

1461/2460
1462/2460
1463/2460
1464/2460
1465/2460
1466/2460
1467/2460
1468/2460
1469/2460
1470/2460
1471/2460
1472/2460
1473/2460
1474/2460
1475/2460
1476/2460
1477/2460
1478/2460
1479/2460
1480/2460
1481/2460
1482/2460
1483/2460
1484/2460
1485/2460
1486/2460
1487/2460
1488/2460
1489/2460
1490/2460
1491/2460
1492/2460
1493/2460
1494/2460
1495/2460
1496/2460
1497/2460
1498/2460
1499/2460
1500/2460
1501/2460
1502/2460
1503/2460
1504/2460
1505/2460
1506/2460
1507/2460
1508/2460
1509/2460
1510/2460
1511/2460
1512/2460
1513/2460
1514/2460
1515/2460
1516/2460
1517/2460
1518/2460
1519/2460
1520/2460
1521/2460
1522/2460
1523/2460
1524/2460
1525/2460
1526/2460
1527/2460
1528/2460
1529/2460
1530/2460
1531/2460
1532/2460
1533/2460
1534/2460
1535/2460
1536/2460
1537/2460
1538/2460
1539/2460
1540/2460
1541/2460
1542/2460
1543/2460
1544/2460
1545/2460
1546/2460
1547/2460
1548/2460
1549/2460
1550/2460
1551/2460
1552/2460
1553/2460
1554/2460
1555/2460
1556/2460
1557/2460
1558/2460
1559/2460
1560/2460


2283/2460
2284/2460
2285/2460
2286/2460
2287/2460
2288/2460
2289/2460
2290/2460
2291/2460
2292/2460
2293/2460
2294/2460
2295/2460
2296/2460
2297/2460
2298/2460
2299/2460
2300/2460
2301/2460
2302/2460
2303/2460
2304/2460
2305/2460
2306/2460
2307/2460
2308/2460
2309/2460
2310/2460
2311/2460
2312/2460
2313/2460
2314/2460
2315/2460
2316/2460
2317/2460
2318/2460
2319/2460
2320/2460
2321/2460
2322/2460
2323/2460
2324/2460
2325/2460
2326/2460
2327/2460
2328/2460
2329/2460
2330/2460
2331/2460
2332/2460
2333/2460
2334/2460
2335/2460
2336/2460
2337/2460
2338/2460
2339/2460
2340/2460
2341/2460
2342/2460
2343/2460
2344/2460
2345/2460
2346/2460
2347/2460
2348/2460
2349/2460
2350/2460
2351/2460
2352/2460
2353/2460
2354/2460
2355/2460
2356/2460
2357/2460
2358/2460
2359/2460
2360/2460
2361/2460
2362/2460
2363/2460
2364/2460
2365/2460
2366/2460
2367/2460
2368/2460
2369/2460
2370/2460
2371/2460
2372/2460
2373/2460
2374/2460
2375/2460
2376/2460
2377/2460
2378/2460
2379/2460
2380/2460
2381/2460
2382/2460


In [26]:
# Column layout of the per-game table: identifying fields, the first-listed
# team's features (suffix A), the second-listed team's features (suffix B),
# and finally the label.
game_columns = [
    'SEASON_ID', 'GAME_DATE', 'TEAM_A', 'TEAM_B',
    'PTS_A', 'PTS_CON_A', 'FG_PCT_A', 'FG3_PCT_A', 'FT_PCT_A', 'REB_A', 'TOV_A', 'SEASON_A_PCT', 'ELO_A', 'PER_A', 'ODDS_A',
    'PTS_B', 'PTS_CON_B', 'FG_PCT_B', 'FG3_PCT_B', 'FT_PCT_B', 'REB_B', 'TOV_B', 'SEASON_B_PCT', 'ELO_B', 'PER_B', 'ODDS_B',
    'WINNER',
]

# Column layout of the per-team-game table used for the LSTM dataset.
lstm_columns = [
    'TEAM_ABBV', 'TEAM_ID', 'DATE',
    'PTS_A', 'PTS_CON_A', 'FG_PCT_A', 'FG3_PCT_A', 'FT_PCT_A', 'REB_A', 'TOV_A',
    'SEASON_A_PCT', 'ELO_A', 'ELO_OPP', 'PER_A', 'ODDS_A', 'ODDS_OPP',
    'WINNER',
]

# File names carry the first and last season actually fetched (last_season is exclusive).
season_span = (first_season, last_season - 1)

final_df = pd.DataFrame(matches_organized, columns=game_columns)
final_df_lstm = pd.DataFrame(matches_organized_lstm, columns=lstm_columns)

final_df.to_csv('../data/{}-{}.csv'.format(*season_span))
final_df_lstm.to_csv('../data/LSTM/{}-{}.csv'.format(*season_span))