In [1]:
from functools import reduce
from pathlib import Path

import numpy as np
import pandas as pd
from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.static import teams

import helper_functions as hf

In [2]:
# Silence pandas' SettingWithCopyWarning for the rest of the notebook.
pd.options.mode.chained_assignment = None  # default='warn'
teams_list = teams.get_teams()

# One slot per team id, initialised to 0; the game loop overwrites these with
# the values returned by hf.get_team_per_mean.
teams_per = {team['id']: 0 for team in teams_list}

seasons_teams = []
seasons_players = []
first_season = 2012
last_season = 2018  # exclusive bound: the last season fetched is last_season - 1
# Compared against int(SEASON_ID) in the game loop to decide which games are kept.
first_season_id = 20000 + first_season

print("Getting NBA Seasons Information...")
for i in range(first_season, last_season):
    # Two requests per season: the default (team-level) log and the player-level log.
    season_i_teams = leaguegamelog.LeagueGameLog(season=str(i)).get_data_frames()[0]
    season_i_players = leaguegamelog.LeagueGameLog(
        season=str(i), player_or_team_abbreviation='P'
    ).get_data_frames()[0]
    seasons_teams.append(season_i_teams)
    seasons_players.append(season_i_players)
    print("{}/{}".format(i, last_season))


dfs = []  # not used by the visible cells; kept in case a later cell reads it

print("Cleaning the data...")

# Stack the per-season frames in fetch order. pd.concat keeps the row order of
# every season, whereas an outer merge on all shared columns is documented to
# sort the join keys lexicographically, which would break the
# "two consecutive rows = one game" pairing used by the game loop.
season_games = pd.concat(seasons_teams, ignore_index=True)
season_games_plyrs = pd.concat(seasons_players, ignore_index=True)

# Drop BOTH rows of any game where either row lacks a shooting percentage.
# Dropping a single row would leave a one-row group in the game loop. The index
# is deliberately not reset: the loop pairs rows by `index // 2`.
pct_cols = ['FG_PCT', 'FT_PCT', 'FG3_PCT']
rows_missing_pct = season_games[pct_cols].isna().any(axis=1)
incomplete_game_ids = season_games.loc[rows_missing_pct, 'GAME_ID'].unique()
season_games = season_games.loc[~season_games['GAME_ID'].isin(incomplete_game_ids)]

# Normalise dtypes so ids and dates compare consistently across both frames.
season_games_plyrs['GAME_ID'] = pd.to_numeric(season_games_plyrs['GAME_ID'])
season_games['GAME_ID'] = pd.to_numeric(season_games['GAME_ID'])
season_games['GAME_DATE'] = pd.to_datetime(season_games['GAME_DATE'])
season_games_plyrs['GAME_DATE'] = pd.to_datetime(season_games_plyrs['GAME_DATE'])

print('size', len(season_games.index))

Getting NBA Seasons Information...
2012/2018
2013/2018
2014/2018
2015/2018
2016/2018
2017/2018
Cleaning the data...
size 14758


In [3]:
print("Initializing ELOs...")

# Every team in teams_list starts from the same rating of 1500.
elo_dic = {team['id']: 1500 for team in teams_list}

# Row accumulators filled by the game loop: one row per match, and one row per
# team per match for the LSTM data set.
matches_organized, matches_organized_lstm = [], []

# Window sizes (in games).
n_last_games, n_last_specific_games = 10, 5

# Empty string means "no season seen yet"; the game loop updates it per game.
season_id = ''
print('Getting historical odds...')
odds = hf.load_bets_csv()

Initializing ELOs...
Getting historical odds...


In [4]:
print("Creating CSV file of all games...")
# NOTE: this cell appends to matches_organized / matches_organized_lstm and hands
# elo_dic to the hf ELO helpers, so re-run the initialisation cell before
# re-running this one to avoid duplicated rows.
total_games = len(season_games.index) // 2

# Rows are paired by index: rows 2k and 2k+1 are treated as the two team rows
# ("A" and "B") of a single game.
for i, g in season_games.groupby(season_games.index // 2):
    print("{}/{}".format(i, total_games))

    row_a = g.iloc[0]

    # A missing W/L result (None or NaN) ends the run.
    if pd.isna(row_a['WL']):
        break

    # A game needs one row per team from the same GAME_ID; skip anything else
    # instead of failing with an IndexError on the missing second row.
    if len(g.index) < 2 or g.iloc[1]['GAME_ID'] != row_a['GAME_ID']:
        print("Skipping game with incomplete or mismatched rows.")
        continue
    row_b = g.iloc[1]

    # On a season change, let the helper adjust the ELO table before this game.
    if season_id != '' and season_id != row_a['SEASON_ID']:
        hf.reset_season_elo(season_id, g, elo_dic)

    season_id = row_a['SEASON_ID']

    game_id = row_a['GAME_ID']
    game_date = row_a['GAME_DATE']

    team_a_id = row_a['TEAM_ID']
    team_b_id = row_b['TEAM_ID']

    team_a_abbv = row_a['TEAM_ABBREVIATION']
    team_b_abbv = row_b['TEAM_ABBREVIATION']

    winner = 'A' if row_a['WL'] == 'W' else 'B'

    # Snapshot the pre-game ELOs; the ELO update only happens after the stats
    # for this game have been computed.
    team_a_pts = row_a['PTS']
    team_b_pts = row_b['PTS']
    elo_a = elo_dic[team_a_id]
    elo_b = elo_dic[team_b_id]

    # When team A's MATCHUP contains '@', team B is passed first to the odds
    # lookup and listed first in the output row (presumably '@' marks A as the
    # visiting side -- TODO confirm against hf.get_teams_odds).
    swap_sides = '@' in row_a['MATCHUP']
    if swap_sides:
        team_b_odds, team_a_odds = hf.get_teams_odds(team_b_id, team_a_id, game_date, odds)
    else:
        team_a_odds, team_b_odds = hf.get_teams_odds(team_a_id, team_b_id, game_date, odds)

    # Only games strictly before this one may feed the features.
    team_a_previous_games = season_games.loc[(season_games['TEAM_ID'] == team_a_id) & (season_games['GAME_DATE'] < game_date)]
    team_b_previous_games = season_games.loc[(season_games['TEAM_ID'] == team_b_id) & (season_games['GAME_DATE'] < game_date)]
    team_a_season_games = team_a_previous_games.loc[team_a_previous_games['SEASON_ID'] == season_id]
    team_b_season_games = team_b_previous_games.loc[team_b_previous_games['SEASON_ID'] == season_id]

    # Getting teams last 10 games (of the current season)
    team_a_previous_n_games = team_a_season_games.iloc[-n_last_games:, :]
    team_b_previous_n_games = team_b_season_games.iloc[-n_last_games:, :]

    # Stop if the current game already shows up as team A's latest "previous" game.
    if len(team_a_previous_games.index) > 0:
        if team_a_previous_games.iloc[-1]['GAME_ID'] == game_id:
            break

    # Keep a game only if it is in a tracked season, both teams have at least
    # 5 earlier games this season, and odds were found for both sides.
    enough_history = (
        int(season_id) >= first_season_id
        and len(team_a_previous_n_games.index) >= 5
        and len(team_b_previous_n_games.index) >= 5
        and team_a_odds is not None
        and team_b_odds is not None
    )
    if not enough_history:
        print("Not enough games.")
        # The game still counts towards the ELO ratings even though no row is emitted.
        hf.update_elo(winner, elo_a, elo_b, elo_dic, team_a_id, team_b_id, team_a_pts, team_b_pts)
        continue

    # Getting player information
    teams_per[team_a_id] = hf.get_team_per_mean(team_a_id, game_id, game_date, season_id, season_games_plyrs)
    teams_per[team_b_id] = hf.get_team_per_mean(team_b_id, game_id, game_date, season_id, season_games_plyrs)

    # Season Win Percentage
    team_a_season_pct = hf.get_wl_pct(team_a_season_games)[0]
    team_b_season_pct = hf.get_wl_pct(team_b_season_games)[0]

    # Points Conceded
    team_a_previous_games_pts_conceded = hf.team_points_conceded(team_a_previous_n_games, season_games)
    team_b_previous_games_pts_conceded = hf.team_points_conceded(team_b_previous_n_games, season_games)

    stats_team_a = hf.get_team_stats(team_a_previous_n_games, team_a_previous_games_pts_conceded, team_a_season_pct, elo_a, teams_per[team_a_id], team_a_odds)
    stats_team_b = hf.get_team_stats(team_b_previous_n_games, team_b_previous_games_pts_conceded, team_b_season_pct, elo_b, teams_per[team_b_id], team_b_odds)

    # One row per match; the final flag is 1 when the first-listed team won.
    if swap_sides:
        matches_organized.append([season_id, game_date, team_b_abbv, team_a_abbv] + stats_team_b + stats_team_a + [1 if winner == 'B' else 0])
    else:
        matches_organized.append([season_id, game_date, team_a_abbv, team_b_abbv] + stats_team_a + stats_team_b + [1 if winner == 'A' else 0])

    # One row per team for the LSTM data set: this game's own box-score values
    # plus the pre-game season pct, ELOs, PER and odds.
    box_score_cols = ('FG_PCT', 'FG3_PCT', 'FT_PCT', 'REB', 'TOV')

    matches_organized_lstm.append(
        [team_a_abbv, team_a_id, game_date, team_a_pts, team_b_pts]
        + [row_a[col] for col in box_score_cols]
        + [team_a_season_pct, elo_a, elo_b,
           teams_per[team_a_id], team_a_odds, team_b_odds, 1 if winner == 'A' else 0])

    matches_organized_lstm.append(
        [team_b_abbv, team_b_id, game_date, team_b_pts, team_a_pts]
        + [row_b[col] for col in box_score_cols]
        + [team_b_season_pct, elo_b, elo_a,
           teams_per[team_b_id], team_b_odds, team_a_odds, 1 if winner == 'B' else 0])

    hf.update_elo(winner, elo_a, elo_b, elo_dic, team_a_id, team_b_id, team_a_pts, team_b_pts)

Creating CSV file of all games...
0/7379
Not enough games.
1/7379
Not enough games.
2/7379
Not enough games.
3/7379
Not enough games.
4/7379
Not enough games.
5/7379
Not enough games.
6/7379
Not enough games.
7/7379
Not enough games.
8/7379
Not enough games.
9/7379
Not enough games.
10/7379
Not enough games.
11/7379
Not enough games.
12/7379
Not enough games.
13/7379
Not enough games.
14/7379
Not enough games.
15/7379
Not enough games.
16/7379
Not enough games.
17/7379
Not enough games.
18/7379
Not enough games.
19/7379
Not enough games.
20/7379
Not enough games.
21/7379
Not enough games.
22/7379
Not enough games.
23/7379
Not enough games.
24/7379
Not enough games.
25/7379
Not enough games.
26/7379
Not enough games.
27/7379
Not enough games.
28/7379
Not enough games.
29/7379
Not enough games.
30/7379
Not enough games.
31/7379
Not enough games.
32/7379
Not enough games.
33/7379
Not enough games.
34/7379
Not enough games.
35/7379
Not enough games.
36/7379
Not enough games.
37/7379
Not en

759/7379
760/7379
761/7379
762/7379
763/7379
764/7379
765/7379
766/7379
767/7379
768/7379
769/7379
770/7379
771/7379
772/7379
773/7379
774/7379
775/7379
776/7379
777/7379
778/7379
779/7379
780/7379
781/7379
782/7379
783/7379
784/7379
785/7379
786/7379
787/7379
788/7379
789/7379
790/7379
791/7379
792/7379
793/7379
794/7379
795/7379
796/7379
797/7379
798/7379
799/7379
800/7379
801/7379
802/7379
803/7379
804/7379
805/7379
806/7379
807/7379
808/7379
809/7379
810/7379
811/7379
812/7379
813/7379
814/7379
815/7379
816/7379
817/7379
818/7379
819/7379
820/7379
821/7379
822/7379
823/7379
824/7379
825/7379
826/7379
827/7379
828/7379
829/7379
830/7379
831/7379
832/7379
833/7379
834/7379
835/7379
836/7379
837/7379
838/7379
839/7379
840/7379
841/7379
842/7379
843/7379
844/7379
845/7379
846/7379
847/7379
848/7379
849/7379
850/7379
851/7379
852/7379
853/7379
854/7379
855/7379
856/7379
857/7379
858/7379
859/7379
860/7379
861/7379
862/7379
863/7379
864/7379
865/7379
866/7379
867/7379
868/7379
869/7379
8

1466/7379
1467/7379
1468/7379
1469/7379
1470/7379
1471/7379
1472/7379
1473/7379
1474/7379
1475/7379
1476/7379
1477/7379
1478/7379
1479/7379
1480/7379
1481/7379
1482/7379
1483/7379
1484/7379
1485/7379
1486/7379
1487/7379
1488/7379
1489/7379
1490/7379
1491/7379
1492/7379
1493/7379
1494/7379
1495/7379
1496/7379
1497/7379
1498/7379
1499/7379
1500/7379
1501/7379
1502/7379
1503/7379
1504/7379
1505/7379
1506/7379
1507/7379
1508/7379
1509/7379
1510/7379
1511/7379
1512/7379
1513/7379
1514/7379
1515/7379
1516/7379
1517/7379
1518/7379
1519/7379
1520/7379
1521/7379
1522/7379
1523/7379
1524/7379
1525/7379
1526/7379
1527/7379
1528/7379
1529/7379
1530/7379
1531/7379
1532/7379
1533/7379
1534/7379
1535/7379
1536/7379
1537/7379
1538/7379
1539/7379
1540/7379
1541/7379
1542/7379
1543/7379
1544/7379
1545/7379
1546/7379
1547/7379
1548/7379
1549/7379
1550/7379
1551/7379
1552/7379
1553/7379
1554/7379
1555/7379
1556/7379
1557/7379
1558/7379
1559/7379
1560/7379
1561/7379
1562/7379
1563/7379
1564/7379
1565/7379


2287/7379
2288/7379
2289/7379
2290/7379
2291/7379
2292/7379
2293/7379
2294/7379
2295/7379
2296/7379
2297/7379
2298/7379
2299/7379
2300/7379
2301/7379
2302/7379
2303/7379
2304/7379
2305/7379
2306/7379
2307/7379
2308/7379
2309/7379
2310/7379
2311/7379
2312/7379
2313/7379
2314/7379
2315/7379
2316/7379
2317/7379
2318/7379
2319/7379
2320/7379
2321/7379
2322/7379
2323/7379
2324/7379
2325/7379
2326/7379
2327/7379
2328/7379
2329/7379
2330/7379
2331/7379
2332/7379
2333/7379
2334/7379
2335/7379
2336/7379
2337/7379
2338/7379
2339/7379
2340/7379
2341/7379
2342/7379
2343/7379
2344/7379
2345/7379
2346/7379
2347/7379
2348/7379
2349/7379
2350/7379
2351/7379
2352/7379
2353/7379
2354/7379
2355/7379
2356/7379
2357/7379
2358/7379
2359/7379
2360/7379
2361/7379
2362/7379
2363/7379
2364/7379
2365/7379
2366/7379
2367/7379
2368/7379
2369/7379
2370/7379
2371/7379
2372/7379
2373/7379
2374/7379
2375/7379
2376/7379
2377/7379
2378/7379
2379/7379
2380/7379
2381/7379
2382/7379
2383/7379
2384/7379
2385/7379
2386/7379


2965/7379
2966/7379
2967/7379
2968/7379
2969/7379
2970/7379
2971/7379
2972/7379
2973/7379
2974/7379
2975/7379
2976/7379
2977/7379
2978/7379
2979/7379
2980/7379
2981/7379
2982/7379
2983/7379
2984/7379
2985/7379
2986/7379
2987/7379
2988/7379
2989/7379
2990/7379
2991/7379
2992/7379
2993/7379
2994/7379
2995/7379
2996/7379
2997/7379
2998/7379
2999/7379
3000/7379
3001/7379
3002/7379
3003/7379
3004/7379
3005/7379
3006/7379
3007/7379
3008/7379
3009/7379
3010/7379
3011/7379
3012/7379
3013/7379
3014/7379
3015/7379
3016/7379
3017/7379
3018/7379
3019/7379
3020/7379
3021/7379
3022/7379
3023/7379
3024/7379
3025/7379
3026/7379
3027/7379
3028/7379
3029/7379
3030/7379
3031/7379
3032/7379
3033/7379
3034/7379
3035/7379
3036/7379
3037/7379
3038/7379
3039/7379
3040/7379
3041/7379
3042/7379
3043/7379
3044/7379
3045/7379
3046/7379
3047/7379
3048/7379
3049/7379
3050/7379
3051/7379
3052/7379
3053/7379
3054/7379
3055/7379
3056/7379
3057/7379
3058/7379
3059/7379
3060/7379
3061/7379
3062/7379
3063/7379
3064/7379


Not enough games.
3726/7379
Not enough games.
3727/7379
Not enough games.
3728/7379
Not enough games.
3729/7379
Not enough games.
3730/7379
Not enough games.
3731/7379
Not enough games.
3732/7379
Not enough games.
3733/7379
Not enough games.
3734/7379
Not enough games.
3735/7379
Not enough games.
3736/7379
Not enough games.
3737/7379
Not enough games.
3738/7379
Not enough games.
3739/7379
Not enough games.
3740/7379
Not enough games.
3741/7379
Not enough games.
3742/7379
Not enough games.
3743/7379
Not enough games.
3744/7379
Not enough games.
3745/7379
Not enough games.
3746/7379
Not enough games.
3747/7379
Not enough games.
3748/7379
Not enough games.
3749/7379
Not enough games.
3750/7379
Not enough games.
3751/7379
Not enough games.
3752/7379
Not enough games.
3753/7379
Not enough games.
3754/7379
Not enough games.
3755/7379
Not enough games.
3756/7379
Not enough games.
3757/7379
Not enough games.
3758/7379
Not enough games.
3759/7379
3760/7379
3761/7379
Not enough games.
3762/7379


4469/7379
4470/7379
4471/7379
4472/7379
4473/7379
4474/7379
4475/7379
4476/7379
4477/7379
4478/7379
4479/7379
4480/7379
4481/7379
4482/7379
4483/7379
4484/7379
4485/7379
4486/7379
4487/7379
4488/7379
4489/7379
4490/7379
4491/7379
4492/7379
4493/7379
4494/7379
4495/7379
4496/7379
4497/7379
4498/7379
4499/7379
4500/7379
4501/7379
4502/7379
4503/7379
4504/7379
4505/7379
4506/7379
4507/7379
4508/7379
4509/7379
4510/7379
4511/7379
4512/7379
4513/7379
4514/7379
4515/7379
4516/7379
4517/7379
4518/7379
4519/7379
4520/7379
4521/7379
4522/7379
4523/7379
4524/7379
4525/7379
4526/7379
4527/7379
4528/7379
4529/7379
4530/7379
4531/7379
4532/7379
4533/7379
4534/7379
4535/7379
4536/7379
4537/7379
4538/7379
4539/7379
4540/7379
4541/7379
4542/7379
4543/7379
4544/7379
4545/7379
4546/7379
4547/7379
4548/7379
4549/7379
4550/7379
4551/7379
4552/7379
4553/7379
4554/7379
4555/7379
4556/7379
4557/7379
4558/7379
4559/7379
4560/7379
4561/7379
4562/7379
4563/7379
4564/7379
4565/7379
4566/7379
4567/7379
4568/7379


5147/7379
5148/7379
5149/7379
5150/7379
5151/7379
5152/7379
5153/7379
5154/7379
5155/7379
5156/7379
5157/7379
5158/7379
5159/7379
5160/7379
5161/7379
5162/7379
5163/7379
5164/7379
5165/7379
5166/7379
5167/7379
5168/7379
5169/7379
5170/7379
5171/7379
5172/7379
5173/7379
5174/7379
5175/7379
5176/7379
5177/7379
5178/7379
5179/7379
5180/7379
5181/7379
5182/7379
5183/7379
5184/7379
5185/7379
5186/7379
5187/7379
5188/7379
5189/7379
5190/7379
5191/7379
5192/7379
5193/7379
5194/7379
5195/7379
5196/7379
5197/7379
5198/7379
5199/7379
5200/7379
5201/7379
5202/7379
5203/7379
5204/7379
5205/7379
5206/7379
5207/7379
5208/7379
5209/7379
5210/7379
5211/7379
5212/7379
5213/7379
5214/7379
5215/7379
5216/7379
5217/7379
5218/7379
5219/7379
5220/7379
5221/7379
5222/7379
5223/7379
5224/7379
5225/7379
5226/7379
5227/7379
5228/7379
5229/7379
5230/7379
5231/7379
5232/7379
5233/7379
5234/7379
5235/7379
5236/7379
5237/7379
5238/7379
5239/7379
5240/7379
5241/7379
5242/7379
5243/7379
5244/7379
5245/7379
5246/7379


5967/7379
5968/7379
5969/7379
5970/7379
5971/7379
5972/7379
5973/7379
5974/7379
5975/7379
5976/7379
5977/7379
5978/7379
5979/7379
5980/7379
5981/7379
5982/7379
5983/7379
5984/7379
5985/7379
5986/7379
5987/7379
5988/7379
5989/7379
5990/7379
5991/7379
5992/7379
5993/7379
5994/7379
5995/7379
5996/7379
5997/7379
5998/7379
5999/7379
6000/7379
6001/7379
6002/7379
6003/7379
6004/7379
6005/7379
6006/7379
6007/7379
6008/7379
6009/7379
6010/7379
6011/7379
6012/7379
6013/7379
6014/7379
6015/7379
6016/7379
6017/7379
6018/7379
6019/7379
6020/7379
6021/7379
6022/7379
6023/7379
6024/7379
6025/7379
6026/7379
6027/7379
6028/7379
6029/7379
6030/7379
6031/7379
6032/7379
6033/7379
6034/7379
6035/7379
6036/7379
6037/7379
6038/7379
6039/7379
6040/7379
6041/7379
6042/7379
6043/7379
6044/7379
6045/7379
6046/7379
6047/7379
6048/7379
6049/7379
6050/7379
6051/7379
6052/7379
6053/7379
6054/7379
6055/7379
6056/7379
6057/7379
6058/7379
6059/7379
6060/7379
6061/7379
6062/7379
6063/7379
6064/7379
6065/7379
6066/7379


6644/7379
6645/7379
6646/7379
6647/7379
6648/7379
6649/7379
6650/7379
6651/7379
6652/7379
6653/7379
6654/7379
6655/7379
6656/7379
6657/7379
6658/7379
6659/7379
6660/7379
6661/7379
6662/7379
6663/7379
6664/7379
6665/7379
6666/7379
6667/7379
6668/7379
6669/7379
6670/7379
6671/7379
6672/7379
6673/7379
6674/7379
6675/7379
6676/7379
6677/7379
6678/7379
6679/7379
6680/7379
6681/7379
6682/7379
6683/7379
6684/7379
6685/7379
6686/7379
6687/7379
6688/7379
6689/7379
6690/7379
6691/7379
6692/7379
6693/7379
6694/7379
6695/7379
6696/7379
6697/7379
6698/7379
6699/7379
6700/7379
6701/7379
6702/7379
6703/7379
6704/7379
6705/7379
6706/7379
6707/7379
6708/7379
6709/7379
6710/7379
6711/7379
6712/7379
6713/7379
6714/7379
6715/7379
6716/7379
6717/7379
6718/7379
6719/7379
6720/7379
6721/7379
6722/7379
6723/7379
6724/7379
6725/7379
6726/7379
6727/7379
6728/7379
6729/7379
6730/7379
6731/7379
6732/7379
6733/7379
6734/7379
6735/7379
6736/7379
6737/7379
6738/7379
6739/7379
6740/7379
6741/7379
6742/7379
6743/7379


In [5]:
# Per-match table: identifying columns, then the hf.get_team_stats columns for
# side A and side B, then the WINNER flag appended by the game loop.
final_df = pd.DataFrame(matches_organized, columns=['SEASON_ID', 'GAME_DATE', 'TEAM_A', 'TEAM_B',
                                                    'PTS_A', 'PTS_CON_A', 'FG_PCT_A', 'FG3_PCT_A', 'FT_PCT_A', 'REB_A', 'TOV_A', 'SEASON_A_PCT', 'ELO_A', 'PER_A', 'ODDS_A',
                                                    'PTS_B', 'PTS_CON_B', 'FG_PCT_B', 'FG3_PCT_B', 'FT_PCT_B', 'REB_B', 'TOV_B', 'SEASON_B_PCT', 'ELO_B', 'PER_B', 'ODDS_B',
                                                    'WINNER'])
# Per-team-per-game table for the LSTM model (two rows per match).
final_df_lstm = pd.DataFrame(matches_organized_lstm, columns=['TEAM_ABBV', 'TEAM_ID', 'DATE',
                                                    'PTS_A', 'PTS_CON_A', 'FG_PCT_A', 'FG3_PCT_A', 'FT_PCT_A', 'REB_A', 'TOV_A',
                                                    'SEASON_A_PCT', 'ELO_A', 'ELO_OPP', 'PER_A', 'ODDS_A', 'ODDS_OPP',
                                                    'WINNER'])

# Create the output folders up front so a missing directory cannot discard the
# result of the long-running game loop.
data_dir = Path('../data')
lstm_dir = data_dir / 'LSTM'
lstm_dir.mkdir(parents=True, exist_ok=True)

# last_season is an exclusive bound, hence the -1 in the file name.
csv_name = '{}-{}.csv'.format(first_season, last_season - 1)
final_df.to_csv(data_dir / csv_name)
final_df_lstm.to_csv(lstm_dir / csv_name)