In [103]:
import numpy as np
import pandas as pd
import json, csv
import os

from nba_api.live.nba.endpoints import scoreboard

In [102]:
# Directory (relative to the working directory) that holds every input and
# output data file used by this notebook.
DATASET_DIR = "data/"

# Positional (``iloc``) indices of the `team_stats` columns that are appended to
# every dataset row for both teams.
# NOTE(review): presumably ORtg, DRtg, NRtg, TS% and the offensive/defensive
# four-factor columns (oeFG%, oTOV%, oORB%, ...) -- confirm against the CSV header.
stat_cols = [10, 11, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24]

In [38]:
def gen_dict_extract(key, var):
    """Recursively yield every value stored under ``key`` in a nested structure.

    Args:
        key: The dictionary key to search for.
        var: The object to search. Anything without an ``items`` attribute
            yields nothing.

    Yields:
        Each value found under ``key``, in depth-first order. A matching value
        that is itself a dict is yielded and then searched as well. Values that
        are lists have each of their elements searched.
    """
    # Python 3 mappings expose ``items``; the former ``iteritems`` check never
    # matched a dict on Python 3, so the generator silently yielded nothing.
    if hasattr(var, 'items'):
        for k, v in var.items():
            if k == key:
                yield v
            if isinstance(v, dict):
                yield from gen_dict_extract(key, v)
            elif isinstance(v, list):
                for d in v:
                    yield from gen_dict_extract(key, d)

## Import Data

In [88]:
# Per-player stat vectors; indexed further down as
# player_dict[season][team_id][player_id].
with open(DATASET_DIR + "player_dictionary.json") as player_fh:
    player_dict = json.load(player_fh)

# Team-name lookup table (loaded here, not referenced in the cells shown below).
with open(DATASET_DIR + "team_to_id.json") as team_fh:
    team_to_id = json.load(team_fh)

# One entry per game: home/away team ids, the rosters keyed by team id,
# the home-win flag and the season.
with open(DATASET_DIR + "games_dict.json") as games_fh:
    games_dict = json.load(games_fh)

# Tab-separated table of advanced team statistics.
team_stats = pd.read_csv(DATASET_DIR + "team_adv_stats.csv", sep="\t")

In [90]:
# Display the loaded team statistics table as a visual sanity check.
team_stats

Unnamed: 0,Rk,Team,Age,W,L,PW,PL,MOV,SOS,SRS,...,oTOV%,oORB%,oFT/FGA,deFG%,dTOV%,dDRB%,dFT/FGA,Attend.,Attend./G,Season
0,1.0,1.610613e+09,25.8,61.0,21.0,61.0,21.0,7.74,-0.87,6.88,...,14.0,31.6,0.285,0.467,14.1,68.8,0.245,676101,0,98.0
1,2.0,1.610613e+09,31.1,61.0,21.0,61.0,21.0,7.17,-0.84,6.33,...,13.4,28.7,0.236,0.478,15.9,66.1,0.223,699952,0,98.0
2,3.0,1.610613e+09,31.7,62.0,20.0,61.0,21.0,7.11,0.13,7.24,...,13.3,35.7,0.219,0.457,14.7,70.6,0.220,983444,0,98.0
3,4.0,1.610613e+09,29.7,62.0,20.0,59.0,23.0,6.54,-0.81,5.73,...,14.8,32.3,0.334,0.471,13.5,71.0,0.271,815889,0,98.0
4,5.0,1.610613e+09,29.7,58.0,24.0,59.0,23.0,6.09,0.16,6.25,...,14.0,27.9,0.262,0.453,14.4,67.8,0.236,645302,0,98.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
770,28.0,1.610613e+09,22.1,22.0,60.0,23.0,59.0,-7.85,0.24,-7.62,...,14.0,30.2,0.215,0.564,11.8,75.8,0.218,668865,16314,23.0
771,29.0,1.610613e+09,24.1,17.0,65.0,22.0,60.0,-8.22,0.49,-7.73,...,13.3,24.9,0.227,0.557,11.9,74.0,0.231,759715,18596,23.0
772,30.0,1.610613e+09,23.9,22.0,60.0,19.0,63.0,-10.04,0.22,-9.82,...,13.0,25.6,0.170,0.576,12.0,74.9,0.201,694434,15508,23.0
773,,,,,,,,,,,...,,,,,,,,,,


In [104]:
# Spot check: select one (season, team) row and show the values found at the
# `stat_cols` positions, i.e. exactly what gets appended to each dataset row.
temp = (
    team_stats
    .query("Season==98 and Team==1610612737")
    .iloc[:, stat_cols]
    .values
    .tolist()
)
print(temp[0])

[108.2, 104.3, 3.9, 0.533, 0.481, 14.1, 33.9, 0.275, 0.468, 13.0, 69.1, 0.194]


## Create Dataset

#### Get array of arrays of player ids in games

In [105]:
# Collect the key of every game in `games_dict` (iterating a dict yields its keys)
games_list = [*games_dict]

In [106]:
# Every roster is right-padded with player id 0 up to this many entries.
MAX_ROSTER_SIZE = 24

games_dataset = []

# Build one record per game:
#   [home_player_ids, away_player_ids, home_win_flag, [season, home_team, away_team]]
for game in games_list:
    game_info = games_dict[game]

    # Team ids are used as string keys into the game entry
    home_team = str(game_info["team_home"])
    away_team = str(game_info["team_away"])

    # Copy the rosters before padding them: padding with `+=` directly on the
    # lists stored in `games_dict` would mutate the loaded source data in place.
    home_players = list(game_info[home_team])
    away_players = list(game_info[away_team])
    game_result  = game_info["win_home"]
    season       = game_info["season"]

    # Info list to look up player/team stats later: season reduced to a
    # two-digit offset (year - 2000 from 2000 on, otherwise year - 1900).
    info_list = [season - 2000 if season >= 2000 else season - 1900,
                 home_team,
                 away_team]

    # Padding (a roster already at or above MAX_ROSTER_SIZE is left unchanged,
    # because multiplying a list by a non-positive number gives an empty list)
    home_players += [0] * (MAX_ROSTER_SIZE - len(home_players))
    away_players += [0] * (MAX_ROSTER_SIZE - len(away_players))

    games_dataset.append([home_players, away_players, game_result, info_list])

In [107]:
# Peek at a few assembled game records
for sample_idx in (0, 1, 5):
    print(games_dataset[sample_idx])

[[1630530, 1630230, 202685, 1630529, 203468, 1626195, 1630700, 1630631, 1629637, 1628984, 202066, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1629641, 1631110, 1627751, 1630170, 1630200, 1628380, 203926, 1626196, 1631103, 1628966, 1629646, 1626169, 1629676, 203476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 1, [23, '1610612740', '1610612759']]
[[1628374, 1629020, 1631117, 203903, 201144, 1629012, 200752, 1627736, 1629638, 1630195, 1630534, 1631323, 1628962, 1629659, 1630695, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1630557, 1628398, 1629655, 203078, 1628420, 201959, 1629060, 1630692, 203115, 1629653, 1631098, 1630264, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 1, [23, '1610612762', '1610612764']]
[[1627759, 1628369, 201143, 1628401, 1630202, 1630573, 1629684, 1627763, 1629057, 1628436, 201933, 1628382, 203943, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1627741, 1630174, 1626167, 1629614, 1630169, 1631097, 1630537, 1630188, 1629052, 204456, 1629048, 1630543, 201949, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 0, [23, '1610612738', '1610

#### Convert array of arrays of player ids to player stats

In [108]:
# Length of the all-zero vector substituted for padding / unknown players.
PLAYER_FEATURE_COUNT = 75
# Number of players kept per team after ranking.
ROTATION_SIZE = 9


def _rotation_features(roster, team_players):
    """Return the flattened stat vectors of the top ``ROTATION_SIZE`` players.

    Args:
        roster: Iterable of player ids (padding ids included).
        team_players: Mapping of player-id string -> stat vector for one
            team in one season.

    Returns:
        A flat list holding the stat vectors of the ``ROTATION_SIZE`` players
        with the largest value at index 3 of their vector (playing time, per
        the original comment), in descending order. Ids that are missing from
        ``team_players`` contribute an all-zero vector.
    """
    per_player = [
        team_players[str(pid)] if str(pid) in team_players
        else [0] * PLAYER_FEATURE_COUNT
        for pid in roster
    ]
    # Stable sort: players with equal values keep their roster order.
    per_player.sort(key=lambda stats: stats[3], reverse=True)
    # Flatten with a comprehension instead of sum(lists, []), which re-copies
    # the accumulated list on every addition.
    return [value for stats in per_player[:ROTATION_SIZE] for value in stats]


def _team_features(season, team):
    """Return the `stat_cols` values of the first `team_stats` row for a team/season.

    Raises:
        IndexError: If no row matches the given season and team.
    """
    rows = team_stats.query(f"Season=={season} and Team=={team}")
    return rows.iloc[:, stat_cols].values.tolist()[0]


stats_dataset = []
# NOTE: the loop variable must stay named `game`; a later cell reads it.
for game in games_dataset:
    season    = str(game[3][0])
    home_team = str(game[3][1])
    away_team = str(game[3][2])

    # Create linear dataset row: home players, away players, ...
    combined = []
    combined.extend(_rotation_features(game[0], player_dict[season][home_team]))
    combined.extend(_rotation_features(game[1], player_dict[season][away_team]))

    # ... the len(stat_cols) team-level stats of each team, ...
    combined.extend(_team_features(season, home_team))
    combined.extend(_team_features(season, away_team))

    # ... and finally the result (home-win flag)
    combined.append(game[2])

    stats_dataset.append(combined)

In [None]:
# Replaced
# NOTE(review): this cell carries no execution count and is marked as replaced.
# If it is run, it overwrites `stats_dataset` with rows that contain every
# roster slot and no team-level stats, i.e. a different layout from the cell
# above -- confirm whether it should be kept at all.
stats_dataset = []
for game in games_dataset:
    combined = []

    # Slot 0 of a record holds the home roster, slot 1 the away roster; the
    # matching team ids sit at positions 1 and 2 of the info list (game[3]).
    for roster_slot, team_slot in ((0, 1), (1, 2)):
        for player in game[roster_slot]:
            season = str(game[3][0])
            team   = str(game[3][team_slot])
            known_players = player_dict[season][team]

            # Unknown / padding players contribute 75 zeros
            combined.extend(known_players.get(str(player), [0] * 75))

    # Result (home-win flag) goes last
    combined.append(game[2])

    stats_dataset.append(combined)

In [109]:
# Show the width and the contents of the first dataset row
first_row = stats_dataset[0]
print(len(first_row))
print(first_row)

1375
[31, 64, 64, 35.2, 7.9, 18.1, 0.435, 2.8, 7.4, 0.383, 5.0, 10.7, 0.47, 0.513, 2.5, 3.2, 0.783, 0.8, 3.6, 4.4, 6.0, 0.9, 0.5, 2.6, 2.0, 21.1, 22, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 22, 68, 54, 30.0, 4.6, 9.7, 0.475, 2.4, 6.0, 0.398, 2.2, 3.7, 0.599, 0.597, 1.9, 2.0, 0.906, 0.8, 2.9, 3.7, 1.4, 1.0, 0.5, 0.8, 2.0, 13.4, 22, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 24, 55, 55, 29.1, 3.5, 7.7, 0.458, 0.8, 2.6, 0.299, 2.7, 5.1, 0.539, 0.508, 1.9, 2.5, 0.75, 1.4, 2.6, 4.0, 2.3, 1.7, 0.6, 1.3, 3.3, 9.7, 22, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0

## Write dataset to CSV file

In [110]:
# Write one CSV line per game row. `newline=""` is what the csv module asks for
# when handing it a file object; without it, platforms that translate "\n" on
# write (Windows) end up with an empty line after every row.
with open(DATASET_DIR + "nba_dataset.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerows(stats_dataset)

## TODO: Create PyTorch Dataset

In [12]:
class NBAGameDataset(Dataset):
    """Dataset that pairs an image file with a label read from a CSV file.

    NOTE(review): `Dataset` and `read_image` are not imported anywhere in the
    visible notebook -- presumably `torch.utils.data.Dataset` and
    `torchvision.io.read_image`; confirm and import them before running.
    NOTE(review): despite its name, this class reads image files rather than
    the game CSV produced above; it looks like a template that still has to be
    adapted (see the TODO heading).
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Load the annotation table and remember the directory and transforms.

        Args:
            annotations_file: CSV path; column 0 is used as the image file
                name and column 1 as the label.
            img_dir: Directory containing the images, joined onto the current
                working directory when an item is read.
            transform: Optional callable applied to each image.
            target_transform: Optional callable applied to each label.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx``, transforms applied."""
        file_name = self.img_labels.iloc[idx, 0]
        image = read_image(os.path.join(os.getcwd(), self.img_dir, file_name))
        label = self.img_labels.iloc[idx, 1]
        # A falsy transform (e.g. None) leaves the value untouched
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label

# Get current game

## Creating dataset from new game

In [121]:
# Query the live scoreboard endpoint and pull the list of games out of its
# dictionary form.
live_board = scoreboard.ScoreBoard()
listed_games = live_board.get_dict()["scoreboard"]["games"]

# Keep only the games whose status code is 3.
# NOTE(review): 3 presumably marks a finished game -- confirm against the
# nba_api documentation.
games = [entry for entry in listed_games if entry["gameStatus"] == 3]

In [120]:
# Print the game code of every selected game. A plain loop replaces the list
# comprehension that was used only for its side effect and echoed a throwaway
# list as the cell result. The loop variable is deliberately not called `game`,
# so the notebook-level `game` set by earlier loops is not overwritten.
for finished_game in games:
    print(finished_game["gameCode"])

[]

In [5]:
# Take the team ids of the second game in `games` (index 1) as strings.
# NOTE(review): this raises IndexError when fewer than two games were selected,
# and the values are overwritten by the hard-coded ids in the next cell --
# confirm whether this cell is still needed.
selected_game = games[1]
home_team = str(selected_game["homeTeam"]["teamId"])
away_team = str(selected_game["awayTeam"]["teamId"])
season    = "23"

In [238]:
# Hard-coded matchup: team ids and season code as strings, the form in which
# they are used as keys into `player_dict` in the cells below.
home_team, away_team, season = "1610612743", "1610612748", "23"

In [239]:
# Echo the selected home team id
home_team

'1610612743'

In [240]:
# Every player id recorded for the home team in this season
home_players = [*player_dict[season][home_team]]
# Right-pad with the string id '0' up to 24 entries (no-op if already 24 or more)
home_players.extend(['0'] * (24 - len(home_players)))

In [241]:
# Every player id recorded for the away team in this season
away_players = [*player_dict[season][away_team]]
# Right-pad with the string id '0' up to 24 entries (no-op if already 24 or more)
away_players.extend(['0'] * (24 - len(away_players)))

In [242]:
# Check the padded home roster length
len(home_players)

24

In [243]:
# Inspect the padded away roster
away_players

['1628964',
 '1627736',
 '201976',
 '1628425',
 '1628972',
 '1628418',
 '1631108',
 '203076',
 '1629117',
 '1629060',
 '2544',
 '1627745',
 '1629134',
 '1630590',
 '1630559',
 '1628432',
 '1626156',
 '1630346',
 '203471',
 '1631306',
 '1629308',
 '1629020',
 '1629022',
 '201566']

In [244]:
# Placeholder written to the label column: the outcome of these games is not
# known. The column is still emitted so each row keeps the same layout as the
# rows of `stats_dataset`. This replaces a read of `game[2]`, a loop variable
# left over from an earlier cell, which labelled every row with the result of
# an unrelated game and failed on a fresh kernel.
UNKNOWN_RESULT = 0


def _top_players_flat(roster, team_players, keep=9, width=75):
    """Flatten the stat vectors of the ``keep`` highest-ranked players.

    Args:
        roster: Iterable of player ids (padding ids included).
        team_players: Mapping of player-id string -> stat vector.
        keep: Number of players retained per team.
        width: Length of the all-zero vector used for ids that are not in
            ``team_players``.

    Returns:
        A flat list of the retained vectors, ranked by the value at index 3
        of each vector in descending order; ties keep roster order.
    """
    vectors = [team_players.get(str(pid), [0] * width) for pid in roster]
    ranked = sorted(vectors, key=lambda stats: stats[3], reverse=True)
    return [value for stats in ranked[:keep] for value in stats]


def _team_stat_vector(season_code, team_id):
    """Return the `stat_cols` values of the first matching `team_stats` row.

    Raises:
        IndexError: If no row matches the given season and team.
    """
    match = team_stats.query(f"Season=={season_code} and Team=={team_id}")
    return match.iloc[:, stat_cols].values.tolist()[0]


test_dataset = []

# Build seven rows for the same matchup. The home/away roles are swapped before
# rows 2, 4, 5 and 6, giving the orientation A, A, B, B, A, B, A.
# NOTE(review): presumably the 2-2-1-1-1 home-court pattern of a best-of-seven
# series -- confirm. Four swaps in total leave the variables in their starting
# orientation once the loop ends.
for i in range(0, 7):
    if i in (2, 4, 5, 6):
        home_team, away_team = away_team, home_team
        home_players, away_players = away_players, home_players

    # Create linear dataset row: home players, away players, ...
    combined = []
    combined.extend(_top_players_flat(home_players, player_dict[season][home_team]))
    combined.extend(_top_players_flat(away_players, player_dict[season][away_team]))

    # ... the len(stat_cols) team-level stats of each team, ...
    combined.extend(_team_stat_vector(season, home_team))
    combined.extend(_team_stat_vector(season, away_team))

    # ... and the (unknown) result placeholder
    combined.append(UNKNOWN_RESULT)

    test_dataset.append(combined)

In [245]:
# Inspect the generated rows
test_dataset

[[27,
  64,
  64,
  33.9,
  9.5,
  15.0,
  0.634,
  0.8,
  2.1,
  0.394,
  8.6,
  12.8,
  0.674,
  0.662,
  4.9,
  6.0,
  0.818,
  2.4,
  9.5,
  11.9,
  9.9,
  1.2,
  0.7,
  3.5,
  2.6,
  24.7,
  22,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  25,
  58,
  58,
  33.0,
  7.3,
  16.3,
  0.45,
  2.6,
  6.6,
  0.394,
  4.7,
  9.7,
  0.488,
  0.53,
  2.9,
  3.5,
  0.827,
  0.8,
  3.2,
  3.9,
  6.1,
  1.0,
  0.2,
  2.2,
  1.5,
  20.1,
  22,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  

In [246]:
# Write the generated rows, one CSV line each. `newline=""` is what the csv
# module asks for when handing it a file object; without it, platforms that
# translate "\n" on write (Windows) end up with an empty line after every row.
with open(DATASET_DIR + "test_game.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerows(test_dataset)