In [1]:
import pandas as pd
import re
import os

In [2]:
# Load the tournament seeds file.
# NOTE(review): df_seeds is not referenced by any later cell visible here - confirm it is still needed.
df_seeds = pd.read_csv('../data/TourneySeeds.csv')

Defining all the shared columns (split by winners and losers)

In [3]:
# Box-score stat suffixes; each one is selected below with a 'W' prefix
# (winner's value) and with an 'L' prefix (loser's value).
stat_suffixes = ['fgm', 'fga', 'fgm3', 'fga3', 'ftm', 'fta',
                 'or', 'dr', 'ast', 'to', 'stl', 'blk', 'pf']

# Columns placed at the end of both selections.
game_key_columns = ['Season', 'Daynum']

# Winner's view of a game: own team/score first, then the opponent's.
winning_columns = (['Wteam', 'Wscore', 'Lscore', 'Wloc', 'Lteam', 'Numot']
                   + ['W' + suffix for suffix in stat_suffixes]
                   + game_key_columns)

# Loser's view of a game. 'Wloc' is reused because it is the only location
# column in these lists.
losing_columns = (['Lteam', 'Lscore', 'Wscore', 'Wloc', 'Wteam', 'Numot']
                  + ['L' + suffix for suffix in stat_suffixes]
                  + game_key_columns)

Defining the conversion from the winner/loser specific to a general name

In [4]:
# Stat suffixes that become the perspective-neutral column names once the
# 'W' / 'L' prefix is stripped.
neutral_stat_names = ['fgm', 'fga', 'fgm3', 'fga3', 'ftm', 'fta',
                      'or', 'dr', 'ast', 'to', 'stl', 'blk', 'pf']

# Winner rows: W* columns describe "this team", L* columns the opponent.
winning_col_convert = {'Wteam': 'team', 'Wscore': 'score', 'Wloc': 'loc',
                       'Lteam': 'oppteam', 'Lscore': 'oppscore',
                       'Numot': 'Numot'}
winning_col_convert.update({'W' + stat: stat for stat in neutral_stat_names})

# Loser rows: L* columns describe "this team", W* columns the opponent.
# 'Wloc' still maps to 'loc' here.
losing_col_convert = {'Lteam': 'team', 'Lscore': 'score', 'Wloc': 'loc',
                      'Wteam': 'oppteam', 'Wscore': 'oppscore',
                      'Numot': 'Numot'}
losing_col_convert.update({'L' + stat: stat for stat in neutral_stat_names})

### Read in season data

In [5]:
# Load the detailed regular-season results; each row carries both the
# winner's (W*) and the loser's (L*) columns.
df_season = pd.read_csv('../data/RegularSeasonDetailedResults.csv')

# Winner's view of every row: select, rename to neutral names, flag as a win.
df_season_winners = (
    df_season[winning_columns]
    .rename(columns=winning_col_convert)
    .assign(game_won=1)
)

# Loser's view of the same rows, flagged as a loss.
df_season_losers = (
    df_season[losing_columns]
    .rename(columns=losing_col_convert)
    .assign(game_won=0)
)

Location (`Wloc`) is recorded from the winner's perspective, so "home" and "away" need to be swapped for the losing team's rows

In [6]:
# 'loc' on the loser rows still holds the winner's location. Flip H <-> A
# against the untouched original column so the second replacement cannot
# undo the first; any other value is left unchanged.
winner_loc = df_season_losers['loc']
df_season_losers['loc'] = (
    winner_loc
    .mask(winner_loc == 'H', 'A')   # winner at home -> loser was away
    .mask(winner_loc == 'A', 'H')   # winner away    -> loser was at home
)

Combine the winner and loser data, and define the season data variable

In [7]:
# Stack the winner-view and loser-view rows into one frame.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; with default arguments it yields the same result
# (original row labels kept, columns matched by name).
df_season_comb = pd.concat([df_season_winners, df_season_losers])
# Flag these rows as regular-season games (1).
df_season_comb['season_data'] = 1

### Read in tournament data

In [8]:
# Load the detailed tournament results; the same winner/loser column lists
# and rename maps used for the season file are applied here.
df_tourney = pd.read_csv('../data/TourneyDetailedResults.csv')

# Winner's view of every tournament row, flagged as a win.
df_tourney_winners = (
    df_tourney[winning_columns]
    .rename(columns=winning_col_convert)
    .assign(game_won=1)
)

# Loser's view of the same rows, flagged as a loss.
# NOTE(review): unlike the season data, 'loc' is not flipped for tournament
# losers - presumably every tournament game is at a neutral site; confirm.
df_tourney_losers = (
    df_tourney[losing_columns]
    .rename(columns=losing_col_convert)
    .assign(game_won=0)
)

In [9]:
# Stack the tournament winner-view and loser-view rows into one frame.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; with default arguments the result is the same.
df_tourney_comb = pd.concat([df_tourney_winners, df_tourney_losers])
# Flag these rows as tournament games (season_data == 0).
df_tourney_comb['season_data'] = 0

### Combining tournament and season data

In [10]:
# Stack season and tournament rows, then index by (team, Season).
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0. A single set_index with both keys builds the same
# two-level index as setting 'team' and then appending 'Season'.
df_comb = (
    pd.concat([df_season_comb, df_tourney_comb])
    .set_index(['team', 'Season'], drop=True)
)

In [11]:
# Write the combined season + tournament frame to disk. With to_csv's default
# index=True, the index levels are written as the leading CSV columns.
df_comb.to_csv('../data/combined_dataset.csv')