In [1]:
import pandas as pd
import numpy as np
import sys
import os

In [2]:
# Append "<parent of the current working directory>/mmml" to the module search
# path; presumably this is what lets the `mmml` imports below resolve when the
# notebook is launched from its own folder — TODO confirm the repo layout.
sys.path.append('{}/mmml'.format(os.path.dirname(os.getcwd())))
# `data_folder` is interpolated into every raw-data path built in this notebook.
from mmml.config import data_folder
# NOTE(review): wildcard import — `gameResults`, used below, is presumably one
# of the names it provides; an explicit import would make that visible.
from mmml.game_results import *

In [3]:
# Parent directory of the current working directory; every data path below is built from it.
base_path = os.path.split(os.getcwd())[0]

### `fnDataPrep` Work

<b> X FEATURES </b>
- Aggregated Regular Season Stats
- Elo Ratings
- Massey Rankings

In [4]:
## Get Regular Season Results
# Read the detailed regular-season results CSV from the raw-data folder named by
# `data_folder`, then wrap the frame in `gameResults` (presumably provided by the
# wildcard import from mmml.game_results), whose methods produce the features below.
reg_results_df = pd.read_csv('{}/Data/Raw/{}/MDataFiles_Stage1/MRegularSeasonDetailedResults.csv'.format(base_path, data_folder))
season_results = gameResults(reg_results_df)

In [5]:
# Regular Season Stats
# Computed by the gameResults wrapper; merged on its index with the Elo and
# Massey features further down, where the preview shows a (TeamID, Season) index.
season_stats = season_results.getSeasonStats()

In [6]:
# Elo
# Computed by the gameResults wrapper from the regular-season results; the merged
# preview further down shows `elo` (a list of values per row) and `last_elo` columns,
# which presumably originate here — TODO confirm against getElo.
elo = season_results.getElo()



In [7]:
## Massey Rankings
massey = pd.read_csv(
    '{}/Data/Raw/{}/MDataFiles_Stage1/MMasseyOrdinals.csv'.format(base_path, data_folder)
)

# Keep only the rankings published on day 133 for the selected systems, then
# reshape to one row per (TeamID, Season) with one OrdinalRank column per system.
massey_final = (
    massey
    .query('RankingDayNum == 133')
    .copy()
    .set_index(['TeamID', 'Season'])
    .query('SystemName in ["POM", "SAG", "MOR"]')  # "LMC", "EBP"
    .drop('RankingDayNum', axis=1)
    .pivot(columns='SystemName')['OrdinalRank']
)

In [8]:
# Merge Independent Features
# Index-on-index merges with the default join type, so only index entries
# present in all three inputs are kept.
x_features = (
    season_stats
    .merge(elo, left_index=True, right_index=True)
    .merge(massey_final, left_index=True, right_index=True)
)

In [9]:
# Preview the first three rows of the merged feature table.
x_features.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Ast,Blk,DR,FGA,FGA3,FGM,FGM3,FTA,FTM,NLoc,...,wins,possessions,o_eff,d_eff,net_eff,elo,last_elo,MOR,POM,SAG
TeamID,Season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1171,2012,278,70,667,1457,447,571,148,526,373,2,...,4,1839.825,90.389032,100.661748,-10.272716,"[1500.0, 1491.8493246116866, 1480.0, 1488.4805...",1351.987274,286.0,294.0,300.0
1402,2006,287,93,515,1311,406,542,125,422,289,0,...,2,1630.65,91.865207,110.385429,-18.520222,"[1500.0, 1491.8493246116866, 1483.36879746356,...",1351.248141,316.0,324.0,327.0
1328,2013,384,87,770,1789,472,780,154,646,491,5,...,20,2090.2375,105.490405,98.170662,7.319742,"[1500.0, 1508.1506753883134, 1520.102609012492...",1560.789937,49.0,51.0,43.0


<b> BASE OF ACCTS </b>
- Info on Tournament Matchups: Round, Seed, Game Slot, TeamID, etc.

In [10]:
# Read the detailed NCAA tournament results CSV and wrap it in `gameResults`,
# the same wrapper applied to the regular-season results.
t_results_df = pd.read_csv('{}/Data/Raw/{}/MDataFiles_Stage1/MNCAATourneyDetailedResults.csv'.format(base_path, data_folder))
t_results = gameResults(t_results_df)
# Matchup-level frame that the following cells extend with GameRound, seeds and
# game slot; it must expose DayNum, HTeamID, ATeamID and Season, which those cells read.
base = t_results.getBase()

In [11]:
## Add Info on GameRound
# DayNum values grouped by tournament round; a tuple's position in the list is
# the round number assigned to each of its days.
round_lookup = {
    day_num: game_round
    for game_round, day_nums in enumerate([
        (134, 135),  # round 0
        (136, 137),  # round 1
        (138, 139),  # round 2
        (143, 144),  # round 3
        (145, 146),  # round 4
        (152,),      # round 5
        (154,),      # round 6
    ])
    for day_num in day_nums
}

# Plain dict indexing: a DayNum missing from the lookup raises KeyError.
base['GameRound'] = base['DayNum'].map(round_lookup.__getitem__)

In [12]:
## Team Seed Info
seeds = pd.read_csv(
    '{}/Data/Raw/{}/MDataFiles_Stage1/MNCAATourneySeeds.csv'.format(base_path, data_folder)
)
# Seed lookup keyed by (TeamID, Season), built in a single non-mutating expression.
t_seeds = seeds.loc[:, ['Season', 'TeamID', 'Seed']].set_index(['TeamID', 'Season'])

In [13]:
## Tournament Slots
# Load the seed/round -> slot table, discard the two day-number columns and
# key it by (Seed, GameRound) so it can be joined onto the matchups.
slots_simple = (
    pd.read_csv(
        '{}/Data/Raw/{}/MDataFiles_Stage1/MNCAATourneySeedRoundSlots.csv'.format(base_path, data_folder)
    )
    .drop(columns=['EarlyDayNum', 'LateDayNum'])
    .set_index(['Seed', 'GameRound'])
)

In [14]:
## MERGE Matchups, Seeds, Slots
base = (
    base
    # Seed of the H team: arrives as 'Seed', becomes 'Seed_H' in the next merge.
    .merge(t_seeds, how='left', left_on=['HTeamID', 'Season'], right_index=True)
    # Seed of the A team: the clashing 'Seed' columns get the _H / _A suffixes.
    .merge(
        t_seeds,
        how='left',
        left_on=['ATeamID', 'Season'],
        right_index=True,
        suffixes=['_H', '_A'],
    )
    # Default (inner) join: rows whose (Seed_H, GameRound) has no slot entry are dropped.
    .merge(slots_simple, left_on=['Seed_H', 'GameRound'], right_index=True)
)

In [15]:
# Preview the first three matchup rows after the seed and slot merges.
base.head(3)

Unnamed: 0,HTeamID,ATeamID,Season,DayNum,HWin,HScore,AScore,GameRound,Seed_H,Seed_A,GameSlot
1,1112,1436,2003,136,1,80,51,1,Z01,Z16,R1Z1
86,1246,1197,2004,137,1,96,76,1,Z01,Z16a,R1Z1
212,1163,1107,2006,137,1,72,59,1,Z01,Z16,R1Z1
