In [1]:
import feather
import pandas as pd
import numpy as np
from utils import normalize_by_year, season_to_playoff_year, tune_classifier

In [2]:
# Load both Feather inputs in one pass: 'series.data' holds one row per playoff
# series (winner, loser, margin, year) and 'year.data' holds one row per team
# per season of statistics.
series_data, year_data = map(feather.read_dataframe, ('series.data', 'year.data'))

In [3]:
# Preview only the first rows instead of dumping the whole frame into the notebook.
series_data.head(10)

Unnamed: 0,Winner,Winner Seed,Winner Wins,Loser,Loser Seed,Loser Wins,Margin,Series,Year
0,Milwaukee Bucks,1,4,Detroit Pistons,1,0,4,Eastern Conf First Round,2019
1,Toronto Raptors,2,4,Orlando Magic,2,1,3,Eastern Conf First Round,2019
2,Philadelphia ers,3,4,Brooklyn Nets,3,1,3,Eastern Conf First Round,2019
3,Boston Celtics,4,4,Indiana Pacers,4,0,4,Eastern Conf First Round,2019
4,Golden State Warriors,1,4,Los Angeles Clippers,1,2,2,Western Conf First Round,2019
5,Denver Nuggets,2,4,San Antonio Spurs,2,3,1,Western Conf First Round,2019
6,Portland Trail Blazers,3,4,Oklahoma City Thunder,3,1,3,Western Conf First Round,2019
7,Houston Rockets,4,4,Utah Jazz,4,1,3,Western Conf First Round,2019
8,Milwaukee Bucks,1,4,Boston Celtics,1,1,3,Eastern Conf Semifinals,2019
9,Toronto Raptors,2,4,Philadelphia ers,2,3,1,Eastern Conf Semifinals,2019


In [4]:
# Derive the two columns used to line year_data up with series_data:
#   TEAM_FULLNAME - "<city> <name>", the form series_data uses for Winner / Loser
#   PLAYOFF_YEAR  - the season label (e.g. '1946-47') mapped through
#                   season_to_playoff_year (1947 in the displayed data)
# NOTE(review): the same two columns are recomputed after normalization in a
# later cell; this cell mainly serves as an exploratory look at the result.
year_data['TEAM_FULLNAME'] = year_data['TEAM_CITY'] + ' ' + year_data['TEAM_NAME']
year_data['PLAYOFF_YEAR'] = year_data['YEAR'].map(season_to_playoff_year)

# Preview only the first rows instead of dumping the whole frame into the notebook.
year_data.head(10)

Unnamed: 0,GP,WINS,LOSSES,WIN_PCT,CONF_RANK,DIV_RANK,PO_WINS,PO_LOSSES,FGM,FGA,...,PTS_RANK,YEAR,CONF_COUNT,DIV_COUNT,TEAM_ID,TEAM_CITY,TEAM_NAME,NBA_FINALS_APPEARANCE,TEAM_FULLNAME,PLAYOFF_YEAR
0,0.000000,-1.399708,1.399708,-1.399922,0.000000,1.336306,-0.980581,-1.336306,-1.283881,0.000000,...,1.224745,1946-47,,6,1610612738,Boston,Celtics,,Boston Celtics,1947
1,0.000000,0.874818,-0.874818,0.873636,0.000000,-1.069045,1.372813,0.267261,1.155493,0.000000,...,-1.224745,1946-47,,6,1610612744,Philadelphia,Warriors,LEAGUE CHAMPION,Philadelphia Warriors,1947
2,0.000000,0.524891,-0.524891,0.526287,0.000000,-0.267261,-0.392232,1.069045,0.128388,0.000000,...,0.000000,1946-47,,6,1610612752,New York,Knicks,,New York Knicks,1947
0,0.000000,-1.401826,1.401826,-1.401645,0.000000,1.224745,-0.707107,-0.707107,-1.038815,0.000000,...,1.224745,1947-48,,4,1610612738,Boston,Celtics,,Boston Celtics,1948
1,0.000000,0.862662,-0.862662,0.863744,0.000000,-1.224745,1.414214,1.414214,-0.311645,0.000000,...,0.000000,1947-48,,4,1610612744,Philadelphia,Warriors,FINALS APPEARANCE,Philadelphia Warriors,1948
2,0.000000,0.539164,-0.539164,0.537901,0.000000,0.000000,-0.707107,-0.707107,1.350460,0.000000,...,-1.224745,1947-48,,4,1610612752,New York,Knicks,,New York Knicks,1948
0,0.000000,-0.861357,0.861357,-0.860703,0.000000,1.165998,-0.759468,-1.341641,-0.329452,0.000000,...,0.819232,1948-49,,6,1610612738,Boston,Celtics,,Boston Celtics,1949
1,0.000000,-0.524304,0.524304,-0.523172,0.000000,0.529999,-0.759468,0.447214,0.762942,0.000000,...,-0.409616,1948-49,,6,1610612744,Philadelphia,Warriors,,Philadelphia Warriors,1949
2,0.000000,1.273310,-1.273310,1.272490,0.000000,-0.741999,2.044722,0.447214,1.231111,0.000000,...,-1.146925,1948-49,,6,1610612747,Minneapolis,Lakers,LEAGUE CHAMPION,Minneapolis Lakers,1949
3,0.000000,-0.074901,0.074901,-0.077632,0.000000,-0.741999,0.292103,1.341641,-0.485509,0.000000,...,0.327693,1948-49,,6,1610612752,New York,Knicks,,New York Knicks,1949


In [5]:
# Identifier / label columns handed to normalize_by_year as `not_considering`
# (presumably passed through un-normalized -- confirm in utils.normalize_by_year).
drop_names = ['CONF_COUNT', 'DIV_COUNT', 'TEAM_ID', 'TEAM_CITY', 'TEAM_NAME', 'NBA_FINALS_APPEARANCE']
year_data = normalize_by_year(year_data, not_considering=drop_names)

# Normalize name and year across our two data sources: series_data identifies
# teams by full name and series by playoff year, so build matching keys here.
year_data['TEAM_FULLNAME'] = year_data['TEAM_CITY'] + ' ' + year_data['TEAM_NAME']
year_data['PLAYOFF_YEAR'] = year_data['YEAR'].map(season_to_playoff_year)

# Get cartesian product of year_data with itself by year: every ordered pair of
# teams from the same playoff year, with the first team's columns suffixed
# '_winner' and the second team's '_loser'.
# NOTE(review): this overwrites year_data with the pairwise frame, so this cell
# cannot be re-run without first reloading year_data (TEAM_CITY etc. become
# TEAM_CITY_winner / TEAM_CITY_loser).
year_data = pd.merge(year_data, year_data, on='PLAYOFF_YEAR', suffixes=('_winner', '_loser'))

# series_data shows the 76ers as 'Philadelphia ers' (the digits were lost
# upstream). That string can never equal TEAM_CITY + ' ' + TEAM_NAME, so the
# inner join below would silently drop every 76ers series. Repair the name on a
# copy before joining; this is a no-op if the stored value is already correct.
# NOTE(review): assumes year_data spells the team 'Philadelphia 76ers' -- confirm.
team_name_fixes = {'Philadelphia ers': 'Philadelphia 76ers'}
series_fixed = series_data.replace({'Winner': team_name_fixes, 'Loser': team_name_fixes})

# Join with our playoff results data: keep only the (winner, loser, year) pairs
# that correspond to a series that was actually played.
joined_data = pd.merge(
    year_data,
    series_fixed,
    left_on=['TEAM_FULLNAME_winner', 'TEAM_FULLNAME_loser', 'PLAYOFF_YEAR'],
    right_on=['Winner', 'Loser', 'Year'],
    how='inner',
)

In [10]:
# Preview only the first rows instead of dumping the whole frame into the notebook.
joined_data.head(10)

Unnamed: 0,GP_winner,WINS_winner,LOSSES_winner,WIN_PCT_winner,CONF_RANK_winner,DIV_RANK_winner,PO_WINS_winner,PO_LOSSES_winner,FGM_winner,FGA_winner,...,TEAM_FULLNAME_loser,Winner,Winner Seed,Winner Wins,Loser,Loser Seed,Loser Wins,Margin,Series,Year
0,0.000000,0.874818,-0.874818,0.873636,0.000000,-1.069045,1.372813,0.267261,1.155493,0.000000,...,New York Knicks,Philadelphia Warriors,2,2,New York Knicks,2,0,2,Semifinals,1947
1,0.000000,1.273310,-1.273310,1.272490,0.000000,-0.741999,2.044722,0.447214,1.231111,0.000000,...,Rochester Royals,Minneapolis Lakers,2,2,Rochester Royals,2,0,2,Western Div Finals,1949
2,0.577350,1.100360,-0.988832,1.017663,0.000000,-0.982708,2.167562,-0.098533,2.100130,0.000000,...,Syracuse Nationals,Minneapolis Lakers,2,4,Syracuse Nationals,2,2,2,Finals,1950
3,0.577350,1.100360,-0.988832,1.017663,0.000000,-0.982708,2.167562,-0.098533,2.100130,0.000000,...,Rochester Royals,Minneapolis Lakers,,1,Rochester Royals,,0,1,Central Div 1st Place Tiebreak,1950
4,-1.732051,1.100360,-1.340417,1.297372,0.000000,-0.982708,0.971666,2.266257,0.483294,0.000000,...,Philadelphia Warriors,Syracuse Nationals,1,2,Philadelphia Warriors,1,0,2,Eastern Div Semifinals,1950
5,-1.732051,1.100360,-1.340417,1.297372,0.000000,-0.982708,0.971666,2.266257,0.483294,0.000000,...,New York Knicks,Syracuse Nationals,1,2,New York Knicks,1,1,1,Eastern Div Finals,1950
6,-1.237597,-0.021642,-0.215365,0.103047,0.000000,0.284747,1.430771,1.664101,1.506508,0.000000,...,Boston Celtics,New York Knicks,3,2,Boston Celtics,3,0,2,Eastern Div Semifinals,1951
7,-1.237597,-0.021642,-0.215365,0.103047,0.000000,0.284747,1.430771,1.664101,1.506508,0.000000,...,Syracuse Nationals,New York Knicks,3,3,Syracuse Nationals,3,2,1,Eastern Div Finals,1951
8,-1.237597,-0.714192,0.473804,-0.603560,0.000000,1.044074,0.256805,0.000000,-1.818199,0.000000,...,Philadelphia Warriors,Syracuse Nationals,4,2,Philadelphia Warriors,4,0,2,Eastern Div Semifinals,1951
9,0.562544,0.844045,-0.732242,0.786100,0.000000,-0.474579,1.724263,1.109400,0.121213,0.000000,...,Minneapolis Lakers,Rochester Royals,2,3,Minneapolis Lakers,2,1,2,Western Div Finals,1951


In [15]:
# Drop series the winner took with exactly one win -- in the displayed data
# this is the 1950 'Central Div 1st Place Tiebreak', a single game rather than
# a multi-game series.
joined_data = joined_data[joined_data['Winner Wins'] != 1]

# An assignment alone displays nothing; show the filtered rows explicitly so
# the output under this cell is reproducible on a fresh run.
joined_data.head(10)

Unnamed: 0,GP_winner,WINS_winner,LOSSES_winner,WIN_PCT_winner,CONF_RANK_winner,DIV_RANK_winner,PO_WINS_winner,PO_LOSSES_winner,FGM_winner,FGA_winner,...,TEAM_FULLNAME_loser,Winner,Winner Seed,Winner Wins,Loser,Loser Seed,Loser Wins,Margin,Series,Year
0,0.000000,0.874818,-0.874818,0.873636,0.000000,-1.069045,1.372813,0.267261,1.155493,0.000000,...,New York Knicks,Philadelphia Warriors,2,2,New York Knicks,2,0,2,Semifinals,1947
1,0.000000,1.273310,-1.273310,1.272490,0.000000,-0.741999,2.044722,0.447214,1.231111,0.000000,...,Rochester Royals,Minneapolis Lakers,2,2,Rochester Royals,2,0,2,Western Div Finals,1949
2,0.577350,1.100360,-0.988832,1.017663,0.000000,-0.982708,2.167562,-0.098533,2.100130,0.000000,...,Syracuse Nationals,Minneapolis Lakers,2,4,Syracuse Nationals,2,2,2,Finals,1950
4,-1.732051,1.100360,-1.340417,1.297372,0.000000,-0.982708,0.971666,2.266257,0.483294,0.000000,...,Philadelphia Warriors,Syracuse Nationals,1,2,Philadelphia Warriors,1,0,2,Eastern Div Semifinals,1950
5,-1.732051,1.100360,-1.340417,1.297372,0.000000,-0.982708,0.971666,2.266257,0.483294,0.000000,...,New York Knicks,Syracuse Nationals,1,2,New York Knicks,1,1,1,Eastern Div Finals,1950
6,-1.237597,-0.021642,-0.215365,0.103047,0.000000,0.284747,1.430771,1.664101,1.506508,0.000000,...,Boston Celtics,New York Knicks,3,2,Boston Celtics,3,0,2,Eastern Div Semifinals,1951
7,-1.237597,-0.021642,-0.215365,0.103047,0.000000,0.284747,1.430771,1.664101,1.506508,0.000000,...,Syracuse Nationals,New York Knicks,3,3,Syracuse Nationals,3,2,1,Eastern Div Finals,1951
8,-1.237597,-0.714192,0.473804,-0.603560,0.000000,1.044074,0.256805,0.000000,-1.818199,0.000000,...,Philadelphia Warriors,Syracuse Nationals,4,2,Philadelphia Warriors,4,0,2,Eastern Div Semifinals,1951
9,0.562544,0.844045,-0.732242,0.786100,0.000000,-0.474579,1.724263,1.109400,0.121213,0.000000,...,Minneapolis Lakers,Rochester Royals,2,3,Minneapolis Lakers,2,1,2,Western Div Finals,1951
10,0.562544,0.844045,-0.732242,0.786100,0.000000,-0.474579,1.724263,1.109400,0.121213,0.000000,...,New York Knicks,Rochester Royals,2,4,New York Knicks,2,3,1,Finals,1951
