# Fields for tournament games

# Informational fields:
* date
* season
* team
* opponent
* win/loss

# Predictor fields
* regular_season_win_pct - just conference and non-conference games
* opponent_regular_season_win_pct
* season_win_pct - includes conference tourney games
* opponent_season_win_pct
* season_ppg - includes conference tourney games
* opponent_season_ppg
* season_ppg_allowed - includes conference tourney games
* opponent_season_ppg_allowed
* last_10_games_win_pct - includes conference tourney games
* opponent_last_10_games_win_pct
* last_5_games_win_pct - includes conference tourney games
* opponent_last_5_games_win_pct
* vs_tourney_team_win_pct - includes conference tourney games
* opponent_vs_tourney_team_win_pct
* games_played_vs_tourney_teams - includes conference tourney games
* opponent_games_played_vs_tourney_teams
* season_adj_offense - includes conference tourney games
* opponent_season_adj_offense
* vs_tourney_team_adj_offense - includes conference tourney games
* opponent_vs_tourney_team_adj_offense
* season_adj_defense - includes conference tourney games
* opponent_season_adj_defense
* vs_tourney_team_adj_defense - includes conference tourney games
* opponent_vs_tourney_team_adj_defense
* season_offensive_efficiency - includes conference tourney games
* opponent_season_offensive_efficiency
* vs_tourney_team_offensive_efficiency - includes conference tourney games
* opponent_vs_tourney_team_offensive_efficiency
* season_effective_field_goal_pct - includes conference tourney games
* opponent_season_effective_field_goal_pct
* vs_tourney_team_effective_field_goal_pct - includes conference tourney games
* opponent_vs_tourney_team_effective_field_goal_pct
* season_turnover_pct - includes conference tourney games
* opponent_season_turnover_pct
* vs_tourney_team_turnover_pct - includes conference tourney games
* opponent_vs_tourney_team_turnover_pct
* season_off_reb_pct - includes conference tourney games
* opponent_season_off_reb_pct
* vs_tourney_team_off_reb_pct - includes conference tourney games
* opponent_vs_tourney_team_off_reb_pct
* season_ft_rate - includes conference tourney games
* opponent_season_ft_rate
* vs_tourney_team_ft_rate - includes conference tourney games
* opponent_vs_tourney_team_ft_rate
* season_allowed_offensive_efficiency - includes conference tourney games
* opponent_season_allowed_offensive_efficiency
* vs_tourney_team_allowed_offensive_efficiency - includes conference tourney games
* opponent_vs_tourney_team_allowed_offensive_efficiency
* season_allowed_effective_field_goal_pct - includes conference tourney games
* opponent_season_allowed_effective_field_goal_pct
* vs_tourney_team_allowed_effective_field_goal_pct - includes conference tourney games
* opponent_vs_tourney_team_allowed_effective_field_goal_pct
* season_allowed_turnover_pct - includes conference tourney games
* opponent_season_allowed_turnover_pct
* vs_tourney_team_allowed_turnover_pct - includes conference tourney games
* opponent_vs_tourney_team_allowed_turnover_pct
* season_allowed_off_reb_pct - includes conference tourney games
* opponent_season_allowed_off_reb_pct
* vs_tourney_team_allowed_off_reb_pct - includes conference tourney games
* opponent_vs_tourney_team_allowed_off_reb_pct
* season_allowed_ft_rate - includes conference tourney games
* opponent_season_allowed_ft_rate
* vs_tourney_team_allowed_ft_rate - includes conference tourney games
* opponent_vs_tourney_team_allowed_ft_rate
* season_allowed_adj_offense - includes conference tourney games
* opponent_season_allowed_adj_offense
* vs_tourney_team_allowed_adj_offense - includes conference tourney games
* opponent_vs_tourney_team_allowed_adj_offense
* season_allowed_adj_defense - includes conference tourney games
* opponent_season_allowed_adj_defense
* vs_tourney_team_allowed_adj_defense - includes conference tourney games
* opponent_vs_tourney_team_allowed_adj_defense

In [119]:
import os
import pandas as pd
import sqlalchemy
import sklearn
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

connection_string = os.getenv("LINODE_CONN_STRING")
connection_string = os.getenv("LINODE_CONN_STRING")
conn = sqlalchemy.create_engine(os.getenv("LINODE_CONN_STRING")).connect()

In [120]:
all_games = pd.read_sql("select * from sports_data.game_stats_v2", conn)

In [121]:
tourney_games = all_games[all_games["game_type"] == "Post-Season Tourney"]
tourney_games = tourney_games[tourney_games['location'] == 'N']

In [122]:
tourney_games.columns

Index(['date', 'game_type', 'team', 'conference', 'opponent', 'location',
       'result', 'adj_offense', 'adj_defense', 'offensive_efficiency',
       'effective_fg_percent', 'turnover_percent', 'offensive_rebound_percent',
       'free_throw_rate', 'opponent_offensive_efficiency',
       'opponent_effective_fg_percent', 'opponent_turnover_percent',
       'opponent_offensive_rebound_percent', 'opponent_free_throw_rate',
       'opponent_conference', 'season', 'Win_Loss', 'score', 'opponent_score'],
      dtype='object')

In [123]:
tourney_games = tourney_games[['date', 'season', 'team', 'opponent', 'Win_Loss']]

In [124]:
all_games['game_type'].unique()

array(['Non-Conference', 'Conference', 'Conference Tourney',
       'Post-Season Tourney'], dtype=object)

In [125]:
regular_season_games = all_games[all_games["game_type"].isin(['Non-Conference', 'Conference'])]
pre_tourney_games = all_games[all_games["game_type"].isin(['Non-Conference', 'Conference', 'Conference Tourney'])]
last_5_non_tourney = all_games[all_games['game_type'] != 'Post-Season Tourney'].sort_values('date').groupby(['team', 'season']).tail(5).sort_values(['team', 'season', 'date'])
last_10_non_tourney = all_games[all_games['game_type'] != 'Post-Season Tourney'].sort_values('date').groupby(['team', 'season']).tail(10).sort_values(['team', 'season', 'date'])
all_games['opp_season'] = all_games.apply(lambda x: f"{x['opponent']}_{x['season']}", axis=1)
tourney_teams = tourney_games.apply(lambda x: f"{x['team']}_{x['season']}", axis=1).unique()
games_vs_tourney_teams = all_games[all_games['opp_season'].isin(tourney_teams)]

In [126]:
working = regular_season_games.groupby(['team', 'season','Win_Loss'], as_index=False).size()
working = working.pivot_table(index=['team', 'season'], columns='Win_Loss', values='size', fill_value=0).reset_index()[['team', 'season', 'L', 'W']]
working['regular_season_win_pct'] = working['W'] / (working['W'] + working['L'])
regular_season_win_pct = working[['team', 'season', 'regular_season_win_pct']]

In [127]:
tourney_games = tourney_games.merge(regular_season_win_pct, on=['team', 'season'], how='left')

In [128]:
regular_season_win_pct.rename(columns={'regular_season_win_pct': 'opponent_regular_season_win_pct', 'team':'opponent'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  regular_season_win_pct.rename(columns={'regular_season_win_pct': 'opponent_regular_season_win_pct', 'team':'opponent'}, inplace=True)


In [129]:
tourney_games

Unnamed: 0,date,season,team,opponent,Win_Loss,regular_season_win_pct
0,3/17/23,2023,Arizona St.,TCU,L,0.645161
1,3/17/23,2023,TCU,Arizona St.,W,0.645161
2,3/23/23,2023,Arkansas,Connecticut,L,0.612903
3,3/23/23,2023,Connecticut,Arkansas,W,0.774194
4,3/18/23,2023,Arkansas,Kansas,W,0.612903
...,...,...,...,...,...,...
1237,3/19/15,2015,Wofford,Arkansas,L,0.793103
1238,3/20/15,2015,Northern Iowa,Wyoming,W,0.900000
1239,3/20/15,2015,Wyoming,Northern Iowa,L,0.689655
1240,3/26/15,2015,Arizona,Xavier,W,0.903226


In [130]:
tourney_games = tourney_games.merge(regular_season_win_pct, on=['opponent', 'season'], how='left')

In [131]:
working = pre_tourney_games.groupby(['team', 'season','Win_Loss'], as_index=False).size()
working = working.pivot_table(index=['team', 'season'], columns='Win_Loss', values='size', fill_value=0).reset_index()[['team', 'season', 'L', 'W']]
working['season_win_pct'] = working['W'] / (working['W'] + working['L'])
season_win_pct = working[['team', 'season', 'season_win_pct']]

In [132]:
tourney_games = tourney_games.merge(season_win_pct, on=['team', 'season'], how='left')

In [133]:
season_win_pct.rename(columns={'season_win_pct': 'opponent_season_win_pct', 'team':'opponent'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  season_win_pct.rename(columns={'season_win_pct': 'opponent_season_win_pct', 'team':'opponent'}, inplace=True)


In [134]:
tourney_games.merge(season_win_pct, on=['opponent', 'season'], how='left')

Unnamed: 0,date,season,team,opponent,Win_Loss,regular_season_win_pct,opponent_regular_season_win_pct,season_win_pct,opponent_season_win_pct
0,3/17/23,2023,Arizona St.,TCU,L,0.645161,0.645161,0.647059,0.636364
1,3/17/23,2023,TCU,Arizona St.,W,0.645161,0.645161,0.636364,0.647059
2,3/23/23,2023,Arkansas,Connecticut,L,0.612903,0.774194,0.606061,0.757576
3,3/23/23,2023,Connecticut,Arkansas,W,0.774194,0.612903,0.757576,0.606061
4,3/18/23,2023,Arkansas,Kansas,W,0.612903,0.806452,0.606061,0.794118
...,...,...,...,...,...,...,...,...,...
1237,3/19/15,2015,Wofford,Arkansas,L,0.793103,0.774194,0.812500,0.764706
1238,3/20/15,2015,Northern Iowa,Wyoming,W,0.900000,0.689655,0.909091,0.718750
1239,3/20/15,2015,Wyoming,Northern Iowa,L,0.689655,0.900000,0.718750,0.909091
1240,3/26/15,2015,Arizona,Xavier,W,0.903226,0.612903,0.911765,0.617647


In [135]:
pre_tourney_games['score'] = pre_tourney_games['score'].astype(int)
season_ppg = pre_tourney_games[['team', 'season','score']].groupby(['team', 'season']).mean().reset_index().rename(columns={'score': 'season_ppg'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pre_tourney_games['score'] = pre_tourney_games['score'].astype(int)


In [136]:
tourney_games = tourney_games.merge(season_ppg, on=['team', 'season'], how='left')

In [137]:
season_ppg.rename(columns={'season_ppg': 'opponent_season_ppg', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_ppg, on=['opponent', 'season'], how='left')

In [138]:
pre_tourney_games['opponent_score'] = pre_tourney_games['opponent_score'].astype(int)
season_ppg_allowed = pre_tourney_games[['team', 'season','opponent_score']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_score': 'season_ppg_allowed'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pre_tourney_games['opponent_score'] = pre_tourney_games['opponent_score'].astype(int)


In [139]:
tourney_games = tourney_games.merge(season_ppg_allowed, on=['team', 'season'], how='left')

In [140]:
season_ppg_allowed.rename(columns={'season_ppg_allowed': 'opponent_season_ppg_allowed', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_ppg_allowed, on=['opponent', 'season'], how='left')

In [141]:
working = last_10_non_tourney.groupby(['team', 'season','Win_Loss'], as_index=False).size()
working = working.pivot_table(index=['team', 'season'], columns='Win_Loss', values='size', fill_value=0).reset_index()[['team', 'season', 'L', 'W']]
working['last_10_win_pct'] = working['W'] / (working['W'] + working['L'])
last_10_win_pct = working[['team', 'season', 'last_10_win_pct']]

In [142]:
tourney_games = tourney_games.merge(last_10_win_pct, on=['team', 'season'], how='left')

In [143]:
last_10_win_pct.rename(columns={'last_10_win_pct': 'opponent_last_10_win_pct', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(last_10_win_pct, on=['opponent', 'season'], how='left')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_10_win_pct.rename(columns={'last_10_win_pct': 'opponent_last_10_win_pct', 'team':'opponent'}, inplace=True)


In [144]:
working = last_5_non_tourney.groupby(['team', 'season','Win_Loss'], as_index=False).size()
working = working.pivot_table(index=['team', 'season'], columns='Win_Loss', values='size', fill_value=0).reset_index()[['team', 'season', 'L', 'W']]
working['last_5_win_pct'] = working['W'] / (working['W'] + working['L'])
last_5_win_pct = working[['team', 'season', 'last_5_win_pct']]

In [145]:
tourney_games = tourney_games.merge(last_5_win_pct, on=['team', 'season'], how='left')

In [146]:
last_5_win_pct.rename(columns={'last_5_win_pct': 'opponent_last_5_win_pct', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(last_5_win_pct, on=['opponent', 'season'], how='left')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_5_win_pct.rename(columns={'last_5_win_pct': 'opponent_last_5_win_pct', 'team':'opponent'}, inplace=True)


In [147]:
working = games_vs_tourney_teams.groupby(['team', 'season','Win_Loss'], as_index=False).size()
working = working.pivot_table(index=['team', 'season'], columns='Win_Loss', values='size', fill_value=0).reset_index()[['team', 'season', 'L', 'W']]
working['vs_tourney_team_win_pct'] = working['W'] / (working['W'] + working['L'])
vs_tourney_team_win_pct = working[['team', 'season', 'vs_tourney_team_win_pct']]

In [148]:
tourney_games = tourney_games.merge(vs_tourney_team_win_pct, on=['team', 'season'], how='left')

In [149]:
vs_tourney_team_win_pct.rename(columns={'vs_tourney_team_win_pct': 'opponent_vs_tourney_team_win_pct', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_win_pct, on=['opponent', 'season'], how='left')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vs_tourney_team_win_pct.rename(columns={'vs_tourney_team_win_pct': 'opponent_vs_tourney_team_win_pct', 'team':'opponent'}, inplace=True)


In [150]:
working = games_vs_tourney_teams.groupby(['team', 'season','Win_Loss'], as_index=False).size()
working = working.pivot_table(index=['team', 'season'], columns='Win_Loss', values='size', fill_value=0).reset_index()[['team', 'season', 'L', 'W']]
working['games_played_vs_tourney_teams'] = (working['W'] + working['L'])
games_played_vs_tourney_teams = working[['team', 'season', 'games_played_vs_tourney_teams']]

In [151]:
tourney_games = tourney_games.merge(games_played_vs_tourney_teams, on=['team', 'season'], how='left')

In [152]:
games_played_vs_tourney_teams.rename(columns={'games_played_vs_tourney_teams': 'opponent_games_played_vs_tourney_teams', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(games_played_vs_tourney_teams, on=['opponent', 'season'], how='left')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_played_vs_tourney_teams.rename(columns={'games_played_vs_tourney_teams': 'opponent_games_played_vs_tourney_teams', 'team':'opponent'}, inplace=True)


In [153]:
pre_tourney_games.columns

Index(['date', 'game_type', 'team', 'conference', 'opponent', 'location',
       'result', 'adj_offense', 'adj_defense', 'offensive_efficiency',
       'effective_fg_percent', 'turnover_percent', 'offensive_rebound_percent',
       'free_throw_rate', 'opponent_offensive_efficiency',
       'opponent_effective_fg_percent', 'opponent_turnover_percent',
       'opponent_offensive_rebound_percent', 'opponent_free_throw_rate',
       'opponent_conference', 'season', 'Win_Loss', 'score', 'opponent_score'],
      dtype='object')

In [154]:
season_adj_offense = pre_tourney_games[['team', 'season','adj_offense']].groupby(['team', 'season']).mean().reset_index().rename(columns={'adj_offense': 'season_adj_offense'})

In [155]:
tourney_games = tourney_games.merge(season_adj_offense, on=['team', 'season'], how='left')

In [156]:
season_adj_offense.rename(columns={'season_adj_offense': 'opponent_season_adj_offense', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_adj_offense, on=['opponent', 'season'], how='left')

In [157]:
season_adj_defense = pre_tourney_games[['team', 'season','adj_defense']].groupby(['team', 'season']).mean().reset_index().rename(columns={'adj_defense': 'season_adj_defense'})

In [158]:
tourney_games = tourney_games.merge(season_adj_defense, on=['team', 'season'], how='left')

In [159]:
season_adj_defense.rename(columns={'season_adj_defense': 'opponent_season_adj_defense', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_adj_defense, on=['opponent', 'season'], how='left')

In [160]:
vs_tourney_team_adj_offense = games_vs_tourney_teams[['team', 'season','adj_offense']].groupby(['team', 'season']).mean().reset_index().rename(columns={'adj_offense': 'vs_tourney_team_adj_offense'})

In [161]:
tourney_games = tourney_games.merge(vs_tourney_team_adj_offense, on=['team', 'season'], how='left')

In [162]:
vs_tourney_team_adj_offense.rename(columns={'vs_tourney_team_adj_offense': 'opponent_vs_tourney_team_adj_offense', 'team':'opponent'}, inplace=True)

In [163]:
tourney_games = tourney_games.merge(vs_tourney_team_adj_offense, on=['opponent', 'season'], how='left')

In [164]:
vs_tourney_team_adj_defense = games_vs_tourney_teams[['team', 'season','adj_defense']].groupby(['team', 'season']).mean().reset_index().rename(columns={'adj_defense': 'vs_tourney_team_adj_defense'})

In [165]:
tourney_games = tourney_games.merge(vs_tourney_team_adj_defense, on=['team', 'season'], how='left')

In [166]:
vs_tourney_team_adj_defense.rename(columns={'vs_tourney_team_adj_defense': 'opponent_vs_tourney_team_adj_defense', 'team':'opponent'}, inplace=True)

In [167]:
tourney_games = tourney_games.merge(vs_tourney_team_adj_defense, on=['opponent', 'season'], how='left')

In [168]:
season_offensive_efficiency = pre_tourney_games[['team', 'season','offensive_efficiency']].groupby(['team', 'season']).mean().reset_index().rename(columns={'offensive_efficiency': 'season_offensive_efficiency'})

In [169]:
tourney_games = tourney_games.merge(season_offensive_efficiency, on=['team', 'season'], how='left')

In [170]:
season_offensive_efficiency.rename(columns={'season_offensive_efficiency': 'opponent_season_offensive_efficiency', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_offensive_efficiency, on=['opponent', 'season'], how='left')

In [171]:
vs_tourney_team_offensive_efficiency = games_vs_tourney_teams[['team', 'season','offensive_efficiency']].groupby(['team', 'season']).mean().reset_index().rename(columns={'offensive_efficiency': 'vs_tourney_team_offensive_efficiency'})

In [172]:
tourney_games = tourney_games.merge(vs_tourney_team_offensive_efficiency, on=['team', 'season'], how='left')

In [173]:
vs_tourney_team_offensive_efficiency.rename(columns={'vs_tourney_team_offensive_efficiency': 'opponent_vs_tourney_team_offensive_efficiency', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_offensive_efficiency, on=['opponent', 'season'], how='left')

In [174]:
season_effective_fg_percent = pre_tourney_games[['team', 'season','effective_fg_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'effective_fg_percent': 'season_effective_fg_percent'})

In [175]:
tourney_games = tourney_games.merge(season_effective_fg_percent, on=['team', 'season'], how='left')

In [176]:
season_effective_fg_percent.rename(columns={'season_effective_fg_percent': 'opponent_season_effective_fg_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_effective_fg_percent, on=['opponent', 'season'], how='left')

In [177]:
vs_tourney_team_effective_fg_percent = games_vs_tourney_teams[['team', 'season','effective_fg_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'effective_fg_percent': 'vs_tourney_team_effective_fg_percent'})
tourney_games = tourney_games.merge(vs_tourney_team_effective_fg_percent, on=['team', 'season'], how='left')
vs_tourney_team_effective_fg_percent.rename(columns={'vs_tourney_team_effective_fg_percent': 'opponent_vs_tourney_team_effective_fg_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_effective_fg_percent, on=['opponent', 'season'], how='left')

In [178]:
season_turnover_percent = pre_tourney_games[['team', 'season','turnover_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'turnover_percent': 'season_turnover_percent'})
tourney_games = tourney_games.merge(season_turnover_percent, on=['team', 'season'], how='left')
season_turnover_percent.rename(columns={'season_turnover_percent': 'opponent_season_turnover_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_turnover_percent, on=['opponent', 'season'], how='left')

In [179]:
vs_tourney_team_turnover_percent = games_vs_tourney_teams[['team', 'season','turnover_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'turnover_percent': 'vs_tourney_team_turnover_percent'})
tourney_games = tourney_games.merge(vs_tourney_team_turnover_percent, on=['team', 'season'], how='left')
vs_tourney_team_turnover_percent.rename(columns={'vs_tourney_team_turnover_percent': 'opponent_vs_tourney_team_turnover_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_turnover_percent, on=['opponent', 'season'], how='left')

In [180]:
season_offensive_rebound_percent = pre_tourney_games[['team', 'season','offensive_rebound_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'offensive_rebound_percent': 'season_offensive_rebound_percent'})
tourney_games = tourney_games.merge(season_offensive_rebound_percent, on=['team', 'season'], how='left')
season_offensive_rebound_percent.rename(columns={'season_offensive_rebound_percent': 'opponent_season_offensive_rebound_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_offensive_rebound_percent, on=['opponent', 'season'], how='left')

In [181]:
vs_tourney_team_offensive_rebound_percent = games_vs_tourney_teams[['team', 'season','offensive_rebound_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'offensive_rebound_percent': 'vs_tourney_team_offensive_rebound_percent'})
tourney_games = tourney_games.merge(vs_tourney_team_offensive_rebound_percent, on=['team', 'season'], how='left')
vs_tourney_team_offensive_rebound_percent.rename(columns={'vs_tourney_team_offensive_rebound_percent': 'opponent_vs_tourney_team_offensive_rebound_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_offensive_rebound_percent, on=['opponent', 'season'], how='left')

In [182]:
season_free_throw_rate = pre_tourney_games[['team', 'season','free_throw_rate']].groupby(['team', 'season']).mean().reset_index().rename(columns={'free_throw_rate': 'season_free_throw_rate'})
tourney_games = tourney_games.merge(season_free_throw_rate, on=['team', 'season'], how='left')
season_free_throw_rate.rename(columns={'season_free_throw_rate': 'opponent_season_free_throw_rate', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_free_throw_rate, on=['opponent', 'season'], how='left')

In [183]:
vs_tourney_team_free_throw_rate = games_vs_tourney_teams[['team', 'season','free_throw_rate']].groupby(['team', 'season']).mean().reset_index().rename(columns={'free_throw_rate': 'vs_tourney_team_free_throw_rate'})
tourney_games = tourney_games.merge(vs_tourney_team_free_throw_rate, on=['team', 'season'], how='left')
vs_tourney_team_free_throw_rate.rename(columns={'vs_tourney_team_free_throw_rate': 'opponent_vs_tourney_team_free_throw_rate', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_free_throw_rate, on=['opponent', 'season'], how='left')

In [184]:
season_allowed_offensive_efficiency = pre_tourney_games[['team', 'season','opponent_offensive_efficiency']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_offensive_efficiency': 'season_allowed_offensive_efficiency'})
tournament_games = tourney_games.merge(season_allowed_offensive_efficiency, on=['team', 'season'], how='left')
season_allowed_offensive_efficiency.rename(columns={'season_allowed_offensive_efficiency': 'opponent_season_allowed_offensive_efficiency', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_allowed_offensive_efficiency, on=['opponent', 'season'], how='left')

In [185]:
vs_tourney_team_allowed_offensive_efficiency = games_vs_tourney_teams[['team', 'season','opponent_offensive_efficiency']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_offensive_efficiency': 'vs_tourney_team_allowed_offensive_efficiency'})
tourney_games = tourney_games.merge(vs_tourney_team_allowed_offensive_efficiency, on=['team', 'season'], how='left')
vs_tourney_team_allowed_offensive_efficiency.rename(columns={'vs_tourney_team_allowed_offensive_efficiency': 'opponent_vs_tourney_team_allowed_offensive_efficiency', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_allowed_offensive_efficiency, on=['opponent', 'season'], how='left')

In [186]:
season_allowed_effective_fg_percent = pre_tourney_games[['team', 'season','opponent_effective_fg_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_effective_fg_percent': 'season_allowed_effective_fg_percent'})
tourney_games = tourney_games.merge(season_allowed_effective_fg_percent, on=['team', 'season'], how='left')
season_allowed_effective_fg_percent.rename(columns={'season_allowed_effective_fg_percent': 'opponent_season_allowed_effective_fg_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_allowed_effective_fg_percent, on=['opponent', 'season'], how='left')

In [187]:
vs_tourney_team_allowed_effective_fg_percent = games_vs_tourney_teams[['team', 'season','opponent_effective_fg_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_effective_fg_percent': 'vs_tourney_team_allowed_effective_fg_percent'})
tourney_games = tourney_games.merge(vs_tourney_team_allowed_effective_fg_percent, on=['team', 'season'], how='left')
vs_tourney_team_allowed_effective_fg_percent.rename(columns={'vs_tourney_team_allowed_effective_fg_percent': 'opponent_vs_tourney_team_allowed_effective_fg_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_allowed_effective_fg_percent, on=['opponent', 'season'], how='left')

In [188]:
season_allowed_turnover_percent = pre_tourney_games[['team', 'season','opponent_turnover_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_turnover_percent': 'season_allowed_turnover_percent'})
tourney_games = tourney_games.merge(season_allowed_turnover_percent, on=['team', 'season'], how='left')
season_allowed_turnover_percent.rename(columns={'season_allowed_turnover_percent': 'opponent_season_allowed_turnover_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_allowed_turnover_percent, on=['opponent', 'season'], how='left')

In [189]:
vs_tourney_team_allowed_turnover_percent = games_vs_tourney_teams[['team', 'season','opponent_turnover_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_turnover_percent': 'vs_tourney_team_allowed_turnover_percent'})
tourney_games = tourney_games.merge(vs_tourney_team_allowed_turnover_percent, on=['team', 'season'], how='left')
vs_tourney_team_allowed_turnover_percent.rename(columns={'vs_tourney_team_allowed_turnover_percent': 'opponent_vs_tourney_team_allowed_turnover_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_allowed_turnover_percent, on=['opponent', 'season'], how='left')

In [190]:
season_allowed_offensive_rebound_percent = pre_tourney_games[['team', 'season','opponent_offensive_rebound_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_offensive_rebound_percent': 'season_allowed_offensive_rebound_percent'})
tourney_games = tourney_games.merge(season_allowed_offensive_rebound_percent, on=['team', 'season'], how='left')
season_allowed_offensive_rebound_percent.rename(columns={'season_allowed_offensive_rebound_percent': 'opponent_season_allowed_offensive_rebound_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_allowed_offensive_rebound_percent, on=['opponent', 'season'], how='left')

In [191]:
vs_tourney_team_allowed_offensive_rebound_percent = games_vs_tourney_teams[['team', 'season','opponent_offensive_rebound_percent']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_offensive_rebound_percent': 'vs_tourney_team_allowed_offensive_rebound_percent'})
tourney_games = tourney_games.merge(vs_tourney_team_allowed_offensive_rebound_percent, on=['team', 'season'], how='left')
vs_tourney_team_allowed_offensive_rebound_percent.rename(columns={'vs_tourney_team_allowed_offensive_rebound_percent': 'opponent_vs_tourney_team_allowed_offensive_rebound_percent', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_allowed_offensive_rebound_percent, on=['opponent', 'season'], how='left')

In [192]:
season_allowed_free_throw_rate = pre_tourney_games[['team', 'season','opponent_free_throw_rate']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_free_throw_rate': 'season_allowed_free_throw_rate'})
tourney_games = tourney_games.merge(season_allowed_free_throw_rate, on=['team', 'season'], how='left')
season_allowed_free_throw_rate.rename(columns={'season_allowed_free_throw_rate': 'opponent_season_allowed_free_throw_rate', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(season_allowed_free_throw_rate, on=['opponent', 'season'], how='left')

In [193]:
vs_tourney_team_allowed_free_throw_rate = games_vs_tourney_teams[['team', 'season','opponent_free_throw_rate']].groupby(['team', 'season']).mean().reset_index().rename(columns={'opponent_free_throw_rate': 'vs_tourney_team_allowed_free_throw_rate'})
tourney_games = tourney_games.merge(vs_tourney_team_allowed_free_throw_rate, on=['team', 'season'], how='left')
vs_tourney_team_allowed_free_throw_rate.rename(columns={'vs_tourney_team_allowed_free_throw_rate': 'opponent_vs_tourney_team_allowed_free_throw_rate', 'team':'opponent'}, inplace=True)
tourney_games = tourney_games.merge(vs_tourney_team_allowed_free_throw_rate, on=['opponent', 'season'], how='left')

In [196]:
tourney_games

Unnamed: 0,date,season,team,opponent,Win_Loss,regular_season_win_pct,opponent_regular_season_win_pct,season_win_pct,season_ppg,opponent_season_ppg,...,vs_tourney_team_allowed_turnover_percent,opponent_vs_tourney_team_allowed_turnover_percent,season_allowed_offensive_rebound_percent,opponent_season_allowed_offensive_rebound_percent,vs_tourney_team_allowed_offensive_rebound_percent,opponent_vs_tourney_team_allowed_offensive_rebound_percent,season_allowed_free_throw_rate,opponent_season_allowed_free_throw_rate,vs_tourney_team_allowed_free_throw_rate,opponent_vs_tourney_team_allowed_free_throw_rate
0,3/17/23,2023,Arizona St.,TCU,L,0.645161,0.645161,0.647059,70.323529,75.181818,...,19.707143,20.100000,31.555882,30.745455,31.771429,31.752632,34.017647,30.539394,41.128571,31.342105
1,3/17/23,2023,TCU,Arizona St.,W,0.645161,0.645161,0.636364,75.181818,70.323529,...,20.100000,19.707143,30.745455,31.555882,31.752632,31.771429,30.539394,34.017647,31.342105,41.128571
2,3/23/23,2023,Arkansas,Connecticut,L,0.612903,0.774194,0.606061,74.424242,78.454545,...,18.561905,16.155556,28.193939,26.109091,29.085714,27.288889,39.651515,38.945455,44.409524,36.088889
3,3/23/23,2023,Connecticut,Arkansas,W,0.774194,0.612903,0.757576,78.454545,74.424242,...,16.155556,18.561905,26.109091,28.193939,27.288889,29.085714,38.945455,39.651515,36.088889,44.409524
4,3/18/23,2023,Arkansas,Kansas,W,0.612903,0.806452,0.606061,74.424242,74.941176,...,18.561905,19.780769,28.193939,28.105882,29.085714,29.426923,39.651515,31.544118,44.409524,31.234615
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1237,3/19/15,2015,Wofford,Arkansas,L,0.793103,0.774194,0.812500,65.875000,78.029412,...,17.300000,20.100000,26.803125,34.135294,28.680000,36.235714,38.153125,39.347059,40.980000,44.550000
1238,3/20/15,2015,Northern Iowa,Wyoming,W,0.900000,0.689655,0.909091,65.363636,61.437500,...,17.354545,15.033333,25.645455,26.915625,27.418182,33.800000,26.939394,26.956250,32.672727,36.316667
1239,3/20/15,2015,Wyoming,Northern Iowa,L,0.689655,0.900000,0.718750,61.437500,65.363636,...,15.033333,17.354545,26.915625,25.645455,33.800000,27.418182,26.956250,26.939394,36.316667,32.672727
1240,3/26/15,2015,Arizona,Xavier,W,0.903226,0.612903,0.911765,76.441176,73.558824,...,19.175000,16.980000,21.861765,26.823529,22.400000,27.135000,36.726471,35.208824,41.068750,34.765000


In [197]:
tourney_games.to_sql('tourney_games_v5', conn, if_exists='replace', index=False)

266