# Add Features

In [77]:
import psycopg2 as pg
import pandas as pd
import numpy as np

from psycopg2.extras import execute_values

1. Regular Season Win Percentage
2. Regular Season Opponents Win Percentage
3. Regular Season Win Percentage Differential
4. Conference
5. Conference Difficulty per Season
6. Schedule Difficulty
7. NCAA Tourney Seed Differential

## Win Percentage

In [78]:
# NOTE(review): connection credentials are hardcoded in this notebook; they
# should be supplied via environment variables or a secrets store before the
# notebook is shared or committed.
conn = pg.connect(database='postgres',
                  user='postgres',
                  password='w207final',
                  host='35.185.225.167')

# Pull regular-season rows only (Season, DayNum, Team, Outcome); the following
# cells aggregate "Outcome" into per-team wins and games played.
query = '''
    SELECT "Season", "DayNum", "Team", "Outcome"
    FROM prod.features
    WHERE "Season Type" = 'Regular'
'''

try:
    df = pd.read_sql_query(query, conn)
finally:
    # Release the connection even if the query fails.
    conn.close()

In [79]:
# Sanity check: (rows, columns) of the regular-season pull.
df.shape

(164082, 4)

In [80]:
# Per-team, per-season regular-season record.
#   wins   -> sum of "Outcome" (assumes Outcome is 1 for a win and 0 for a
#             loss -- TODO confirm against prod.features)
#   games  -> number of non-null "Outcome" rows for that team and season
#   winpct -> wins / games
# Named aggregation yields flat, final column names directly, so no MultiIndex
# flattening or in-place renaming is needed.
wins = (
    df.groupby(['Season', 'Team'])
      .agg(wins=('Outcome', 'sum'), games=('Outcome', 'count'))
      .reset_index()
)
wins['winpct'] = wins['wins'] / wins['games']

In [81]:
# Inspect the per-team aggregates. Only the last expression of a cell is
# rendered, so the head() result is computed but not shown; the shape is.
wins.head()
wins.shape

(5481, 5)

In [82]:
# Pull the distinct (Season, Team, Opponent) combinations that appear in
# NCAA Tourney rows; these are the matchups the new features are attached to.
# NOTE(review): connection credentials are hardcoded in this notebook; they
# should be supplied via environment variables or a secrets store.
conn = pg.connect(database='postgres',
                  user='postgres',
                  password='w207final',
                  host='35.185.225.167')

query = '''
    SELECT DISTINCT "Season", "Team", "Opponent"
    FROM prod.features
    WHERE "Season Type" = 'NCAA Tourney'
'''

try:
    matchups = pd.read_sql_query(query, conn)
finally:
    # The original cell never closed this connection; always release it.
    conn.close()

In [83]:
# Inspect the tourney matchups. Only the last expression of a cell is
# rendered, so the head() result is computed but not shown; the shape is.
matchups.head()
matchups.shape

(6518, 3)

In [84]:
# Preview the opponent win-percentage lookup: the season win rates from `wins`
# re-keyed by "Opponent". It is built inline so this cell runs on a fresh
# kernel; `wins2` itself is only assigned in a later cell, so referencing it
# here would raise NameError under Restart & Run All.
wins[['Season', 'Team', 'winpct']].rename(
    columns={'Team': 'Opponent', 'winpct': 'OpponentWinpct'}).head()

Unnamed: 0,Season,Opponent,OpponentWinpct
0,2003,1102,0.428571
1,2003,1103,0.481481
2,2003,1104,0.607143
3,2003,1105,0.269231
4,2003,1106,0.464286


In [85]:
# Opponent lookup: the same season win rates, re-keyed so they can be joined
# on the "Opponent" column of the matchups.
wins2 = wins.loc[:, ['Season', 'Team', 'winpct']].rename(
    columns={'Team': 'Opponent', 'winpct': 'OpponentWinpct'})

# First attach each team's own regular-season record to its matchups ...
wins_matchup = matchups.merge(wins, how='left', on=['Season', 'Team'])

# ... then the opponent's win percentage for the same season. Left joins keep
# every matchup row even when no regular-season record is found.
wins_matchup2 = wins_matchup.merge(wins2, how='left', on=['Season', 'Opponent'])
wins_matchup2.head()

Unnamed: 0,Season,Team,Opponent,wins,games,winpct,OpponentWinpct
0,2018,1277,1355,29,33,0.878788,0.8
1,2018,1181,1293,26,33,0.787879,0.827586
2,2006,1261,1417,22,30,0.733333,0.818182
3,2014,1124,1304,22,33,0.666667,0.612903
4,2015,1246,1214,34,34,1.0,0.484848


In [86]:
# Keep only the join keys and the two win percentages, then add the
# team-minus-opponent differential. assign() returns a new frame, so
# wins_matchup2 is left untouched.
to_load = wins_matchup2[['Season', 'Team', 'Opponent', 'winpct', 'OpponentWinpct']].assign(
    winpctDiff=lambda frame: frame['winpct'] - frame['OpponentWinpct'])

In [87]:
# Flatten the frame into a list of tuples of plain Python ints/floats, in the
# column order expected by the UPDATE ... FROM (VALUES ...) statement:
# (Season, Team, Opponent, winpct, OpponentWinpct, winpctDiff).
datarows = [
    (
        int(rec.Season),
        int(rec.Team),
        int(rec.Opponent),
        float(rec.winpct),
        float(rec.OpponentWinpct),
        float(rec.winpctDiff),
    )
    for rec in to_load.itertuples(index=False)
]

In [89]:
# DDL: make sure the three feature columns exist. IF NOT EXISTS makes the
# statement safe to re-run.
alter = '''
    ALTER TABLE prod.features
      ADD COLUMN IF NOT EXISTS "WinPct" REAL,
      ADD COLUMN IF NOT EXISTS "OpponentWinPct" REAL,
      ADD COLUMN IF NOT EXISTS "WinPctDiff" REAL
'''

# Bulk update: execute_values expands %s into the VALUES list built from
# `datarows`; rows are matched on (Season, Team, Opponent) and only
# NCAA Tourney rows are updated.
update = '''
    UPDATE prod.features as f
       SET "WinPct" = data."WinPct",
           "OpponentWinPct" = data."OpponentWinPct",
           "WinPctDiff" = data."WinPctDiff"
      FROM (VALUES %s) AS data (
             "Season",
             "Team",
             "Opponent",
             "WinPct",
             "OpponentWinPct",
             "WinPctDiff"
             )
     WHERE f."Season" = data."Season"
       and f."Team" = data."Team"
       and f."Opponent" = data."Opponent"
       and f."Season Type" = 'NCAA Tourney'
'''

# NOTE(review): connection credentials are hardcoded in this notebook; they
# should be supplied via environment variables or a secrets store.
conn = pg.connect(database='postgres',
                  user='postgres',
                  password='w207final',
                  host='35.185.225.167')

try:
    # `with conn` wraps the block in a transaction: commit on success,
    # rollback if either statement raises. It does not close the connection,
    # hence the explicit close in `finally`.
    with conn, conn.cursor() as c:
        c.execute(alter)
        execute_values(c, update, datarows, page_size=8000)
finally:
    conn.close()