# Feature Generation
This notebook contains the feature engineering for the first model iteration (see [feature list](https://github.com/jgoerner/SportsPrediction/wiki/Feature-List#actual-feature-vector-for-model-1)). The features currently lack a distinction between "regular season" and "playoffs" (see [issue](https://github.com/jgoerner/SportsPrediction/issues/61)); this is to be solved soon.

# Imports

In [1]:
# Necessary to import custom modules: the working directory is switched first
# so that the `src` package imported below can be found (presumably this is the
# project root inside the Jupyter container - TODO confirm).
import os
from pathlib import Path
# NOTE(review): absolute, environment-specific path; the relative "./data/..."
# path used when persisting the features resolves against this directory.
os.chdir("/home/jovyan/work")

import pandas as pd


from src.features import get_feature

# Processing
The overall processing consists of two steps:
1. get all features per team (e.g. wins as home team in 2016/2017), ...
2. get all games in the format `season|team_home|team_guest|score_home|score_guest|home_win`

### Features per team

In [2]:
# Wins: load the home and guest tables, then join them on the columns they
# share (pandas' default when no key is given; presumably team and season).
df_wins_home = get_feature("wins_per_team_per_season_as_home")
df_wins_guest = get_feature("wins_per_team_per_season_as_guest")
df_wins = pd.merge(df_wins_home, df_wins_guest)

# Losses: same pattern as for the wins.
df_losses_home = get_feature("losses_per_team_per_season_as_home")
df_losses_guest = get_feature("losses_per_team_per_season_as_guest")
df_losses = pd.merge(df_losses_home, df_losses_guest)

# Average score margins: same pattern again.
df_margin_home = get_feature("average_score_margin_regular_season_as_home")
df_margin_guest = get_feature("average_score_margin_regular_season_as_guest")
df_margin = pd.merge(df_margin_home, df_margin_guest)

# Join the three tables, then reshape so that each team is one row and every
# (feature, season) pair becomes its own column.
df_merged = (
    pd.merge(pd.merge(df_wins, df_losses), df_margin)
    .pivot(index="team", columns="season")
)

# The pivot leaves two-level column labels; collapse each (feature, season)
# tuple into a single "<feature>_<season>" string.
df_merged.columns = ["_".join(level_values) for level_values in df_merged.columns]
df_merged.head()

Unnamed: 0_level_0,wins_as_home_2015/2016,wins_as_home_2016/2017,wins_as_home_2017/2018,wins_as_guest_2015/2016,wins_as_guest_2016/2017,wins_as_guest_2017/2018,losses_as_home_2015/2016,losses_as_home_2016/2017,losses_as_home_2017/2018,losses_as_guest_2015/2016,losses_as_guest_2016/2017,losses_as_guest_2017/2018,score_margin_as_home_2015/2016,score_margin_as_home_2016/2017,score_margin_as_home_2017/2018,score_margin_as_guest_2015/2016,score_margin_as_guest_2016/2017,score_margin_as_guest_2017/2018
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Atlanta Hawks,27,23,16,21,20,8,14,18,25,20,21,33,11.95122,11.268293,12.292683,11.317073,11.073171,9.439024
Boston Celtics,28,30,27,20,23,28,13,11,14,21,18,13,11.073171,9.634146,9.829268,10.317073,8.756098,8.609756
Brooklyn Nets,14,13,15,7,7,13,27,28,26,34,34,28,10.243902,10.707317,9.585366,11.243902,13.682927,10.878049
Charlotte Hornets,30,22,21,18,14,15,11,19,20,23,27,26,11.292683,11.02439,12.560976,10.97561,9.365854,10.97561
Chicago Bulls,26,25,17,16,16,10,15,16,24,25,25,31,8.756098,11.878049,10.926829,10.97561,12.195122,13.512195


### All games with score

In [3]:
# Fetch the "all_games" table through the project's get_feature helper and
# preview it; the preview shows the columns season, team_home, team_guest,
# score_home, score_guest and home_win.
df_games = get_feature("all_games")
df_games.head()

Unnamed: 0,season,team_home,team_guest,score_home,score_guest,home_win
0,2017/2018,Boston Celtics,Brooklyn Nets,110,97,1
1,2017/2018,Chicago Bulls,Brooklyn Nets,96,124,0
2,2017/2018,Milwaukee Bucks,Brooklyn Nets,111,119,0
3,2017/2018,Philadelphia 76ers,Brooklyn Nets,121,95,1
4,2017/2018,Miami Heat,Brooklyn Nets,109,110,0


### Final merging

In [4]:
# Attach the per-team feature row twice to every game: once matched on the
# home team and once on the guest team (df_merged is indexed by team name).
# The inner merge adds the feature columns unsuffixed; the outer merge then
# meets the same column names again, so its suffixes label the first set
# "_home" and the second set "_guest".
# NOTE(review): every game receives the feature columns of all seasons,
# including seasons later than the game's own - confirm this is intended.
df_final = pd.merge(
    pd.merge(df_games, df_merged, left_on="team_home", right_index=True),
    df_merged,
    left_on="team_guest",
    right_index=True,
    suffixes=("_home", "_guest"),
)
df_final.head()

Unnamed: 0,season,team_home,team_guest,score_home,score_guest,home_win,wins_as_home_2015/2016_home,wins_as_home_2016/2017_home,wins_as_home_2017/2018_home,wins_as_guest_2015/2016_home,...,losses_as_home_2017/2018_guest,losses_as_guest_2015/2016_guest,losses_as_guest_2016/2017_guest,losses_as_guest_2017/2018_guest,score_margin_as_home_2015/2016_guest,score_margin_as_home_2016/2017_guest,score_margin_as_home_2017/2018_guest,score_margin_as_guest_2015/2016_guest,score_margin_as_guest_2016/2017_guest,score_margin_as_guest_2017/2018_guest
0,2017/2018,Boston Celtics,Brooklyn Nets,110,97,1,28,30,27,20,...,26,34,34,28,10.243902,10.707317,9.585366,11.243902,13.682927,10.878049
22,2017/2018,Boston Celtics,Brooklyn Nets,108,105,1,28,30,27,20,...,26,34,34,28,10.243902,10.707317,9.585366,11.243902,13.682927,10.878049
42,2016/2017,Boston Celtics,Brooklyn Nets,114,105,1,28,30,27,20,...,26,34,34,28,10.243902,10.707317,9.585366,11.243902,13.682927,10.878049
81,2016/2017,Boston Celtics,Brooklyn Nets,122,117,1,28,30,27,20,...,26,34,34,28,10.243902,10.707317,9.585366,11.243902,13.682927,10.878049
109,2015/2016,Boston Celtics,Brooklyn Nets,120,95,1,28,30,27,20,...,26,34,34,28,10.243902,10.707317,9.585366,11.243902,13.682927,10.878049


# Persistence

In [5]:
# Persist the final feature table as CSV (the row index is not written).
p = Path("./data/features")
# exist_ok=True makes the directory creation idempotent, so re-running the
# cell is safe and no separate exists() check is needed before creating it.
p.mkdir(parents=True, exist_ok=True)
# Build the file path from `p` so the target directory is defined only once.
df_final.to_csv(p / "first_iteration.csv", index=False)