# About
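This notebook creates the final submission: it loads the stage 2 sample submission, derives win/loss records and odds for every matchup, and writes the formatted predictions to a CSV file.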
***
# Preface
## Imports

In [24]:
# Change the working directory before the `src` imports below run
# (presumably so the project-local `src` package resolves from the cwd — confirm).
# NOTE(review): hard-coded absolute path; the notebook only runs where this
# directory exists.
import os
os.chdir("/home/jovyan/work")

import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline

from src.transformer import HistWinLossTransformer, OddTransformer, WinLossTransformer
from src.utils import format_submission, get_table, sample_submission_to_df

***
# Get the data

In [10]:
# Fetch the stage 2 sample submission table, then convert it into a DataFrame
# with one row per matchup (see the preview below).
raw_sample_stage2 = get_table("t_original_sample_submission_stage2")
df_sample_stage2 = sample_submission_to_df(raw_sample_stage2)
df_sample_stage2.head()

Unnamed: 0,season,team_id_a,team_id_b
0,2018,1104,1112
1,2018,1104,1113
2,2018,1104,1116
3,2018,1104,1120
4,2018,1104,1137


# Merge with meta information & derive odds

In [19]:
# Step 1: win/loss record for both teams of each matchup
# (presumably the regular-season wins_reg_* / losses_reg_* columns — confirm in src.transformer).
wltrans = WinLossTransformer("team_id_a", "team_id_b")

# Step 2: historical win/loss record for both teams of each matchup
# (presumably the wins_hist_* / losses_hist_* columns — confirm in src.transformer).
ncaa_wltrans = HistWinLossTransformer("team_id_a", "team_id_b")

# Step 3: odds derived from the win/loss columns of team a and team b.
# Each list pairs a "reg" column with its "hist" counterpart.
otrans = OddTransformer(
    ["wins_reg_a", "wins_hist_a"], ["losses_reg_a", "losses_hist_a"],
    ["wins_reg_b", "wins_hist_b"], ["losses_reg_b", "losses_hist_b"],
    n_samples=100, n_experiments=100,
)

# Chain the three transformers; they run in the order listed.
ppl = Pipeline(steps=[
    ("win_loss_transformer", wltrans),
    ("ncaa_win_loss_transformer", ncaa_wltrans),
    ("odd_transfomer", otrans),
])

In [22]:
# Fit the whole pipeline on the matchup frame and transform it in one pass;
# the result carries the win/loss columns plus `odds_a` (see the preview below).
df_odds = ppl.fit_transform(X=df_sample_stage2)
df_odds.head()

Unnamed: 0,season,team_id_a,team_id_b,wins_reg_a,wins_reg_b,losses_reg_a,losses_reg_b,wins_hist_a,wins_hist_b,losses_hist_a,losses_hist_b,odds_a
0,2018,1104,1112,19,27,15,7,18,54,15,30,0.036
1,2018,1104,1113,19,20,15,11,18,5,15,5,0.284
2,2018,1104,1116,19,23,15,11,18,30,15,18,0.1196
3,2018,1104,1120,19,25,15,7,18,12,15,7,0.029
4,2018,1104,1137,19,25,15,9,18,2,15,7,0.2208


# Format the data for submission

In [30]:
# Reduce the odds frame to the two submission columns shown in the preview:
# `id` (season_teamA_teamB) and `pred` (the value of `odds_a`).
# NOTE(review): the `_w` / `_l` suffixes presumably mean winner / loser — confirm in src.utils.
df_submission = format_submission(
    df_odds,
    col_season="season",
    col_team_id_w="team_id_a",
    col_team_id_l="team_id_b",
    col_odds_w="odds_a",
    warning=False,
)

df_submission.head()

Unnamed: 0,id,pred
0,2018_1104_1112,0.036
1,2018_1104_1113,0.284
2,2018_1104_1116,0.1196
3,2018_1104_1120,0.029
4,2018_1104_1137,0.2208


In [32]:
# Write the submission to a CSV in the current working directory;
# index=False keeps the DataFrame row index out of the file.
submission_path = "reg_hist_beta_odds.csv"
df_submission.to_csv(submission_path, index=False)