In [1]:
import pandas as pd
import numpy as np

from group_lasso import GroupLasso

from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PowerTransformer, OneHotEncoder

from tqdm import tqdm

In [2]:
# Load the player modelling table; the CSV's first column becomes the index.
df = pd.read_csv('../data/final/player-model-data.csv', index_col=0)

# Columns are selected purely by position: the last 10 are the targets, and
# everything between the first 5 columns and the targets is a predictor.
# NOTE(review): the first 5 columns are skipped — presumably identifiers or
# other non-feature metadata; confirm against the CSV schema.
all_cols = df.columns.tolist()
predictor_cols = all_cols[5:-10]
target_cols = all_cols[-10:]


In [3]:
# Hold out 20% of the rows for testing. Predictors and targets go through a
# single call so their rows stay aligned; the fixed seed makes the split
# repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    df[predictor_cols],
    df[target_cols],
    random_state=1897,
    test_size=0.2,
)

In [4]:
# Split the predictors by preprocessing route: the first four are one-hot
# encoded, everything from position 5 onwards is power-transformed.
# Position 4 belongs to neither list, so it is handled by the
# ColumnTransformer's `remainder` setting ('passthrough' in the pipelines
# built in this notebook).
# NOTE(review): presumably that column is Age (see the `groups` comment) — confirm.
onehot_cols, numeric_cols = predictor_cols[:4], predictor_cols[5:]

In [5]:
# Group id for each feature column, in column order; handed to GroupLasso as
# `groups`. Entry i of `group_sizes` is the number of consecutive columns that
# share group id i.
# NOTE(review): the sizes presumably mirror the one-hot widths of the four
# categorical predictors followed by families of numeric stats — confirm
# against the transformed design matrix.
group_sizes = [6, 5, 5, 3, 1, 17, 16, 22, 23, 23, 24, 3, 21]
groups = [
    group_id
    for group_id, size in enumerate(group_sizes)
    for _ in range(size)
]
groups.append(-1)  # Don't regularize Age

In [6]:
# Candidate penalty strengths: 5 * 10**k for k = -3..3 (0.005 up to 5000).
reg = 5 * np.logspace(-3, 3, num=7)

# Both penalties are searched over the same candidates. Keys use the
# `<pipeline step>__<parameter>` form and target the
# 'group_lasso_regression' step.
param_grid = dict.fromkeys(
    ('group_lasso_regression__group_reg', 'group_lasso_regression__l1_reg'),
    reg,
)

In [8]:
# One fitted grid search per target, keyed by the target column name with its
# last four characters stripped.
# NOTE(review): presumably every target name ends in the same 4-character
# suffix — confirm, otherwise distinct targets could collide on one key.
models = {}

for target_col in tqdm(target_cols):
    # These names stay defined after the loop finishes; a later cell in this
    # notebook fits on the leftover `y_train`, so both assignments are kept.
    y_train = Y_train[target_col]
    y_test = Y_test[target_col]

    # One-hot encode the categorical predictors and power-transform the
    # numeric ones. Columns in neither list pass through unchanged and are
    # placed after the transformed columns.
    # NOTE(review): OneHotEncoder learns its categories separately on each CV
    # training fold. If a rare category is missing from a fold, the encoded
    # width will no longer match `groups` — consider pinning `categories=`.
    col_tf = ColumnTransformer([
        ('one_hot_encoder', OneHotEncoder(), onehot_cols),
        ('power_transformer', PowerTransformer(), numeric_cols)
    ], remainder='passthrough')

    pl = Pipeline([
        ('column_transformer', col_tf),
        ('group_lasso_regression', GroupLasso(
            groups=groups,
            n_iter=10_000,
            tol=1e-3,
            scale_reg=None,
            frobenius_lipschitz=False,
            fit_intercept=True,
            random_state=1897,
            supress_warning=True,  # spelling as given to GroupLasso; do not "fix"
        ))
    ])

    # 49 parameter combinations x 5 folds per target, each fit allowed up to
    # 10,000 iterations. The fits are independent, so spread them over all
    # cores with n_jobs=-1; scores and the selected parameters are unchanged.
    grid_cv = GridSearchCV(
        pl,
        param_grid,
        scoring='neg_mean_squared_error',
        cv=5,
        n_jobs=-1,
    )
    grid_cv.fit(X_train, y_train)

    models[target_col[:-4]] = grid_cv

  0%|          | 0/10 [00:05<?, ?it/s]


KeyboardInterrupt: 

In [232]:
# Show the winning hyper-parameters of every grid search, in the order the
# searches were stored.
for search in models.values():
    print(search.best_params_)

{'group_lasso_regression__group_reg': 0.0, 'group_lasso_regression__l1_reg': 0.0}
{'group_lasso_regression__group_reg': 0.005, 'group_lasso_regression__l1_reg': 0.0}
{'group_lasso_regression__group_reg': 0.005, 'group_lasso_regression__l1_reg': 0.0}
{'group_lasso_regression__group_reg': 0.005, 'group_lasso_regression__l1_reg': 0.0}
{'group_lasso_regression__group_reg': 0.0, 'group_lasso_regression__l1_reg': 0.0}
{'group_lasso_regression__group_reg': 0.0, 'group_lasso_regression__l1_reg': 0.005}
{'group_lasso_regression__group_reg': 0.005, 'group_lasso_regression__l1_reg': 0.0}
{'group_lasso_regression__group_reg': 0.0, 'group_lasso_regression__l1_reg': 0.005}
{'group_lasso_regression__group_reg': 0.005, 'group_lasso_regression__l1_reg': 0.0}
{'group_lasso_regression__group_reg': 0.0, 'group_lasso_regression__l1_reg': 0.0}


In [258]:
# Fit a single pipeline with hand-picked penalties (group_reg=0.5, no L1 term).
#
# The target is named explicitly rather than read from a `y_train` variable
# left behind by the grid-search loop: that leftover depends on where the loop
# last stopped. `target_cols[-1]` is the column a complete top-to-bottom run
# of the notebook ends on.
# NOTE(review): confirm this is the target you meant to inspect.
single_target_col = target_cols[-1]

# Same preprocessing as the grid-search pipelines: one-hot for categoricals,
# power transform for numerics, remaining columns passed through unchanged.
col_tf = ColumnTransformer([
    ('one_hot_encoder', OneHotEncoder(), onehot_cols),
    ('power_transformer', PowerTransformer(), numeric_cols)
], remainder='passthrough')

pl = Pipeline([
    ('column_transformer', col_tf),
    ('group_lasso_regression', GroupLasso(
        groups=groups,
        group_reg=0.5,
        l1_reg=0.0,
        n_iter=10_000,
        tol=1e-3,
        scale_reg=None,
        frobenius_lipschitz=False,
        fit_intercept=True,
        random_state=1897,
        supress_warning=True,  # spelling as given to GroupLasso; do not "fix"
    ))
])

# Left as the cell's last expression so the fitted pipeline is displayed.
pl.fit(X_train, Y_train[single_target_col])

Pipeline(steps=[('column_transformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('one_hot_encoder',
                                                  OneHotEncoder(),
                                                  ['prev_season_squad_play_style',
                                                   'prev_season_league',
                                                   'League', 'Pos']),
                                                 ('power_transformer',
                                                  PowerTransformer(),
                                                  ['90s', 'Gls_shoot',
                                                   'Sh_shoot', 'SoT_shoot',
                                                   'SoT%_shoot', 'Sh/90_shoot',
                                                   'SoT/90_shoot', 'G/Sh_shoot',
                                                   'G/SoT_shoot',...
                     