In [None]:
# ! python -m pip install optuna

In [None]:
import typing as T
import numpy as np
import pandas as pd

from scipy.stats import kendalltau, spearmanr
import plotly.express as px
import sklearn.linear_model
import sklearn.ensemble
import sklearn.neural_network
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, MinMaxScaler
import sklearn.metrics
import sklearn.impute
import scipy.stats
import mlflow
import jupyter_black

import optuna

from power_ratings.tournament_dataset import MMadnessDataset
import power_ratings.tournament_dataset as td

from power_ratings.tuning import evaluate_model_on_years 

# Enable automatic black formatting of code cells for this session.
# NOTE(review): lab=False presumably targets the classic Notebook front end
# rather than JupyterLab — confirm against the front end actually in use.
jupyter_black.load(lab=False)

In [None]:
# Run configuration. PREFIX is forwarded to the dataset constructor and, with
# MAX_YEAR, forms the MLflow experiment name.
# NOTE(review): "M" presumably selects the men's data — confirm.
PREFIX = "M"
MAX_YEAR = 2023

# Autologging is intentionally left disabled; uncomment to re-enable it.
# mlflow.sklearn.autolog(log_models=False)
experiment_name = f"optuna-{PREFIX}-{MAX_YEAR}"
mlflow.set_experiment(experiment_name)

In [None]:
# Constructor arguments shared by every MMadnessDataset built in this cell.
# Only "holdout_seasons" differs from one dataset to the next.
ds_params = dict(
    holdout_seasons=None,
    prefix=PREFIX,
    start_year=2003,
    # Feature order is kept exactly as configured; the T1 and T2 lists are
    # deliberately not re-sorted.
    extra_features=[
        "T1CombinedRating",
        "T1PossessionEfficiencyFactor",
        "T1EloWithScore",
        "T1EloWinLoss",
        "T1EloDay30WithScore",
        "T1EloDay30WinLoss",
        "T1WP16",
        "T1Seed",
        "T1EloDelta21Days",
        "T2WP16",
        "T2CombinedRating",
        "T2PossessionEfficiencyFactor",
        "T2EloWithScore",
        "T2EloWinLoss",
        "T2EloDay30WithScore",
        "T2EloDay30WinLoss",
        "T2Seed",
        "T2EloDelta21Days",
        "round",
    ],
    holdout_strategy="prior",
)

# Seasons that each get their own held-out dataset. 2020 is absent from this
# tuple (presumably because no tournament was played that year — confirm).
years = (2017, 2018, 2019, 2021, 2022)

# "current" is built with holdout_seasons=None; every other entry is keyed by
# the single season it holds out.
all_datasets = {"current": MMadnessDataset(**ds_params)}
for year in years:
    print(f"creating {year} dataset")
    # Same arguments as ds_params, overriding only the held-out season.
    ds_params_new = {**ds_params, "holdout_seasons": (year,)}
    ds = MMadnessDataset(**ds_params_new)
    all_datasets[year] = ds

In [None]:
import power_ratings as pr
# import importlib as imp
# imp.reload(pr)

In [None]:
# Seed for Optuna's sampler so that a Restart & Run All proposes the same
# sequence of hyperparameters. The objective itself may still be stochastic.
OPTUNA_SEED = 42
# Number of trials to run; lower this for a quick smoke test.
N_TRIALS = 2000


def objective(trial):
    """Return the value to minimise for one Optuna trial.

    Delegates to ``pr.tuning.evaluate_model_on_years`` with the trial, the
    ``all_datasets`` mapping and the ``years`` tuple built earlier in the
    notebook. The call goes through ``pr.tuning`` so that a reloaded module
    is picked up.
    """
    return pr.tuning.evaluate_model_on_years(trial, all_datasets, years)


# TPESampler is what Optuna uses by default for a single-objective study;
# it is spelled out here only so that a seed can be supplied.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=OPTUNA_SEED),
)
study.optimize(objective, n_trials=N_TRIALS)
print(study.best_trial)

In [None]:
# Table of all trials, sorted by objective value in ascending order.
trials_df = study.trials_dataframe()
trials_df.sort_values("value")

In [None]:
# Show the X attribute of the dataset for the last season in `years`. This is
# the same object the dataset-building loop left bound to `ds`, looked up
# explicitly instead of relying on the leaked loop variable.
all_datasets[years[-1]].X