In [None]:
# ! pip install --upgrade mlflow

In [None]:
import typing as T
import os
import datetime
import json

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
from scipy.stats import linregress, kendalltau, spearmanr
from scipy.sparse import csr
import mlflow

from power_ratings import pm_training as pmt

In [None]:
# ! pip install nb-black
# %load_ext nb_black

In [None]:
# Make "feature-generation-2023" the active MLflow experiment for everything logged below
# (generate_all_season_features logs its final CSV with mlflow.log_artifact).
mlflow.set_experiment("feature-generation-2023")
# Turn on MLflow autologging for scikit-learn.
# NOTE(review): no scikit-learn estimator is fitted in the visible cells; this presumably only
# matters if pm_training uses scikit-learn internally -- confirm it is still needed.
mlflow.sklearn.autolog()

In [None]:
# Seasons to build features for. The value is passed unchanged to
# generate_all_season_features, which forwards it to pmt.read_in_data (game data, team names
# and previously-calculated Elo scores for comparison) and to pmt.train_model_all_years.
# None presumably means "use every available season" -- TODO confirm in pm_training.
# To restrict the run, assign an explicit list instead, e.g.:
# seasons = [2020, 2021, 2022, 2023]
seasons = None


def generate_all_season_features(
    prefix: str, seasons: T.Optional[T.List[int]] = None
) -> pd.DataFrame:
    """Build, persist and return the feature table for one dataset prefix.

    Steps, in order:

    1. Load games, Elo scores and team names with ``pmt.read_in_data``.
    2. Fit ratings with ``pmt.train_model_all_years`` using the prefix-specific
       ``PRE_SCALER`` / ``PRE_BASE`` constants from ``pmt``.
    3. Join ratings with Elo and team names (``pmt.join_datasets``) and write
       both results to ``data/{prefix}_data_interim.csv`` and
       ``data/{prefix}_features_interim.csv``.
    4. Derive the full feature set (``pmt.get_df_for_eff`` +
       ``pmt.get_full_features``), keep only the published columns, write them
       to ``output/{prefix}_data_complete.csv`` and log that file as an MLflow
       artifact.

    Args:
        prefix: Dataset prefix. ``"M"`` selects ``pmt.M_PRE_SCALER`` /
            ``pmt.M_PRE_BASE``; any other value falls back to the ``W_*``
            constants (the notebook only ever passes ``"M"`` or ``"W"``).
        seasons: Seasons forwarded to ``pmt.read_in_data`` and
            ``pmt.train_model_all_years``. ``None`` is forwarded as-is
            (presumably "all seasons" -- confirm in ``pm_training``).

    Returns:
        The feature table restricted to the published columns, i.e. exactly
        what was written to ``output/{prefix}_data_complete.csv``.

    Raises:
        KeyError: If ``pmt.get_full_features`` does not provide one of the
            published columns.
    """
    # DataFrame.to_csv does not create missing parent directories. Create them
    # up front so a fresh checkout fails (if at all) before the expensive
    # training step rather than after it.
    os.makedirs("data", exist_ok=True)
    os.makedirs("output", exist_ok=True)

    games_df, elo_df, teamnames = pmt.read_in_data(prefix, seasons=seasons)

    # Only "M" is special-cased; every other prefix uses the W_* constants.
    if prefix == "M":
        pre_scaler = pmt.M_PRE_SCALER
        pre_base = pmt.M_PRE_BASE
    else:
        pre_scaler = pmt.W_PRE_SCALER
        pre_base = pmt.W_PRE_BASE

    ratings_df = pmt.train_model_all_years(
        games_df,
        pre_scaler=pre_scaler,
        pre_base=pre_base,
        seasons=seasons,
    )
    output, joined = pmt.join_datasets(ratings_df, elo_df, teamnames)

    # Interim snapshots of the joined data, kept for inspection / reuse.
    output.to_csv(f"data/{prefix}_data_interim.csv", index=False)
    joined.to_csv(f"data/{prefix}_features_interim.csv", index=False)

    df_for_eff = pmt.get_df_for_eff(prefix)
    new_features = pmt.get_full_features(
        df_for_eff, output.set_index(["Season", "TeamID"])
    )

    # Published columns, in the order they appear in the output CSV.
    published_columns = [
        "Season",
        "TeamName",
        "CombinedRating",
        "OffensiveRating",
        "DefensiveRating",
        "EloWithScore",
        "EloWinLoss",
        "EloDelta21Days",
        "PossessionEfficiencyFactor",
        "TempoEstimate",
        "ScoreVariance",
    ]
    new_features = new_features[published_columns]

    complete_path = f"output/{prefix}_data_complete.csv"
    new_features.to_csv(complete_path, index=False)
    mlflow.log_artifact(complete_path)
    return new_features

In [None]:
# Build the feature tables for the "M" and "W" prefixes. Besides returning the table, each
# call writes data/{prefix}_data_interim.csv, data/{prefix}_features_interim.csv and
# output/{prefix}_data_complete.csv, and logs the last file as an MLflow artifact.
m = generate_all_season_features("M", seasons=seasons)
w = generate_all_season_features("W", seasons=seasons)

In [None]:
# Stamp the build: write today's local date (YYYY-MM-DD) as {"build_date": ...} to
# output/build_data.json, overwriting any previous file. Assumes the output/ directory
# already exists by the time this cell runs.
with open("output/build_data.json", 'w') as f:
    json.dump({"build_date": datetime.date.today().strftime('%Y-%m-%d')}, f)

In [None]:
# Sanity check on the "M" table: how strongly are the offensive and defensive ratings
# associated? All rows of `m` are used together (no per-season split).
# Rank correlations (each result carries the statistic and its p-value):
print(kendalltau(m.OffensiveRating, m.DefensiveRating))
print(spearmanr(m.OffensiveRating, m.DefensiveRating))
# Linear association: R^2, i.e. the squared Pearson r of a least-squares fit.
print(linregress(m.OffensiveRating, m.DefensiveRating).rvalue ** 2)