In [None]:
import pandas as pd
import plotly.graph_objects as go
from pathlib import Path
import numpy as np

from vn1.score import compute_competition_score
from vn1.config import load_config

In [None]:
# Identifiers of the experiment and run whose artifacts are analysed below.
experiment_id = "731866309624208429"
run_id = "362c7ba7dd704e3f88f3f7f8b1f9c6cb"
# run_id = "4ca672547045491486d1a4fd409d771f"

# Artifacts directory of the selected run, relative to the working directory.
base_path = Path("..") / "mlruns" / experiment_id / run_id / "artifacts"

In [None]:
# Read the configuration stored alongside the run's artifacts.
config_path = base_path / "config.yaml"
config = load_config(config_path)

# Flag runs whose config carries a magic multiplier other than 1.
has_custom_multiplier = config.magic_multiplier != 1.0
if has_custom_multiplier:
    print(f"""
        Warning!
        Tuning run which used a magic multiplier different from 1 (={config.magic_multiplier}).
        """)

In [None]:
# Load the pickled (y_pred, y_test) pair of each fold and print, per fold,
# the mean signed error and the mean absolute error of the predictions.
# NOTE(review): read_pickle executes arbitrary code on load - only use on trusted artifacts.
print("fold err abs_err")
preds = {}
for fold in range(4):
    fold_file = base_path / f"predictions_{run_id}_fold_{fold}.p"
    y_pred, y_test = pd.read_pickle(fold_file)

    err = y_pred - y_test  # positive values mean the prediction is too high
    mean_err = err.mean().round(3)
    mean_abs_err = abs(err).mean().round(1)
    print(fold, mean_err, mean_abs_err)

    preds[fold] = (y_pred, y_test)


In [None]:
# Notes:
# - The mean error is positive in every fold, i.e. the predictions over-shoot (err = y_pred - y_test)
# - The most recent fold (zero) has the highest error

In [None]:
def compute_cv_score_multiplier(preds, multiplier):
    """Return the mean competition score over all folds after scaling the predictions.

    Args:
        preds: Mapping of fold -> (y_pred, y_test) pair.
        multiplier: Factor each fold's y_pred is multiplied by before scoring.

    Returns:
        The arithmetic mean of the per-fold competition scores, as a float.
    """
    fold_scores = [
        compute_competition_score(y_pred * multiplier, y_test)
        for y_pred, y_test in preds.values()
    ]
    return float(sum(fold_scores) / len(fold_scores))

In [None]:
# Baseline: mean CV score with the predictions left unscaled (multiplier of 1).
compute_cv_score_multiplier(preds, multiplier=1)

In [None]:
# Scan multipliers slightly below 1 and record the mean CV score for each one,
# to see how a global scaling of the predictions changes the score.
multipliers = np.linspace(0.995, 1, 21)  # 21 evenly spaced points, step 0.00025
multiplier_scores = [
    compute_cv_score_multiplier(preds, multiplier=multiplier)
    for multiplier in multipliers
]

multiplier_df = pd.DataFrame({"multiplier": multipliers, "score": multiplier_scores})

fig = go.Figure([go.Scatter(x=multiplier_df.multiplier, y=multiplier_df.score)])
# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    width=600,
    title_text="Mean CV score vs. prediction multiplier",
    xaxis_title="multiplier",
    yaxis_title="mean CV score",
)
fig.show()

In [None]:
# The score seems to reach its minimum at a multiplier of about 0.9955.