In [None]:
import os
import sys
import numpy as np
import wandb

from playlist_recommender.modelling import model_pipeline
from playlist_recommender.modelling import utils
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [None]:
# Build the feature matrix / target, then let the project pipeline produce
# the four train/test pieces used by the rest of the notebook.
X, y = utils.prep_playlist_df()
(
    X_train,
    X_test,
    y_train,
    y_test,
) = model_pipeline.make_best_transformation_pipeline(X, y)
# Cell output: the shape of every split, in train/test order.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

# WandB sweeps

In [None]:
# It looks like a sweep config can't reference variables, so the candidate
# C values are spelled out as literals (they match points of
# np.logspace(-4, 4, 10)).
sweep_config = dict(
    # Grid search: every combination of the parameter values below is tried.
    method="grid",
    # The sweep ranks runs by the logged "f1_score" (higher is better).
    metric=dict(name="f1_score", goal="maximize"),
    parameters=dict(
        C=dict(
            values=[
                0.0001,
                0.3593813663804626,
                2.782559402207126,
                21.54434690031882,
                166.81005372000558,
                1291.5496650148827,
            ]
        ),
        solver=dict(values=["lbfgs", "sag", "newton-cg", "saga"]),
        max_iter=dict(values=[2500, 5000, 10000]),
    ),
)

In [None]:
# Register the sweep with W&B; the returned id is what the agent is pointed at.
sweep_id = wandb.sweep(
    sweep=sweep_config,
    project="spotify-recommender",
)

In [None]:
def train(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test):
    """Fit and score one logistic-regression configuration inside a W&B run.

    Opens a wandb run, reads the hyperparameters ``C``, ``solver`` and
    ``max_iter`` from that run's config, fits ``LogisticRegression`` on the
    training split, predicts on the test split, and logs accuracy plus
    macro-averaged F1, precision and recall to the run.

    The splits are bound as default arguments (captured when this cell is
    executed) so the function can be invoked without arguments, which is how
    it is handed to ``wandb.agent``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        X_test: Held-out features passed to ``clf.predict``.
        y_train: Training labels passed to ``clf.fit``.
        y_test: Held-out labels the predictions are scored against.

    Returns:
        None. Results are reported only through the wandb run.
    """
    # No `config=` is passed to init: in a sweep the controller supplies the
    # hyperparameters. Forwarding the module-global `wandb.config` would hand
    # init either a pre-init placeholder (fresh kernel) or the config of the
    # previously finished run (later runs of the same in-process agent).
    with wandb.init(
        project="spotify-recommender",
        tags=["logres"],
        name="logistic regression tuning",
    ) as run:
        # Read from the run object rather than the global so the values are
        # unambiguously those of this run.
        config = run.config
        clf = LogisticRegression(
            C=config.C, solver=config.solver, max_iter=config.max_iter
        )
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # All macro-averaged metrics share zero_division=0 so a class with no
        # predicted/true samples scores 0 silently instead of warning for
        # only some of the metrics.
        macro = {"average": "macro", "zero_division": 0}
        f1_score = metrics.f1_score(y_test, y_pred, **macro)
        accuracy = metrics.accuracy_score(y_test, y_pred)
        precision = metrics.precision_score(y_test, y_pred, **macro)
        recall = metrics.recall_score(y_test, y_pred, **macro)

        # "f1_score" is the key the sweep's metric definition optimises.
        run.log(
            {
                "f1_score": f1_score,
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
            }
        )

In [None]:
# Start an agent for the sweep; it invokes `train` for the runs it executes.
wandb.agent(sweep_id, function=train)