In [None]:
import os
import sys
import numpy as np
import wandb

from playlist_recommender.modelling import model_pipeline
from playlist_recommender.modelling import utils
from sklearn import metrics
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder
from wandb.keras import WandbCallback
import tensorflow as tf
from tensorflow import keras
from keras import layers
from pprint import pprint
from sklearn.utils import compute_class_weight
from keras.callbacks import EarlyStopping

In [None]:
# Load the playlist features/labels, then hand them to the project's
# transformation pipeline, which yields the four train/test arrays used below.
X, y = utils.prep_playlist_df()
(
    X_train,
    X_test,
    y_train,
    y_test,
) = model_pipeline.make_best_transformation_pipeline(X, y)

# Show the shape of every split as a quick sanity check.
(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# Map the target labels to integer ids. The encoder is fitted on the training
# labels only and then applied to both splits.
le = LabelEncoder().fit(y_train)
y_train, y_test = (le.transform(labels) for labels in (y_train, y_test))

In [None]:
# Per-class weights computed by scikit-learn's "balanced" heuristic over the
# (already integer-encoded) training labels.
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train),
    y=y_train,
)
# Re-key the weight array by position: {0: w0, 1: w1, ...}.
class_weight_dict = {
    position: weight for position, weight in enumerate(class_weights)
}

# WandB Sweeps

In [None]:
# Base sweep definition: exhaustively try every parameter combination.
sweep_config = dict(method="grid")

In [None]:
# The sweep should maximise the value logged under the "f1_score" key.
metric = dict(name="f1_score", goal="maximize")
sweep_config.update(metric=metric)

In [None]:
# Search space: candidate widths for the fully connected layers.
parameters_dict = {"fc_layer_size": {"values": [128, 256]}}

# Attach the (still mutable) search space to the sweep definition.
sweep_config["parameters"] = parameters_dict

In [None]:
# Extend the search space with the training hyperparameters. `epochs` has a
# single candidate value, so it is effectively fixed across the sweep.
parameters_dict["epochs"] = {"values": [5000]}
parameters_dict["learning_rate"] = {"values": [0.00001, 0.001, 0.01]}
parameters_dict["batch_size"] = {"values": [8, 16, 32]}

In [None]:
# Pretty-print the assembled sweep configuration for a visual check.
pprint(sweep_config)

In [None]:
# Register the sweep with W&B; the returned id is what agents attach to.
sweep_id = wandb.sweep(
    sweep_config,
    project="spotify-recommender",
)

In [None]:
# Early-stopping callback: watches `val_loss` (lower is better) and stops once
# it has not improved by at least `min_delta` for `patience` epochs.
es = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=0.0001,
    patience=20,
    verbose=1,
)


In [None]:
def build_model(fc_layer_size=15, n_hidden_layers=4):
    """Build an uncompiled fully connected softmax classifier.

    The input width is read from the notebook-level ``X_train`` and the number
    of output units from the fitted label encoder ``le``, so both must exist
    before this function is called.

    Args:
        fc_layer_size: Number of units in every hidden ``Dense`` layer.
        n_hidden_layers: How many ReLU hidden layers to stack. Defaults to 4,
            the depth this notebook has always used.

    Returns:
        A ``keras.Sequential`` model: input -> ``n_hidden_layers`` ReLU Dense
        layers -> softmax Dense layer with one unit per class.
    """
    n_features = X_train.shape[1]
    num_classes = len(le.classes_)

    # Take the layers from the same `keras` namespace as `Sequential`/`Input`
    # instead of mixing in the standalone `keras.layers` import.
    hidden_layers = [
        keras.layers.Dense(fc_layer_size, activation="relu")
        for _ in range(n_hidden_layers)
    ]

    return keras.Sequential(
        [
            # Pass an explicit shape tuple rather than a bare int so the input
            # spec does not depend on Keras coercing scalars to shapes.
            keras.Input(shape=(n_features,)),
            *hidden_layers,
            keras.layers.Dense(num_classes, activation="softmax"),
        ]
    )

In [None]:
def train():
    """Run one sweep trial: build, train and score a model, logging to W&B.

    Hyperparameters are read from ``wandb.config``. When launched by a sweep
    agent the sweep supplies them; otherwise ``config_defaults`` below is used,
    so the function can also be run on its own.

    Relies on notebook-level state: ``X_train``, ``y_train``, ``X_test``,
    ``y_test``, ``class_weight_dict``, ``es`` and ``build_model``.

    Returns:
        None. Macro F1, accuracy, macro precision and macro recall on the test
        split are logged to the active W&B run.
    """
    # Fallback hyperparameters; sweep-provided values take precedence over them.
    config_defaults = {
        "epochs": 30,
        "batch_size": 8,
        "learning_rate": 0.01,
        "fc_layer_size": 15,
    }

    with wandb.init(
        project="spotify-recommender",
        tags=["nn"],
        name="neural network tuning - class weights",
        # Previously `wandb.config` was passed here, which left the defaults
        # above unused.
        config=config_defaults,
    ):
        config = wandb.config

        # Initialize the model with this trial's hyperparameters. Clearing the
        # session first drops graph state left over from earlier trials.
        keras.backend.clear_session()
        model = build_model(fc_layer_size=config.fc_layer_size)

        # Compile with an Adam optimizer using the trial's learning rate.
        opt = tf.keras.optimizers.Adam(learning_rate=config.learning_rate)
        model.compile(opt, "sparse_categorical_crossentropy", metrics=["acc"])

        # NOTE(review): the test split is also used as validation data here, so
        # any callback monitoring validation metrics (e.g. early stopping) sees
        # the same data the final scores are computed on — consider a separate
        # validation split.
        model.fit(
            X_train,
            y_train,
            epochs=config.epochs,
            # The sweep varies `batch_size`; it has to be forwarded or every
            # trial silently trains with the Keras default.
            batch_size=config.batch_size,
            validation_data=(X_test, y_test),
            class_weight=class_weight_dict,
            callbacks=[
                # WandbCallback streams the per-epoch Keras metrics to the run.
                WandbCallback(
                    save_model=False,
                    save_graph=False,
                    log_weights=False,
                    log_gradients=False,
                ),
                es,
            ],
        )

        # Final evaluation: most probable class per test row, then macro scores.
        y_pred = np.argmax(model.predict(X_test), axis=-1)
        f1_score = metrics.f1_score(y_test, y_pred, average="macro", zero_division=0)
        accuracy = metrics.accuracy_score(y_test, y_pred)
        precision = metrics.precision_score(
            y_test, y_pred, average="macro", zero_division=0
        )
        # zero_division=0 matches the other macro metrics and avoids a warning
        # when a class has no true samples in the test split.
        recall = metrics.recall_score(
            y_test, y_pred, average="macro", zero_division=0
        )
        wandb.log(
            {
                "f1_score": f1_score,
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
            }
        )

In [None]:
# Start an agent in this kernel that runs `train` once per sweep trial.
wandb.agent(
    sweep_id,
    function=train,
)