In [2]:
import wandb
import sys, os

from playlist_recommender.modelling import model_pipeline
from playlist_recommender.modelling import utils
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import (
    OneHotEncoder,
    StandardScaler,
    MinMaxScaler,
    RobustScaler,
    MaxAbsScaler,
    LabelEncoder,
)
from imblearn.over_sampling import RandomOverSampler
import numpy as np
import pandas as pd
import itertools
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [14]:
scalers = [None, StandardScaler(), MinMaxScaler(), RobustScaler(), MaxAbsScaler()]
samplers = [None, RandomOverSampler()]
featurisers = [OneHotEncoder()]  # None doesn't work well
classifiers = [LogisticRegression(), RandomForestClassifier()]

In [9]:
scalers = [MaxAbsScaler()]
samplers = [RandomOverSampler()]
featurisers = [OneHotEncoder(handle_unknown="ignore")]  # None doesn't work well
classifiers = [
    LogisticRegression(max_iter=1200),
    RandomForestClassifier(),
    MLPClassifier(max_iter=1000),
]

In [10]:
config_permuation_builer = {
    "scaler": scalers,
    "sampler": samplers,
    "featuriser": featurisers,
    "classifier": classifiers,
}
_keys, _values = zip(*config_permuation_builer.items())

In [11]:
config_permutations = [dict(zip(_keys, v)) for v in itertools.product(*_values)]
config_permutations

[{'scaler': MaxAbsScaler(),
  'sampler': RandomOverSampler(),
  'featuriser': OneHotEncoder(handle_unknown='ignore'),
  'classifier': LogisticRegression(max_iter=1200)},
 {'scaler': MaxAbsScaler(),
  'sampler': RandomOverSampler(),
  'featuriser': OneHotEncoder(handle_unknown='ignore'),
  'classifier': RandomForestClassifier()},
 {'scaler': MaxAbsScaler(),
  'sampler': RandomOverSampler(),
  'featuriser': OneHotEncoder(handle_unknown='ignore'),
  'classifier': MLPClassifier()}]

# Read data

In [12]:
X, y = utils.prep_playlist_df()

In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.35, random_state=0, stratify=y, shuffle=True
)

# Different transformation test

In [14]:
def test_transformation(X, y, config):
    pipe = model_pipeline.make_config_pipeline(X, config)
    scores = cross_val_score(pipe, X, y, cv=5, scoring="f1_macro")
    f1_score = np.mean(scores)
    print(f"F1 score: {f1_score:.3f}")
    return f1_score

In [15]:
_values_list = []
for config in config_permutations:
    if config["featuriser"] is None:
        X = X.drop("artist_names", axis=1)
    f1_score = test_transformation(X, y, config)
    _values = [str(x) for x in config.values()]
    _values.append(f1_score)
    _values_list.append(_values)

config_df = pd.DataFrame(
    _values_list, columns=["scaler", "sampler", "featuriser", "classifier", "f1"]
)

F1 score: 0.376
F1 score: 0.281




F1 score: 0.302




In [None]:
config_df