In [135]:
import pandas as pd
import numpy as np
import math
import logging
import importlib

import sys
import os

import matplotlib.pyplot as plt
import mlflow

from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, TransformerMixin
from mlflow import log_metric, log_param, log_artifact
from sklearn.ensemble import StackingClassifier
from inspect import getsource
import inspect

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from sklearn.pipeline import Pipeline

import tempfile


In [164]:
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

from data.load_data import load_data
from data.split_data import split_data
from data.clean_data import DataCleaner
from data.string_toInt_encoder import StringToIntEncoder
from data.oversample_minority import OversampleMinority
from data.undersample_majority import UndersampleMajority
from data.featur_target_pipline import FeatureTargetPipeline
from data.target_binarizer import TargetBinarizer
from data.dummies_encoder import SafeOneHotEncoder
from data.artist_popularity import ArtistPopularityEncoder
from data.balance_simpler import BalancedResampler


from predict.acuuracy import ModelEvaluator

from train.mlflow_experiment_runner import MLflowExperimentRunner
# from train.model_evaluator import ModelEvaluator


import data.load_data
import data.split_data
import data.clean_data
import data.string_toInt_encoder
import data.oversample_minority
import data.undersample_majority
import data.featur_target_pipline
import data.dummies_encoder
import data.artist_popularity
import data.balance_simpler

import predict.acuuracy

# import train.model_evaluator
import train.mlflow_experiment_runner


importlib.reload(data.load_data)
importlib.reload(data.split_data)
importlib.reload(data.clean_data)
importlib.reload(data.string_toInt_encoder)
importlib.reload(data.oversample_minority)
importlib.reload(data.undersample_majority)
importlib.reload(data.dummies_encoder)
importlib.reload(data.featur_target_pipline)
importlib.reload(data.target_binarizer)
importlib.reload(data.artist_popularity)
importlib.reload(data.balance_simpler)

# importlib.reload(train.model_evaluator)
importlib.reload(train.mlflow_experiment_runner)


importlib.reload(predict.acuuracy)



<module 'predict.acuuracy' from '/Users/level3/mlops_spotify/spotify-1million/src/predict/acuuracy.py'>

In [19]:
data_folder = './data'
df = load_data(data_folder)

Loading data...
Downloading dataset from Kaggle...
Dataset URL: https://www.kaggle.com/datasets/amitanshjoshi/spotify-1million-tracks
Download complete.
Loading CSV data...
Data loaded successfully.


In [20]:
target = df['popularity']
y = TargetBinarizer(threshold=50).fit_transform(target)
X = df.drop(columns='popularity')

X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, y)

In [59]:
def run_experiment(model: BaseEstimator, 
                 sampler: object, 
                 preprocessor: Pipeline, 
                 X_train, y_train, 
                 X_val, y_val,
                 experiment_name: str = "Spotify_Popularity",
                 sampler_params: dict = None,
                 model_params: dict = None):
    mlflow.set_tracking_uri("http://localhost:5000") 
    mlflow.set_experiment(experiment_name)
    mlflow.sklearn.autolog()
    with mlflow.start_run():
        # Log parameters
        params = {
            "sampler": sampler.__class__.__name__ if sampler else "None",
            "model": model.__class__.__name__,
            **(sampler_params or {}),
            **(model_params or {})
        }
        mlflow.log_params(params)

        # Preprocess data
        X_train_preprocessed = preprocessor.fit_transform(X_train, y_train)
        
        # Apply sampling
        if sampler:
            sampler_instance = sampler(**(sampler_params or {}))
            sampler_instance.fit(X_train_preprocessed, y_train)
            X_resampled, y_resampled = sampler_instance.fit_resample(X_train_preprocessed, y_train)
        else:
            X_resampled, y_resampled = X_train_preprocessed, y_train

        # Create and train model
        final_model = model.set_params(**(model_params or {}))
        final_model.fit(X_resampled, y_resampled)

        # Create evaluation pipeline
        full_pipeline = Pipeline([
            ("preprocessing", preprocessor),
            ("classifier", final_model)
        ])

        # Evaluate
        evaluator = ModelEvaluator(full_pipeline, X_val, y_val)
        metrics = evaluator.evaluate(log_to_mlflow=True)
        
        # Log metrics
        mlflow.log_metrics(metrics)
        mlflow.sklearn.log_model(full_pipeline, "model")
        
        return metrics

In [72]:
df

Unnamed: 0.1,Unnamed: 0,artist_name,track_name,track_id,popularity,year,genre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0,Jason Mraz,I Won't Give Up,53QF56cjZA9RTuuMZDrSA6,68,2012,acoustic,0.483,0.303,4,-10.058,1,0.0429,0.6940,0.000000,0.1150,0.1390,133.406,240166,3
1,1,Jason Mraz,93 Million Miles,1s8tP3jP4GZcyHDsjvw218,50,2012,acoustic,0.572,0.454,3,-10.286,1,0.0258,0.4770,0.000014,0.0974,0.5150,140.182,216387,4
2,2,Joshua Hyslop,Do Not Let Me Go,7BRCa8MPiyuvr2VU3O9W0F,57,2012,acoustic,0.409,0.234,3,-13.711,1,0.0323,0.3380,0.000050,0.0895,0.1450,139.832,158960,4
3,3,Boyce Avenue,Fast Car,63wsZUhUZLlh1OsyrZq7sz,58,2012,acoustic,0.392,0.251,10,-9.845,1,0.0363,0.8070,0.000000,0.0797,0.5080,204.961,304293,4
4,4,Andrew Belle,Sky's Still Blue,6nXIYClvJAfi6ujLiKqEq8,54,2012,acoustic,0.430,0.791,6,-5.419,0,0.0302,0.0726,0.019300,0.1100,0.2170,171.864,244320,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1159759,1473391,Nicola Conte,Black Spirits,0m27F0IGHLGAWhqd6ccYst,4,2011,trip-hop,0.373,0.742,10,-6.453,0,0.0736,0.3250,0.000141,0.1590,0.5220,107.951,344013,3
1159760,1473392,Nicola Conte,Quiet Dawn,6er9p611eHEcUCU50j7D57,3,2011,trip-hop,0.516,0.675,7,-7.588,0,0.0326,0.7880,0.000129,0.1300,0.2640,119.897,285067,4
1159761,1473393,Amon Tobin,Morning Ms Candis,7jsMMqxy1tt0rH5FzYcZTQ,2,2011,trip-hop,0.491,0.440,5,-8.512,1,0.0274,0.4770,0.003130,0.0936,0.0351,100.076,214253,4
1159762,1473394,Peace Orchestra,Happy Christmas (War Is Over),77lA1InUaXztuRk2vOzD1S,0,2011,trip-hop,0.480,0.405,0,-13.343,1,0.0276,0.4310,0.000063,0.1250,0.2020,133.885,239133,3


In [22]:
experiments = [
    {
        "model": LogisticRegression(),
        "sampler": OversampleMinority,
        "sampler_params": {"target_minority_percentage": 0.4},
        "model_params": {"random_state": 42, "class_weight": None}
    },
    {
        "model": LogisticRegression(),
        "sampler": UndersampleMajority,
        "sampler_params": {"target_minority_percentage": 0.4},
        "model_params": {"random_state": 42, "class_weight": None}
    },
    {
        "model": LogisticRegression(),
        "sampler": None,
        "sampler_params": {},
        "model_params": {"random_state": 42, "class_weight": 'balanced'}
    }
]



In [26]:
for exp in experiments:
    run_experiment(
        model=exp["model"],
        sampler=exp["sampler"],
        preprocessor=base_preprocessor,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        sampler_params=exp["sampler_params"],
        model_params=exp["model_params"]
    )

  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


🏃 View run big-squid-281 at: http://localhost:5000/#/experiments/400003216354571829/runs/40a1e276d1564eb6b245a67fa55de149
🧪 View experiment at: http://localhost:5000/#/experiments/400003216354571829


  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


🏃 View run unruly-bird-768 at: http://localhost:5000/#/experiments/400003216354571829/runs/ec9497c5384c443598a8f7e2d0be3bce
🧪 View experiment at: http://localhost:5000/#/experiments/400003216354571829


  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


🏃 View run masked-lamb-311 at: http://localhost:5000/#/experiments/400003216354571829/runs/393896b994034f7ab355db61b887afde
🧪 View experiment at: http://localhost:5000/#/experiments/400003216354571829


In [36]:
experiments = [
    {
        "model": XGBClassifier(),
        "sampler": BalancedResampler,  # XGBoost handles imbalance internally
        "sampler_params": {},
        "model_params": {
            'n_estimators': 300,
            'max_depth': 8,
            'scale_pos_weight': 10,
            'eval_metric': 'aucpr'
        }
    },
    {
        "model": XGBClassifier(),
        "sampler": Under,  # XGBoost handles imbalance internally
        "sampler_params": {},
        "model_params": {
            'n_estimators': 300,
            'max_depth': 8,
            'scale_pos_weight': 10,
            'eval_metric': 'aucpr'
        }
    },
    {
        "model": LGBMClassifier(),
        "sampler": None,
        "sampler_params": {},
        "model_params": {
            'num_leaves': 63,
            'min_data_in_leaf': 50,
            'class_weight': 'balanced'
        }
    }
]

for exp in experiments:
    run_experiment(
        model=exp["model"],
        sampler=exp["sampler"],
        preprocessor=base_preprocessor,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        sampler_params=exp["sampler_params"],
        model_params=exp["model_params"]
    )

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏃 View run abrasive-bass-770 at: http://localhost:5000/#/experiments/400003216354571829/runs/e66f461bed6749bc84b1e816f5e835f6
🧪 View experiment at: http://localhost:5000/#/experiments/400003216354571829
[LightGBM] [Info] Number of positive: 36978, number of negative: 774856
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008082 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3018
[LightGBM] [Info] Number of data points in the train set: 811834, number of used features: 112
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000












🏃 View run fearless-ape-419 at: http://localhost:5000/#/experiments/400003216354571829/runs/a9b99e50512846ce885137318dab7cff
🧪 View experiment at: http://localhost:5000/#/experiments/400003216354571829


In [None]:

def run_grid_search_experiment(model, param_grid, sampler, preprocessor,
                              X_train, y_train, X_val, y_val,
                              experiment_name="Spotify_GridSearch"):
    """
    Run grid search experiment with MLflow tracking
    """
    mlflow.set_experiment(experiment_name)
    
    with mlflow.start_run():
        # Preprocess data
        X_train_preprocessed = preprocessor.fit_transform(X_train, y_train)
        
        # Apply sampling if specified
        if sampler:
            sampler_instance = sampler()
            X_resampled, y_resampled = sampler_instance.fit_resample(X_train_preprocessed, y_train)
        else:
            X_resampled, y_resampled = X_train_preprocessed, y_train
        
        # Setup GridSearchCV
        grid_search = GridSearchCV(
            estimator=model,
            param_grid=param_grid,
            scoring='roc_auc',
            cv=3,
            n_jobs=-1,
            verbose=1
        )
        
        # Train with grid search
        grid_search.fit(X_resampled, y_resampled)
        
        # Log best parameters and metrics
        mlflow.log_params(grid_search.best_params_)
        mlflow.log_metric("best_score", grid_search.best_score_)
        
        # Create final pipeline with best estimator
        best_model = grid_search.best_estimator_
        full_pipeline = Pipeline([
            ("preprocessing", preprocessor),
            ("classifier", best_model)
        ])
        
        # Evaluate on validation set
        evaluator = ModelEvaluator(full_pipeline, X_val, y_val)
        metrics = evaluator.evaluate(log_to_mlflow=True)
        
        # Log model and artifacts
        mlflow.sklearn.log_model(full_pipeline, "best_pipeline")
        mlflow.log_dict(grid_search.cv_results_, "cv_results.json")
        
        return grid_search


In [44]:
# Define experiment configurations
experiments = [
    {
        "model": XGBClassifier(eval_metric='logloss'),
        "param_grid": {
            'n_estimators': [100, 200],
            'max_depth': [6, 8],
            'learning_rate': [0.05, 0.1],
            'scale_pos_weight': [5, 10]
        },
        "sampler": None,
        "sampler_params": {}
    },
    {
        "model": LGBMClassifier(),
        "param_grid": {
            'num_leaves': [31, 63],
            'min_data_in_leaf': [20, 50],
            'learning_rate': [0.05, 0.1],
            'class_weight': ['balanced', None]
            'force_col_wise': True
        },
        "sampler": None,
        "sampler_params": {}
    },
    {
        "model": CatBoostClassifier(verbose=0),
        "param_grid": {
            'iterations': [200, 300],
            'depth': [6, 8],
            'learning_rate': [0.03, 0.05],
            'auto_class_weights': ['Balanced', None]
        },
        "sampler": None,
        "sampler_params": {}
    }
]

# Run all experiments
for exp in experiments:
    grid_search = run_grid_search_experiment(
        model=exp["model"],
        param_grid=exp["param_grid"],
        sampler=exp["sampler"],
        preprocessor=base_preprocessor,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val
    )
    
    # Print best results
    print(f"\nBest parameters for {exp['model'].__class__.__name__}:")
    print(grid_search.best_params_)
    print("Validation metrics:", grid_search.best_score_)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[LightGBM] [Info] Number of positive: 344380, number of negative: 516571
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.825530 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20081
[LightGBM] [Info] Number of data points in the train set: 860951, number of used features: 112
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 344380, number of negative: 516571
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.319249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20064
[LightGBM] [Info] Number of data points in the train set: 860951, number of used features: 112
[LightGBM] 



[LightGBM] [Info] Number of positive: 344380, number of negative: 516570
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.902409 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20147
[LightGBM] [Info] Number of data points in the train set: 860950, number of used features: 112
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
[LightGBM] [Info] Number of positive: 344380, number of negative: 516570
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.294047 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20147
[LightGBM] [Info] Number of data points in the train set: 860950, number of used features: 112
[LightGBM]











🏃 View run sassy-wolf-578 at: http://localhost:5000/#/experiments/186147461124726310/runs/7ac237d8f9f6420b87ae082061c558ae
🧪 View experiment at: http://localhost:5000/#/experiments/186147461124726310

Best parameters for LGBMClassifier:
{'class_weight': None, 'learning_rate': 0.1, 'min_data_in_leaf': 50, 'num_leaves': 63}
Validation metrics: 0.9920341562098001
Fitting 3 folds for each of 16 candidates, totalling 48 fits
[LightGBM] [Info] Number of positive: 344380, number of negative: 516571
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.820756 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20064
[LightGBM] [Info] Number of data points in the train set: 860951, number of used features: 112
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [I



[LightGBM] [Info] Number of positive: 344380, number of negative: 516570
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.688452 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20147
[LightGBM] [Info] Number of data points in the train set: 860950, number of used features: 112
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Start training from score 0.000000
[LightGBM] [Info] Number of positive: 344380, number of negative: 516571
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.215320 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20081
[LightGBM] [Info] Number of data points in the train set: 860951, number of used features: 112
[LightGBM]

24 fits failed out of a total of 48.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
24 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/level3/mlops_spotify/spotify-1million/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/level3/mlops_spotify/spotify-1million/.venv/lib/python3.11/site-packages/catboost/core.py", line 5245, in fit
    self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline, use_best_model,
  File "/Users/level3/mlops_spotify/spotify-1million/.venv/lib/python3.11/site-packages/catboost/core.py", 

🏃 View run unique-crow-985 at: http://localhost:5000/#/experiments/186147461124726310/runs/afc98f8cff2142e4b070d937fbbf0989
🧪 View experiment at: http://localhost:5000/#/experiments/186147461124726310

Best parameters for CatBoostClassifier:
{'auto_class_weights': 'Balanced', 'depth': 8, 'iterations': 300, 'learning_rate': 0.05}
Validation metrics: 0.928711849538412
[LightGBM] [Info] Number of positive: 344380, number of negative: 516571
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.944931 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 20064
[LightGBM] [Info] Number of data points in the train set: 860951, number of used features: 112
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000
[LightGBM] [Info] Number of positive: 344380, number of negative: 51

In [51]:
from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer, recall_score
# Custom scorer for class 1 recall
recall_class1_scorer = make_scorer(
    recall_score,
    pos_label=1,  # Focus on class 1
    average='binary',  # For binary classification
    needs_threshold=True  # For XGBoost probability outputs
)

# Define experiment configurations

experiments = [
    {
        "model": XGBClassifier( 
                               objective='binary:logistic',
                               scale_pos_weight=10,   
                               max_delta_step=1, 
                               eval_metric=recall_class1_scorer),
        "param_grid": {
            'n_estimators': [100, 200],
            'max_depth': [6, 8],
            'learning_rate': [0.05, 0.1],
            'scale_pos_weight': [5, 10],
            'reg_alpha': [0, 0.1],
            'gamma': [0, 0.1],
             'class_weight': 'balanced'
        },
        "sampler": None,
        "sampler_params": {}
    }
]

# Run all experiments
for exp in experiments:
    grid_search = run_grid_search_experiment(
        model=exp["model"],
        param_grid=exp["param_grid"],
        sampler=exp["sampler"],
        preprocessor=base_preprocessor,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val
    )
    
    # Print best results
    print(f"\nBest parameters for {exp['model'].__class__.__name__}:")
    print(grid_search.best_params_)
    print("Validation metrics:", grid_search.best_score_)

Fitting 3 folds for each of 64 candidates, totalling 192 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


🏃 View run blushing-sheep-641 at: http://localhost:5000/#/experiments/186147461124726310/runs/76d7427c0b5d456fb678f103749abeca
🧪 View experiment at: http://localhost:5000/#/experiments/186147461124726310

Best parameters for XGBClassifier:
{'gamma': 0, 'learning_rate': 0.1, 'max_depth': 8, 'n_estimators': 200, 'reg_alpha': 0, 'scale_pos_weight': 5}
Validation metrics: 0.9332342125255831


In [57]:
from sklearn.utils.class_weight import compute_class_weight

# Custom scorer for class 1 recall
f1_class1_scorer = make_scorer(
    f1_score,
    pos_label=1,
    average='binary',
    needs_threshold=True
)
# Define experiment configurations

classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=classes, y=y_train)
# For binary classification, XGBoost uses scale_pos_weight
scale_pos_weight = class_weights[1]/class_weights[0] 

experiments = [
    {
        "model": XGBClassifier(
            objective='binary:logistic',
            eval_metric='logloss',  # Keep standard eval metric
            use_label_encoder=False,
            scale_pos_weight=scale_pos_weight  # Add initial weight
        ),
        "param_grid": {
            'n_estimators': [100, 200],
            'max_depth': [6, 8],
            'learning_rate': [0.05, 0.1],
            'scale_pos_weight': [scale_pos_weight, scale_pos_weight*1.5],  # Test different weights
            'reg_alpha': [0, 0.1],
            'gamma': [0, 0.1],
            'min_child_weight': [1, 3]  # Helps with class imbalance
        },
        "sampler": None,
        "sampler_params": {}
    }
]

# Run all experiments
for exp in experiments:
    grid_search = run_grid_search_experiment(
        model=exp["model"],
        param_grid=exp["param_grid"],
        sampler=exp["sampler"],
        preprocessor=base_preprocessor,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val
    )
    
    # Print best results
    print(f"\nBest parameters for {exp['model'].__class__.__name__}:")
    print(grid_search.best_params_)
    print("Validation metrics:", grid_search.best_score_)

Fitting 3 folds for each of 128 candidates, totalling 384 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


🏃 View run charming-midge-564 at: http://localhost:5000/#/experiments/186147461124726310/runs/52329c0e27c04499aebf442baad776bb
🧪 View experiment at: http://localhost:5000/#/experiments/186147461124726310

Best parameters for XGBClassifier:
{'gamma': 0, 'learning_rate': 0.1, 'max_depth': 8, 'min_child_weight': 3, 'n_estimators': 200, 'reg_alpha': 0, 'scale_pos_weight': np.float64(20.95451349451025)}
Validation metrics: 0.9331026256785857


In [58]:
from sklearn.utils.class_weight import compute_class_weight

classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=classes, y=y_train)
scale_pos_weight = class_weights[1]/class_weights[0] 

experiments = [
    {
        "model": XGBClassifier(),
        "param_grid": {
            'n_estimators': [100, 200],
            'max_depth': [6, 8],
            'learning_rate': [0.05, 0.1],
        },
        "sampler": BalancedResampler(),
        "sampler_params": {}
    }
]

# Run all experiments
for exp in experiments:
    grid_search = run_grid_search_experiment(
        model=exp["model"],
        param_grid=exp["param_grid"],
        sampler=exp["sampler"],
        preprocessor=base_preprocessor,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val
    )
    
    # Print best results
    print(f"\nBest parameters for {exp['model'].__class__.__name__}:")
    print(grid_search.best_params_)
    print("Validation metrics:", grid_search.best_score_)

Fitting 3 folds for each of 32 candidates, totalling 96 fits




🏃 View run serious-turtle-283 at: http://localhost:5000/#/experiments/186147461124726310/runs/8b106920d67e42459871dab38910ecb5
🧪 View experiment at: http://localhost:5000/#/experiments/186147461124726310

Best parameters for XGBClassifier:
{'learning_rate': 0.1, 'max_depth': 8, 'min_child_weight': 3, 'n_estimators': 200, 'scale_pos_weight': np.float64(20.95451349451025)}
Validation metrics: 0.9331026256785857


In [104]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

# Numeric transformer
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Categorical transformer
categorical_transformer = SafeOneHotEncoder(columns=['genre', 'key', 'time_signature'])

# Full preprocessor using ColumnTransformer
column_transformer = ColumnTransformer(transformers=[
    ('num', numeric_transformer, ['tempo', 'duration_ms']),
    ('cat', categorical_transformer, ['genre', 'key', 'time_signature'])
])

# Full pipeline with custom transformers
base_preprocessor = Pipeline([
    ('artist_encoder', ArtistPopularityEncoder()),
    ('data_cleaner', DataCleaner()),
    ('encoder', SafeOneHotEncoder(columns=['genre', 'key', 'time_signature'])),
    ('scaler', StandardScaler()),
])


In [146]:
import mlflow
import numpy as np
import json
import git
from pathlib import Path
from mlflow.models import infer_signature
from mlflow.pyfunc import log_model
from sklearn.pipeline import Pipeline

def run_experiment(
    model: BaseEstimator, 
    sampler: object, 
    preprocessor: Pipeline, 
    X_train, y_train, 
    X_val, y_val,
    experiment_name: str = "Spotify_Popularity",
    sampler_params: dict = None,
    model_params: dict = None,
    # New parameters for enhanced logging
    dataset_version: str = "1.0.0",
    input_example: pd.DataFrame = None,
    metadata: dict = None,
    tags: dict = None,
    log_artifacts: bool = True,
    registered_model_name: str = None
):
    """
    Enhanced experiment runner with comprehensive MLflow logging
    
    New Parameters:
    - dataset_version: Version identifier for the dataset
    - input_example: Sample input data for model signature
    - metadata: Dictionary of additional metadata
    - tags: Key-value pairs for experiment organization
    - log_artifacts: Whether to save feature importance and configs
    - registered_model_name: Name for model registry
    """
    
    mlflow.set_tracking_uri("http://localhost:5000")
    mlflow.set_experiment(experiment_name)
    
    with mlflow.start_run():
        # =====================
        # Setup and Preprocessing
        # =====================
        sampler_params = sampler_params or {}
        model_params = model_params or {}
        tags = tags or {}
        metadata = metadata or {}

        # Log git commit hash
        try:
            repo = git.Repo(Path.cwd(), search_parent_directories=True)
            git_commit = repo.head.object.hexsha
            tags["git_commit"] = git_commit
        except Exception:
            pass

        # =====================
        # Core Experiment Logic
        # =====================
        # Preprocess data
        X_train_preprocessed = preprocessor.fit_transform(X_train, y_train)
        
        # Apply sampling
        if sampler:
            sampler_instance = sampler(**sampler_params)
            X_resampled, y_resampled = sampler_instance.fit_resample(X_train_preprocessed, y_train)
        else:
            X_resampled, y_resampled = X_train_preprocessed, y_train

        # Create and train model
        final_model = model.set_params(**model_params)
        final_model.fit(X_resampled, y_resampled)

        # =====================
        # MLflow Logging
        # =====================
        # Create pipeline and signature
        full_pipeline = Pipeline([
            ("preprocessing", preprocessor),
            ("classifier", final_model)
        ])
        
        # Generate input example and signature
        input_example = X_train.sample(5, random_state=42) if input_example is None else input_example
        signature = infer_signature(input_example, full_pipeline.predict(input_example))

        sampler_name = sampler.__name__ if sampler else "None"
        sampler_params_prefixed = {f"sampler_{k}": v for k,v in (sampler_params or {}).items()}
        
        mlflow.log_params({
            "sampler": sampler_name,
            "model": model.__class__.__name__,
            **sampler_params_prefixed,  # Prefixed sampler params
            **model_params,
            "dataset_version": dataset_version
        })
        
        # For full transparency, also log the sampler object
        if sampler:
            mlflow.log_dict(
                {
                    "sampler_class": sampler_name,
                    "sampler_params": sampler_params or {},
                    # "sampler_code": inspect.getsource(sampler.__class__)
                },
                "sampler_config.json"
            )

        # Tags and metadata
        mlflow.set_tags({
            **tags,
            "project": "spotify_popularity",
            "model_type": "classifier",
            **metadata
        })

        # Model logging with metadata
        mlflow.sklearn.log_model(
            sk_model=full_pipeline,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
            registered_model_name=registered_model_name,
            metadata={
                "features": list(X_train.columns),
                "target": y_train.name if hasattr(y_train, 'name') else "popularity",
                **metadata
            }
        )

        # =====================
        # Additional Artifacts
        # =====================
        if log_artifacts:
            with tempfile.TemporaryDirectory() as tmp_dir:
                # Feature importance plot
                if hasattr(final_model, 'feature_importances_'):
                    plt.figure(figsize=(10, 6))
                    
                    # Get feature names from preprocessing
                    try:
                        encoder = preprocessor.named_steps.get('encoder', None)
                        if encoder and hasattr(encoder, 'get_feature_names_out'):
                            feature_names = encoder.get_feature_names_out()
                        else:
                            feature_names = [f"feature_{i}" for i in range(len(final_model.feature_importances_))]
                    except Exception as e:
                        print(f"Could not retrieve feature names due to: {e}")
                        feature_names = [f"feature_{i}" for i in range(len(final_model.feature_importances_))]
                    
                    # Create series with proper feature names
                    importances = pd.Series(
                        final_model.feature_importances_,
                        index=feature_names
                    ).sort_values()
                    
                    importances.plot.barh()
                    plt.title("Feature Importance")
                    plt.tight_layout()
                    plt.savefig(f"{tmp_dir}/feature_importance.png")
                    plt.close()

                # Preprocessing config
                preprocessor_config = {
                    "steps": list(preprocessor.named_steps.keys()),
                    "params": preprocessor.get_params()
                }
                # with open(f"{tmp_dir}/preprocessor_config.json", 'w') as f:
                #     json.dump(preprocessor_config, f)

                mlflow.log_artifacts(tmp_dir)

        # =====================
        # Evaluation and Metrics
        # =====================
        evaluator = ModelEvaluator(full_pipeline, X_val, y_val)
        metrics = evaluator.evaluate(log_to_mlflow=True)
        
        return metrics


In [147]:

# Example experiment configuration with all parameters
experiments = [
    {
        "model": XGBClassifier(),
        "sampler": BalancedResampler,
        "preprocessor": base_preprocessor,
        "X_train": X_train,
        "y_train": y_train,
        "X_val": X_val,
        "y_val": y_val,
        "model_params": {
            'n_estimators': 300,
            'max_depth': 8,
            'learning_rate': 0.1,
            'scale_pos_weight': 1.9
        },
        "sampler_params": {},
        "dataset_version": "2024-03-v2",
        "input_example": X_train.sample(5),
        "metadata": {
            "business_impact": "high",
            "owner": "data-team",
            "description": "XGBoost with class balancing"
        },
        "tags": {
            "stage": "production-candidate",
            "data_source": "spotify-api"
        },
        # "registered_model_name": "SpotifyPopularityClassifier",
        "log_artifacts": True
    }
]

# Running experiments
for exp in experiments:
    results = run_experiment(**exp)
    print(f"Experiment results: {results}")



🏃 View run adaptable-pig-419 at: http://localhost:5000/#/experiments/400003216354571829/runs/389bc16c82904561b4e9d91a3bcc5851
🧪 View experiment at: http://localhost:5000/#/experiments/400003216354571829
Experiment results: {'accuracy': 0.8359727531399994, 'precision': 0.20635829872547615, 'recall': 0.8972602739726028, 'f1': 0.3355454651298172, 'roc_auc': np.float64(0.9378699276874045)}


In [165]:
# Example of how to use the MLflowExperimentRunner class with your experiment configuration

from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd
import numpy as np


# 1. Create an instance of the experiment runner
runner = MLflowExperimentRunner(
    experiment_name="Spotify_Popularity",
    tracking_uri="http://localhost:5000",
    evaluator_class=ModelEvaluator  # Set default evaluator class
)

# 2. Define your experiment configurations
experiments = [
    # {
    #     "model": XGBClassifier(),
    #     "sampler": BalancedResampler,  # Your custom resampler class
    #     "preprocessor": base_preprocessor,  # Your preprocessing pipeline
    #     "X_train": X_train,
    #     "y_train": y_train,
    #     "X_val": X_val,
    #     "y_val": y_val,
    #     "model_params": {
    #         'n_estimators': 300,
    #         'max_depth': 8,
    #         'learning_rate': 0.1,
    #         'scale_pos_weight': 1.9
    #     },
    #     "sampler_params": {},
    #     "dataset_version": "2024-03-v2",
    #     "input_example": X_train.sample(5),
    #     "metadata": {
    #         "business_impact": "high",
    #         "owner": "data-team",
    #         "description": "XGBoost with class balancing"
    #     },
    #     "tags": {
    #         "stage": "production-candidate",
    #         "data_source": "spotify-api"
    #     },
    #     # "registered_model_name": "SpotifyPopularityClassifier",  # Uncomment to register
    #     "log_artifacts": True
    # },
    # You can add more experiment configurations here
    {
        "model": RandomForestClassifier(),
        "preprocessor": base_preprocessor,  # Your preprocessing pipeline
        "sampler": BalancedResampler,
        "X_train": X_train,
        "y_train": y_train, 
        "X_val": X_val,
        "y_val": y_val,
        "model_params": {
            'n_estimators': 200,
            'max_depth': 10,
            'min_samples_split': 5
        },
        "sampler_params": {},
        "metadata": {
            "description": "Random Forest baseline model"
        },
        "tags": {
            "stage": "baseline"
        }
    }
]

# 3. Run all experiments at once
results = runner.run_experiments(experiments)
for i, result in enumerate(results):
    print(f"Experiment {i+1} results: {result}")

# Or run them individually
# for exp in experiments:
#     result = runner.run_experiment(**exp)
#     print(f"Experiment results: {result}")



🏃 View run debonair-carp-399 at: http://localhost:5000/#/experiments/400003216354571829/runs/a98ddf3716d542309ccde363b55f2bca
🧪 View experiment at: http://localhost:5000/#/experiments/400003216354571829
Experiment 1 results: {'accuracy': 0.8851263185123445, 'precision': 0.24409145401609864, 'recall': 0.7099626400996264, 'f1': 0.36328299241700124, 'roc_auc': np.float64(0.8994528709552995)}
