# An example for the verstackLight package
##### Author: Daniel Hans Munk, 2023

In [None]:
from sklearn.metrics import roc_auc_score, log_loss
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Load the iris toy dataset and drop class 2, leaving a binary problem (classes 0 and 1).
from sklearn.datasets import load_iris
iris = load_iris()
X, y = (values[iris.target != 2] for values in (iris.data, iris.target))

# Make the task harder: append 300 * n_features columns of seeded Gaussian noise.
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.hstack((X, random_state.randn(n_samples, 300 * n_features)))

# Hold out 20 % of the rows for testing and wrap each of the four splits in a DataFrame.
X_train, X_test, y_train, y_test = map(
    pd.DataFrame,
    train_test_split(X, y, test_size=0.2, random_state=13),
)

### Import verstackLight.LGBMTuner

In [None]:
import os
import sys

# Location of the verstackLight package folder. May be absolute or relative to
# the current working directory, with or without a trailing path separator.
ABSOLUTE_verstackLight_FOLDER_PATH = r'./verstackLight'

# `import verstackLight` needs the folder's *parent* on sys.path.
# abspath() normalises trailing separators and platform-specific separators, so
# dirname() reliably yields that parent (the previous string slicing dropped the
# last character and only understood '/').
_package_parent = os.path.dirname(os.path.abspath(ABSOLUTE_verstackLight_FOLDER_PATH))
if _package_parent not in sys.path:  # keep re-running the cell from piling up duplicates
    sys.path.append(_package_parent)
from verstackLight import LGBMTuner

In [None]:
# The tuner must be given pandas DataFrames, so wrap every split (features and targets) in one.
X_train, X_test, y_train, y_test = map(pd.DataFrame, (X_train, X_test, y_train, y_test))

In [None]:
# Gather the tuner configuration in one place, then build the tuner from it.
tuner_settings = {
    'metric': 'log_loss',
    'trials': 25,
    'refit': True,
    'verbosity': 1,
    'visualization': True,
    'seed': 414243,
    'device_type': 'cpu',
    'n_jobs': 2,
}
tuner = LGBMTuner(**tuner_settings)

# Fit on the training split; the single-column target frame is squeezed
# so that a one-dimensional target is passed to the tuner.
tuner.fit(X_train, y_train.squeeze())

In [None]:
# Check score on test set.
# ROC AUC and log loss are both defined on predicted probabilities, so score the
# positive-class probability instead of the hard labels returned by predict().
# NOTE(review): assumes verstackLight's LGBMTuner exposes predict_proba() like
# upstream verstack does - confirm.
y_true = y_test.values.ravel()
y_proba = np.asarray(tuner.predict_proba(X_test))  # predict once, reuse for both metrics
if y_proba.ndim == 2:
    # (n_samples, n_classes) output: keep the column of the positive (last) class.
    y_proba = y_proba[:, -1]

auc_score = roc_auc_score(y_true, y_proba)
log_loss_score = log_loss(y_true, y_proba)

print("The auc score on test set: {:.4f}".format(auc_score))
print("The log loss score on test set: {:.20f}".format(log_loss_score))