In [None]:
!pip install optuna



In [None]:
import pandas as pd
import optuna
import lightgbm as lgb
import numpy as np
from sklearn.metrics import roc_auc_score

In [None]:
import warnings
warnings.simplefilter("ignore")

In [None]:
# Read the peak table CSV into a DataFrame; later cells derive features and labels from it.
data = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/parttime/peak_table.csv')

In [None]:
cd /content/drive/MyDrive/parttime/allmodels

/content/drive/MyDrive/parttime/allmodels


In [None]:
# `get_labels` comes from the local `util` module (not shown in this notebook).
from util import get_labels
# Target vector derived from the loaded table; the helper appears to print a
# short preview of the labels itself (see the first output line) - TODO confirm.
y = get_labels(data)
# Sanity check: one label per row is expected.
print(y.shape)

[0 0 0 0 0]
(1072,)


In [None]:
# Split the data into stratified train / validation sets
from sklearn.model_selection import train_test_split

# Features (X): every column except the 'group' and 'label' columns.
X = data.drop(columns=['group', 'label'])

# Hold out 20% of the rows for validation. Stratifying on y keeps the class
# proportions equal in both parts, so every class is represented in the
# validation set (one-vs-rest ROC AUC cannot be computed for a missing class).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:

def objective(trial, X_train, X_val, y_train, y_val, num_class=None):
    """Optuna objective: fit a LightGBM multiclass model and score it on the validation set.

    Args:
        trial: Optuna trial used to sample 'objective', 'num_leaves' and 'max_depth'.
        X_train: training features.
        X_val: validation features (used only for scoring).
        y_train: training labels.
        y_val: validation labels.
        num_class: number of classes. When None it is inferred as the largest
            label found in y_train / y_val plus one, which assumes integer
            labels 0..K-1 as LightGBM's multiclass objective expects.

    Returns:
        One-vs-rest ROC AUC of the predicted class probabilities on X_val.
    """
    if num_class is None:
        num_class = int(max(np.max(y_train), np.max(y_val))) + 1

    train_data = lgb.Dataset(X_train, label=y_train)

    params = {
        # Single-choice categorical: kept so 'objective' still appears in study.best_params.
        'objective': trial.suggest_categorical('objective', ['multiclass']),
        'num_class': num_class,
        # NOTE(review): with max_depth <= 5 a tree can have at most 2**5 = 32 leaves,
        # so a num_leaves range of 50-100 presumably never constrains the model -
        # consider revisiting the search space.
        'num_leaves': trial.suggest_int('num_leaves', 50, 100, step=5),
        'max_depth': trial.suggest_int('max_depth', 2, 5, step=1),
        # Silence the per-trial LightGBM info log that otherwise floods the output.
        'verbosity': -1,
    }

    # No validation set is passed to training, so there is nothing for an
    # evaluation-logging callback to report; the model is scored below instead.
    model = lgb.train(params, train_data)

    # predict() returns one probability column per class for the multiclass objective.
    y_pred_proba = model.predict(X_val)
    return roc_auc_score(y_val, y_pred_proba, multi_class='ovr')

In [None]:
# Seed the sampler (TPE is Optuna's default) so repeated runs propose the same
# hyperparameters and the reported best trial is reproducible.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction='maximize', sampler=sampler)
study.optimize(
    lambda trial: objective(trial, X_train, X_val, y_train, y_val),
    n_trials=3,
)

[I 2024-04-28 09:10:10,383] A new study created in memory with name: no-name-2c09a4e4-f3fe-483e-8231-e802ed97efd6


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020349 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 182305
[LightGBM] [Info] Number of data points in the train set: 857, number of used features: 716
[LightGBM] [Info] Start training from score -2.026050
[LightGBM] [Info] Start training from score -2.043908
[LightGBM] [Info] Start training from score -1.991264
[LightGBM] [Info] Start training from score -2.358989
[LightGBM] [Info] Start training from score -1.982753
[LightGBM] [Info] Start training from score -1.965946
[LightGBM] [Info] Start training from score -2.449373
[LightGBM] [Info] Start training from score -1.941254


[I 2024-04-28 09:10:32,572] Trial 0 finished with value: 0.9999583749583749 and parameters: {'objective': 'multiclass', 'num_leaves': 70, 'max_depth': 5}. Best is trial 0 with value: 0.9999583749583749.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 182305
[LightGBM] [Info] Number of data points in the train set: 857, number of used features: 716
[LightGBM] [Info] Start training from score -2.026050
[LightGBM] [Info] Start training from score -2.043908
[LightGBM] [Info] Start training from score -1.991264
[LightGBM] [Info] Start training from score -2.358989
[LightGBM] [Info] Start training from score -1.982753
[LightGBM] [Info] Start training from score -1.965946
[LightGBM] [Info] Start training from score -2.449373
[LightGBM] [Info] Start training from score -1.941254


[I 2024-04-28 09:10:37,108] Trial 1 finished with value: 0.99991674991675 and parameters: {'objective': 'multiclass', 'num_leaves': 90, 'max_depth': 2}. Best is trial 0 with value: 0.9999583749583749.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 182305
[LightGBM] [Info] Number of data points in the train set: 857, number of used features: 716
[LightGBM] [Info] Start training from score -2.026050
[LightGBM] [Info] Start training from score -2.043908
[LightGBM] [Info] Start training from score -1.991264
[LightGBM] [Info] Start training from score -2.358989
[LightGBM] [Info] Start training from score -1.982753
[LightGBM] [Info] Start training from score -1.965946
[LightGBM] [Info] Start training from score -2.449373
[LightGBM] [Info] Start training from score -1.941254

[I 2024-04-28 09:10:48,710] Trial 2 finished with value: 0.9999375624375624 and parameters: {'objective': 'multiclass', 'num_leaves': 90, 'max_depth': 3}. Best is trial 0 with value: 0.9999583749583749.





In [None]:
# Report the best validation score followed by the hyperparameters that produced it.
print(study.best_value, study.best_params, sep="\n")

0.9999583749583749
{'objective': 'multiclass', 'num_leaves': 70, 'max_depth': 5}
