In [124]:
import pandas as pd
import numpy as np
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score
import optuna

In [None]:
data = pd.read_csv('train_sample_dropped.csv')

In [None]:
y_data = data['flag']
X_data = data.drop('flag', axis=1)

In [131]:
X, X_test, y, y_test = train_test_split(X_data, y_data, test_size=0.1, random_state=42)

In [132]:
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

In [12]:
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 1000),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 10, 60),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0, log=True),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0, log=True),
        "lambda_l1": trial.suggest_float("lambda1", 1e-3, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda2", 1e-3, 10.0, log=True),
        "verbosity": -1
    }
    all_f1 = []
    for i, (train_ind, val_ind) in enumerate(skf.split(X.values, y)):
        X_train, X_val = X.iloc[train_ind, :], X.iloc[val_ind, :]
        y_train, y_val = y.iloc[train_ind], y.iloc[val_ind]
        model = LGBMClassifier(**params, verbose=False)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        f1 = f1_score(y_pred, y_val)
        all_f1.append(f1) 
    
    return sum(all_f1) / len(all_f1)

# Запускаем оптимизацию
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30, show_progress_bar=True)

[I 2025-05-12 21:18:04,952] A new study created in memory with name: no-name-cb76d3b6-6f19-4b9e-a582-7077ec9776f9
found 0 physical cores < 1
  File "c:\Users\zayda\AppData\Local\Programs\Python\Python312\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")
Best trial: 0. Best value: 0.707744:   3%|▎         | 1/30 [01:03<30:53, 63.91s/it]

[I 2025-05-12 21:19:08,855] Trial 0 finished with value: 0.7077443308397018 and parameters: {'n_estimators': 668, 'max_depth': 8, 'learning_rate': 0.02279443608487209, 'num_leaves': 53, 'bagging_fraction': 0.6265239507915653, 'feature_fraction': 0.9152684112019965, 'lambda1': 0.003243474576550798, 'lambda2': 0.6905350389594642}. Best is trial 0 with value: 0.7077443308397018.


Best trial: 0. Best value: 0.707744:   7%|▋         | 2/30 [01:13<14:48, 31.72s/it]

[I 2025-05-12 21:19:18,051] Trial 1 finished with value: 0.6885928370416233 and parameters: {'n_estimators': 69, 'max_depth': 4, 'learning_rate': 0.10064182315805366, 'num_leaves': 30, 'bagging_fraction': 0.6029166962365372, 'feature_fraction': 0.915489487271424, 'lambda1': 0.0016644375663454494, 'lambda2': 0.20973673358793712}. Best is trial 0 with value: 0.7077443308397018.


Best trial: 0. Best value: 0.707744:  10%|█         | 3/30 [02:06<18:42, 41.58s/it]

[I 2025-05-12 21:20:11,348] Trial 2 finished with value: 0.7069580200583528 and parameters: {'n_estimators': 814, 'max_depth': 9, 'learning_rate': 0.037209711977739465, 'num_leaves': 29, 'bagging_fraction': 0.897784654031551, 'feature_fraction': 0.7198030602974201, 'lambda1': 1.345382356177075, 'lambda2': 0.06146101013950743}. Best is trial 0 with value: 0.7077443308397018.


Best trial: 3. Best value: 0.710209:  13%|█▎        | 4/30 [02:58<19:49, 45.77s/it]

[I 2025-05-12 21:21:03,556] Trial 3 finished with value: 0.7102091788135013 and parameters: {'n_estimators': 573, 'max_depth': 8, 'learning_rate': 0.03557713066363677, 'num_leaves': 54, 'bagging_fraction': 0.738680760712416, 'feature_fraction': 0.6773195155785979, 'lambda1': 1.2907639829751567, 'lambda2': 6.3402770018642265}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  17%|█▋        | 5/30 [03:39<18:22, 44.09s/it]

[I 2025-05-12 21:21:44,682] Trial 4 finished with value: 0.7074077884260672 and parameters: {'n_estimators': 815, 'max_depth': 4, 'learning_rate': 0.056974715353868395, 'num_leaves': 49, 'bagging_fraction': 0.635691519977367, 'feature_fraction': 0.7763745369115398, 'lambda1': 6.153964124350213, 'lambda2': 0.010179611813228205}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  20%|██        | 6/30 [04:19<16:59, 42.47s/it]

[I 2025-05-12 21:22:23,986] Trial 5 finished with value: 0.6993379958011847 and parameters: {'n_estimators': 784, 'max_depth': 5, 'learning_rate': 0.21652799577680498, 'num_leaves': 39, 'bagging_fraction': 0.8353569950281039, 'feature_fraction': 0.6738277134872971, 'lambda1': 3.116291126088726, 'lambda2': 0.003334232162813424}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  23%|██▎       | 7/30 [04:32<12:41, 33.09s/it]

[I 2025-05-12 21:22:37,782] Trial 6 finished with value: 0.7044803509844596 and parameters: {'n_estimators': 323, 'max_depth': 5, 'learning_rate': 0.1036706246638724, 'num_leaves': 11, 'bagging_fraction': 0.759244924509228, 'feature_fraction': 0.7482444977505933, 'lambda1': 0.5267543768826327, 'lambda2': 0.7154152838001587}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  27%|██▋       | 8/30 [05:16<13:21, 36.43s/it]

[I 2025-05-12 21:23:21,366] Trial 7 finished with value: 0.7066082489308667 and parameters: {'n_estimators': 736, 'max_depth': 6, 'learning_rate': 0.04085408508958454, 'num_leaves': 55, 'bagging_fraction': 0.8215408193360434, 'feature_fraction': 0.9897450017358452, 'lambda1': 0.02097773431160054, 'lambda2': 0.09607369633977557}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  30%|███       | 9/30 [05:37<11:04, 31.62s/it]

[I 2025-05-12 21:23:42,416] Trial 8 finished with value: 0.7040644614150271 and parameters: {'n_estimators': 501, 'max_depth': 4, 'learning_rate': 0.06734204833406479, 'num_leaves': 30, 'bagging_fraction': 0.6112388309159374, 'feature_fraction': 0.9023521199909422, 'lambda1': 1.2260320666079125, 'lambda2': 0.004324958020575287}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  33%|███▎      | 10/30 [05:54<09:02, 27.13s/it]

[I 2025-05-12 21:23:59,489] Trial 9 finished with value: 0.6991426260017833 and parameters: {'n_estimators': 522, 'max_depth': 4, 'learning_rate': 0.21588575400946342, 'num_leaves': 15, 'bagging_fraction': 0.6945272841531949, 'feature_fraction': 0.6964296579139687, 'lambda1': 0.11517961285702925, 'lambda2': 0.033095198702220774}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  37%|███▋      | 11/30 [07:33<15:31, 49.03s/it]

[I 2025-05-12 21:25:38,177] Trial 10 finished with value: 0.7084566221037121 and parameters: {'n_estimators': 997, 'max_depth': 10, 'learning_rate': 0.015288365784559782, 'num_leaves': 60, 'bagging_fraction': 0.98887945609231, 'feature_fraction': 0.6149062301074917, 'lambda1': 0.13879308510721902, 'lambda2': 9.822730949977805}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  40%|████      | 12/30 [09:07<18:52, 62.93s/it]

[I 2025-05-12 21:27:12,900] Trial 11 finished with value: 0.7063807691487658 and parameters: {'n_estimators': 948, 'max_depth': 10, 'learning_rate': 0.010362757445727568, 'num_leaves': 58, 'bagging_fraction': 0.9743333268075789, 'feature_fraction': 0.6002278268780606, 'lambda1': 0.16204548178016, 'lambda2': 9.198560626489568}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  43%|████▎     | 13/30 [09:39<15:10, 53.54s/it]

[I 2025-05-12 21:27:44,801] Trial 12 finished with value: 0.6967084035190577 and parameters: {'n_estimators': 324, 'max_depth': 8, 'learning_rate': 0.014814399405996055, 'num_leaves': 42, 'bagging_fraction': 0.7119219819827474, 'feature_fraction': 0.6124250291396158, 'lambda1': 0.024168297357686566, 'lambda2': 7.750182257233705}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  47%|████▋     | 14/30 [11:03<16:41, 62.57s/it]

[I 2025-05-12 21:29:08,247] Trial 13 finished with value: 0.7100513570611489 and parameters: {'n_estimators': 971, 'max_depth': 10, 'learning_rate': 0.02244111608239003, 'num_leaves': 60, 'bagging_fraction': 0.9981681471309004, 'feature_fraction': 0.6388702083289154, 'lambda1': 0.3095628176627396, 'lambda2': 1.9278445485978746}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  50%|█████     | 15/30 [11:37<13:31, 54.13s/it]

[I 2025-05-12 21:29:42,818] Trial 14 finished with value: 0.702479320024529 and parameters: {'n_estimators': 369, 'max_depth': 8, 'learning_rate': 0.023783422154752258, 'num_leaves': 45, 'bagging_fraction': 0.7612186437500357, 'feature_fraction': 0.6473035451500556, 'lambda1': 0.5759335762952094, 'lambda2': 1.7001678501991804}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  53%|█████▎    | 16/30 [11:53<09:54, 42.43s/it]

[I 2025-05-12 21:29:58,101] Trial 15 finished with value: 0.6888914944303147 and parameters: {'n_estimators': 109, 'max_depth': 7, 'learning_rate': 0.027455982466304592, 'num_leaves': 50, 'bagging_fraction': 0.8820765186805263, 'feature_fraction': 0.7892978074199062, 'lambda1': 0.02499005373232594, 'lambda2': 2.2998550258761727}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  57%|█████▋    | 17/30 [12:59<10:47, 49.77s/it]

[I 2025-05-12 21:31:04,935] Trial 16 finished with value: 0.7091489723099509 and parameters: {'n_estimators': 640, 'max_depth': 9, 'learning_rate': 0.0381401086860576, 'num_leaves': 60, 'bagging_fraction': 0.6919297499834133, 'feature_fraction': 0.6494619854140866, 'lambda1': 9.091354086663218, 'lambda2': 2.3504011657172796}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  60%|██████    | 18/30 [13:44<09:38, 48.25s/it]

[I 2025-05-12 21:31:49,640] Trial 17 finished with value: 0.7048550846092484 and parameters: {'n_estimators': 902, 'max_depth': 9, 'learning_rate': 0.08447390884060811, 'num_leaves': 21, 'bagging_fraction': 0.8108835907298978, 'feature_fraction': 0.8227184132749644, 'lambda1': 0.415303024578037, 'lambda2': 0.2922156760176523}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  63%|██████▎   | 19/30 [14:06<07:22, 40.25s/it]

[I 2025-05-12 21:32:11,252] Trial 18 finished with value: 0.6937776690582199 and parameters: {'n_estimators': 206, 'max_depth': 7, 'learning_rate': 0.0189466339987855, 'num_leaves': 46, 'bagging_fraction': 0.9214504723599165, 'feature_fraction': 0.6605103935771987, 'lambda1': 2.5275959021810293, 'lambda2': 0.0010230090428006273}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  67%|██████▋   | 20/30 [15:04<07:36, 45.66s/it]

[I 2025-05-12 21:33:09,519] Trial 19 finished with value: 0.7021118656678296 and parameters: {'n_estimators': 614, 'max_depth': 10, 'learning_rate': 0.011419136302688086, 'num_leaves': 39, 'bagging_fraction': 0.718600107580833, 'feature_fraction': 0.7302199020146339, 'lambda1': 0.04936813902274479, 'lambda2': 0.8765997714897793}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  70%|███████   | 21/30 [15:47<06:43, 44.79s/it]

[I 2025-05-12 21:33:52,258] Trial 20 finished with value: 0.7071426836077065 and parameters: {'n_estimators': 476, 'max_depth': 9, 'learning_rate': 0.03134907444506814, 'num_leaves': 54, 'bagging_fraction': 0.8603341901072975, 'feature_fraction': 0.6873451783538739, 'lambda1': 0.007449295699059515, 'lambda2': 4.391831802884639}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  73%|███████▎  | 22/30 [17:08<07:25, 55.68s/it]

[I 2025-05-12 21:35:13,356] Trial 21 finished with value: 0.7081411220794784 and parameters: {'n_estimators': 604, 'max_depth': 9, 'learning_rate': 0.034705029837238804, 'num_leaves': 60, 'bagging_fraction': 0.6619103768205064, 'feature_fraction': 0.6399940348615767, 'lambda1': 6.211638723457266, 'lambda2': 2.8308112100285685}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  77%|███████▋  | 23/30 [18:04<06:30, 55.78s/it]

[I 2025-05-12 21:36:09,383] Trial 22 finished with value: 0.7073071724624088 and parameters: {'n_estimators': 411, 'max_depth': 8, 'learning_rate': 0.04713227920773978, 'num_leaves': 55, 'bagging_fraction': 0.671623828809859, 'feature_fraction': 0.6294282541476618, 'lambda1': 7.865098302959122, 'lambda2': 1.5726011098199002}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  80%|████████  | 24/30 [19:36<06:39, 66.55s/it]

[I 2025-05-12 21:37:41,050] Trial 23 finished with value: 0.7081710525573359 and parameters: {'n_estimators': 690, 'max_depth': 10, 'learning_rate': 0.019363944392273638, 'num_leaves': 50, 'bagging_fraction': 0.7520652382315286, 'feature_fraction': 0.7021988199746154, 'lambda1': 0.2628538527053204, 'lambda2': 0.31771326905203673}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  83%|████████▎ | 25/30 [21:38<06:56, 83.39s/it]

[I 2025-05-12 21:39:43,718] Trial 24 finished with value: 0.70969858988729 and parameters: {'n_estimators': 880, 'max_depth': 9, 'learning_rate': 0.05492412685113303, 'num_leaves': 57, 'bagging_fraction': 0.7905536572663922, 'feature_fraction': 0.6599980725334165, 'lambda1': 1.247531063039433, 'lambda2': 3.609511747324812}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  87%|████████▋ | 26/30 [23:15<05:49, 87.45s/it]

[I 2025-05-12 21:41:20,635] Trial 25 finished with value: 0.7048680184784193 and parameters: {'n_estimators': 888, 'max_depth': 7, 'learning_rate': 0.14534673182576713, 'num_leaves': 56, 'bagging_fraction': 0.7938693886627032, 'feature_fraction': 0.6707038700795991, 'lambda1': 0.9710292917062124, 'lambda2': 4.664744885114369}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  90%|█████████ | 27/30 [25:29<05:03, 101.23s/it]

[I 2025-05-12 21:43:34,008] Trial 26 finished with value: 0.7078906443789335 and parameters: {'n_estimators': 992, 'max_depth': 8, 'learning_rate': 0.06304453875618206, 'num_leaves': 47, 'bagging_fraction': 0.7815516207071685, 'feature_fraction': 0.7410574066300872, 'lambda1': 2.012020647563308, 'lambda2': 0.9727141969986981}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  93%|█████████▎| 28/30 [27:49<03:45, 112.92s/it]

[I 2025-05-12 21:45:54,222] Trial 27 finished with value: 0.7095091421742492 and parameters: {'n_estimators': 889, 'max_depth': 10, 'learning_rate': 0.04705886835053195, 'num_leaves': 52, 'bagging_fraction': 0.9403396990774575, 'feature_fraction': 0.823886949639248, 'lambda1': 0.2831260875610823, 'lambda2': 4.664361664805743}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209:  97%|█████████▋| 29/30 [29:03<01:41, 101.43s/it]

[I 2025-05-12 21:47:08,845] Trial 28 finished with value: 0.7074523190549993 and parameters: {'n_estimators': 863, 'max_depth': 6, 'learning_rate': 0.07499418500801316, 'num_leaves': 36, 'bagging_fraction': 0.7374098729420856, 'feature_fraction': 0.6298298185002799, 'lambda1': 0.06885617353150639, 'lambda2': 0.4443328848292645}. Best is trial 3 with value: 0.7102091788135013.


Best trial: 3. Best value: 0.710209: 100%|██████████| 30/30 [30:54<00:00, 61.81s/it] 

[I 2025-05-12 21:48:59,206] Trial 29 finished with value: 0.7079334044266901 and parameters: {'n_estimators': 701, 'max_depth': 9, 'learning_rate': 0.02340782027185801, 'num_leaves': 52, 'bagging_fraction': 0.9532946700562546, 'feature_fraction': 0.6849752863015494, 'lambda1': 0.7256728703920281, 'lambda2': 1.309377745236475}. Best is trial 3 with value: 0.7102091788135013.





In [61]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1)

In [69]:
def objective(trial):
    params = {
        "iterations": trial.suggest_int("iterations", 100, 1000),
        "depth": trial.suggest_int("depth", 4, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0, log=True),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-3, 10, log=True),
        "random_strength": trial.suggest_float("random_strength", 0.0, 10.0),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 1.0),
        "border_count": trial.suggest_int("border_count", 32, 255),
        "verbose": 0,
    }
    model = CatBoostClassifier(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    return f1_score(y_pred, y_val)


# Запускаем оптимизацию
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2025-05-16 13:30:56,069] A new study created in memory with name: no-name-6bfe508a-84b2-43e2-bf24-f8a03f0eb318
[I 2025-05-16 13:31:08,315] Trial 0 finished with value: 0.6603482477407979 and parameters: {'iterations': 146, 'depth': 6, 'learning_rate': 0.02524110630916377, 'subsample': 0.6845452627352011, 'l2_leaf_reg': 0.2346421085337576, 'random_strength': 2.805363432343142, 'bagging_temperature': 0.019997649608886836, 'border_count': 112}. Best is trial 0 with value: 0.6603482477407979.
[I 2025-05-16 13:31:30,660] Trial 1 finished with value: 0.6972043010752688 and parameters: {'iterations': 320, 'depth': 6, 'learning_rate': 0.27494203663430433, 'subsample': 0.7306588490746503, 'l2_leaf_reg': 0.8487989443552472, 'random_strength': 6.526886259716379, 'bagging_temperature': 0.9382432232649237, 'border_count': 56}. Best is trial 1 with value: 0.6972043010752688.
[I 2025-05-16 13:32:10,388] Trial 2 finished with value: 0.6845901639344262 and parameters: {'iterations': 669, 'depth': 4,

In [135]:
study.best_params

{'n_estimators': 722,
 'max_depth': 10,
 'learning_rate': 0.044112234993877134,
 'subsample': 0.7967921767299626,
 'colsample_bytree': 0.9902645298264656,
 'lambda': 0.28898388933438157,
 'alpha': 0.88536586368353,
 'gamma': 3.2715697753064725e-06}