In [None]:
import mlflow
import optuna
import pandas as pd

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [None]:
def mlflow_callback(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
    """Optuna callback that records one finished trial as an MLflow run.

    The run is created inside the experiment named ``Alfa_Hack<target_name>``,
    where ``target_name`` is a module-level variable set by the tuning loop.

    Args:
        study: The study the trial belongs to (not used here).
        trial: The finished trial; its number becomes the run name, its
            params are logged as MLflow params and its value as ``roc_auc``.
    """
    # f-string instead of "+" so an integer target_name does not raise TypeError.
    mlflow.set_experiment(f"Alfa_Hack{target_name}")
    with mlflow.start_run(run_name=str(trial.number)):
        # log_params is the MLflow call that accepts a dict of parameters.
        mlflow.log_params(trial.params)
        # Failed or pruned trials carry no value; skip the metric for them.
        if trial.value is not None:
            mlflow.log_metrics({"roc_auc": trial.value})


def objective_xgboost(trial):
    """Optuna objective: fit an XGBoost classifier and return its ROC AUC.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and ``y_test``
    set by the tuning loop before ``study.optimize`` is called.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        ROC AUC of the positive-class probabilities on ``x_test``/``y_test``.
    """
    params = {
        'booster': trial.suggest_categorical('booster', ['gbtree']),
        'max_depth': trial.suggest_int('max_depth', 5, 15),
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 5000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        # NOTE(review): 'mlogloss' is the multiclass log-loss metric, while the
        # score below is a binary ROC AUC -- confirm this is the intended metric.
        'eval_metric': trial.suggest_categorical('eval_metric', ['mlogloss']),
    }

    model_xgb = XGBClassifier(device='cuda', enable_categorical=True, n_jobs=-1, **params)
    model_xgb.fit(x_train, y_train)
    # Column 1 of predict_proba is the probability of the positive class.
    y_pred_1 = model_xgb.predict_proba(x_test)[:, 1]
    score = roc_auc_score(y_test, y_pred_1)

    return score

In [None]:
# Load the preprocessed data for target_1 and target_2.
train_1_df = pd.read_csv('./Data/target_1_train.csv')
# NOTE(review): the test frame is read from the *train* CSV -- this looks like a
# copy-paste slip; confirm the real test file name and point this path at it.
test_1_df = pd.read_csv('./Data/target_1_train.csv')
target_1 = train_1_df['target_1']
train_1_df = train_1_df.drop(columns=['target_1'])

train_2_df = pd.read_csv('./Data/target_2_train.csv')
# NOTE(review): same as above -- the test frame is read from the train CSV.
test_2_df = pd.read_csv('./Data/target_2_train.csv')
target_2 = train_2_df['target_2']
# Drop this frame's own label column (the original dropped 'target_1' here,
# which left 'target_2' among the features or raised KeyError).
train_2_df = train_2_df.drop(columns=['target_2'])

# Feature frames and label series, indexed by target (0 -> target_1, 1 -> target_2).
train_dfs = [train_1_df, train_2_df]
targets = [target_1, target_2]

# Per-target results: best hyperparameters and the train/validation split used.
params, data = [], []
for i, (features, labels) in enumerate(zip(train_dfs, targets)):
    # target_name is read by mlflow_callback to pick the experiment name.
    target_name = i + 1
    # The split is stored in module-level names because objective_xgboost reads them.
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
    study = optuna.create_study(study_name=f'target_{i+1}', direction='maximize')
    study.optimize(
        objective_xgboost,
        n_trials=100,
        show_progress_bar=True,
        callbacks=[mlflow_callback],
    )
    params.append(study.best_params)
    data.append((x_train, x_test, y_train, y_test))

In [None]:
# Refit one model per target with its best hyperparameters and score the test frame.
predictions = []
test_frames = (test_1_df, test_2_df)
for target_index in range(2):
    # data[target_index] is (x_train, x_test, y_train, y_test) from the tuning loop.
    fit_features, _, fit_labels, _ = data[target_index]

    # Same fixed settings as in objective_xgboost, so the tuned parameters are
    # applied under the configuration they were searched with.
    model_t1 = XGBClassifier(
        device='cuda', enable_categorical=True, n_jobs=-1, **params[target_index]
    )
    model_t1.fit(fit_features, fit_labels)

    # Predict on the test features (the original passed the label Series here).
    # Selecting the training columns keeps the column set and order identical
    # to what the model was fitted on.
    test_features = test_frames[target_index][fit_features.columns]
    pred = model_t1.predict_proba(test_features)[:, 1]
    predictions.append(pred)

Выгрузка результатов

In [None]:
# Assemble the output frame: one row per id, scored by the larger of the two
# per-target probabilities, then write it to CSV without the index.
df = pd.DataFrame(columns=['id', 'target1', 'target2', 'score'])
df['id'] = test_1_df['id']
df['target1'], df['target2'] = predictions[0], predictions[1]

# Element-wise maximum of the two prediction arrays.
score = [max(first, second) for first, second in zip(predictions[0], predictions[1])]
df['score'] = score

# Only 'id' and 'score' are exported.
df = df.drop(columns=['target1', 'target2'])
df.to_csv('./Data/out.csv', index=False)