In [None]:
import pandas as pd
import optuna as opt
import wandb
from catboost import CatBoostClassifier
from sklearn.model_selection import cross_val_score
from sklearn.impute import KNNImputer

def objective(trial):
    """Optuna objective: mean 3-fold CV accuracy of a CatBoost classifier.

    Samples ``iterations``, ``learning_rate`` and ``depth`` from ``trial``,
    cross-validates the resulting model on the module-level ``X`` and ``y``,
    and logs the sampled values together with the score to the active
    Weights & Biases run.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        The accuracy averaged over the three cross-validation folds.
    """
    params = {
        'iterations': trial.suggest_int('iterations', 200, 400),
        # suggest_loguniform is deprecated since Optuna 3.0; suggest_float
        # with log=True samples the same range on a log scale.
        'learning_rate': trial.suggest_float(
            'learning_rate', 0.01, 0.0598735729913422, log=True
        ),
        'depth': trial.suggest_int('depth', 3, 6),
    }

    model = CatBoostClassifier(**params, random_state=42)

    score = cross_val_score(model, X, y, cv=3, scoring='accuracy').mean()

    # Log every sampled hyperparameter next to the score it produced.
    wandb.log({**params, "accuracy": score})

    return score

# Start the Weights & Biases run that the tuning trials log to.
wandb.init(project='test')

# Load the training table and discard the Name and PassengerId columns.
df = pd.read_csv('../train.csv').drop(columns=['Name', 'PassengerId'])

# Column groups referenced by the preprocessing steps below.
binary_features = ['VIP', 'CryoSleep']
cryo_sleep_depending_features = [
    'RoomService',
    'FoodCourt',
    'ShoppingMall',
    'Spa',
    'VRDeck',
]
categorical_features_without_cabin = ['HomePlanet', 'Destination']
# Same categoricals plus the two columns derived from Cabin.
categorical_features_cleared = [*categorical_features_without_cabin, 'Deck', 'Side']
numerical_features = [
    'ShoppingMall',
    'Spa',
    'VRDeck',
    'RoomService',
    'FoodCourt',
    'Age',
]

# Missing values are filled from the single nearest neighbour.
imputer = KNNImputer(n_neighbors=1)

# Passengers flagged as CryoSleep get every spending column set to zero.
# A single masked assignment replaces the Python-level loop over all rows;
# rows whose CryoSleep value is missing compare unequal to True and are
# therefore left untouched, exactly as before.
in_cryo_sleep = df['CryoSleep'].eq(True)
df.loc[in_cryo_sleep, cryo_sleep_depending_features] = 0.0

def _encode_rows(rows, reference_columns=()):
    """Convert raw passenger rows into the all-numeric layout fed to the imputer.

    ``Cabin`` (formatted ``deck/number/side``) is reduced to ``Deck`` and
    ``Side``, the boolean columns are cast to float, and each categorical
    column is one-hot encoded. A row whose categorical value is missing gets
    NaN in all of that feature's indicator columns so the imputer fills them;
    an indicator that simply does not occur in this subset is a real 0.

    Args:
        rows: Slice of the raw frame, still containing ``Cabin``.
        reference_columns: Columns of the frame the rows will be appended to.
            Used to create indicator columns for categories that are absent
            from ``rows``.

    Returns:
        A new DataFrame; ``rows`` itself is not modified.
    """
    # Work on a copy so nothing is written into a slice of the source frame.
    rows = rows.copy()

    cabin_parts = rows['Cabin'].str.split('/')
    rows['Deck'] = cabin_parts.str[0]
    rows['Side'] = cabin_parts.str[2]
    rows = rows.drop(columns='Cabin')

    # float (not int) so that a missing flag can stay NaN until imputation.
    flag_columns = binary_features + ['Transported']
    rows = rows.astype({column: float for column in flag_columns})

    indicator_frames = []
    for column in categorical_features_cleared:
        indicators = pd.get_dummies(rows[column], prefix=column, dtype=float)

        # Categories known to the reference frame but absent here are 0.
        prefix = f'{column}_'
        known = [name for name in reference_columns if name.startswith(prefix)]
        indicators = indicators.reindex(
            columns=indicators.columns.union(known, sort=False),
            fill_value=0.0,
        )

        # An unknown category must be imputed, not encoded as "none of them".
        value_missing = rows[column].isna()
        if value_missing.any():
            indicators.loc[value_missing] = float('nan')

        indicator_frames.append(indicators)

    rows = rows.drop(columns=categorical_features_cleared)
    return pd.concat([rows, *indicator_frames], axis=1)


def _append_and_impute(frame, new_rows):
    """Append ``new_rows`` to ``frame`` and fill all NaNs with the KNN imputer."""
    if new_rows.empty:
        # Nothing to add; ``frame`` is already complete.
        return frame
    combined = pd.concat([frame, new_rows], ignore_index=True)
    return pd.DataFrame(imputer.fit_transform(combined), columns=combined.columns)


# Split the table by how many values each row is missing.
missing_per_row = df.isna().sum(axis=1)
rows_with_many_missing_values = df[missing_per_row > 1]
rows_with_one_missing_value = df[missing_per_row == 1]

# Complete rows form the initial reference set for imputation.
df = _encode_rows(df.dropna())

# Rows with exactly one gap are added one feature at a time, in the same
# order as before: numerical, categorical, binary, then Cabin.
single_gap_features = (
    numerical_features
    + categorical_features_without_cabin
    + binary_features
    + ['Cabin']
)
for feature in single_gap_features:
    rows_with_missing_feature = rows_with_one_missing_value[
        rows_with_one_missing_value[feature].isna()
    ]
    df = _append_and_impute(
        df, _encode_rows(rows_with_missing_feature, df.columns)
    )

# Rows with several gaps go last so they are imputed against everything
# already completed. Appending the Cabin-missing subset here instead would
# duplicate those samples and leave these rows out of the data set.
df = _append_and_impute(
    df, _encode_rows(rows_with_many_missing_values, df.columns)
)

# Separate the label column from the feature matrix; `objective` reads the
# module-level X and y.
target = 'Transported'
y = df[target]
X = df.drop(columns=target)

# Run 150 trials, maximising the value returned by `objective`.
study = opt.create_study(direction='maximize')
study.optimize(objective, n_trials=150)

# Store the best hyperparameters on the W&B run config and print them
# (the printed label is Russian for "Best parameters:").
best_params = study.best_params
wandb.config.update(best_params)
print("Лучшие параметры:", best_params)