In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import roc_auc_score

# Загрузка данных
# data = pd.read_csv('your_data.csv')
# Для примера создадим DataFrame с аналогичной структурой
data = pd.DataFrame({col: [0] * 1000 for col in [
    'TransactionID', 'isFraud', 'TransactionDT', 'TransactionAmt', 'ProductCD',
    'card1', 'card2', 'card3', 'card4', 'card5', 'card6', 'addr1', 'addr2',
    'dist1', 'dist2', 'P_emaildomain', 'R_emaildomain', 
    'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12',
    'C13', 'C14', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10',
    'D11', 'D12', 'D13', 'D14', 'D15', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7',
    'M8', 'M9'] + [f'V{i}' for i in range(1, 340)]})

# Добавим случайные данные для примера
import numpy as np
for col in data.columns:
    if col.startswith(('C', 'D', 'V')):
        data[col] = np.random.randint(0, 10, size=len(data))
    elif col in ['ProductCD', 'card4', 'card6', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9']:
        data[col] = np.random.choice(['A', 'B', 'C', 'D'], size=len(data))

data['isFraud'] = np.random.randint(0, 2, size=len(data))

# Разделение данных
X = data.drop(['isFraud', 'TransactionID'], axis=1)  # Удаляем ID и целевую переменную
y = data['isFraud']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Определяем категориальные признаки
cat_features = ['ProductCD', 'card4', 'card6'] + [f'M{i}' for i in range(1, 10)]

# Преобразуем категориальные признаки в строковый тип
for col in cat_features:
    X_train[col] = X_train[col].astype(str)
    X_test[col] = X_test[col].astype(str)

# Создаем Pool для CatBoost
train_pool = Pool(X_train, y_train, cat_features=cat_features)
test_pool = Pool(X_test, y_test, cat_features=cat_features)

# Обучение CatBoost
catboost_model = CatBoostClassifier(
    iterations=500,
    learning_rate=0.05,
    depth=6,
    eval_metric='AUC',
    random_seed=42,
    verbose=100
)

catboost_model.fit(train_pool, eval_set=test_pool)

# Предсказания
catboost_pred = catboost_model.predict_proba(test_pool)[:, 1]
catboost_auc = roc_auc_score(y_test, catboost_pred)
print(f"CatBoost ROC-AUC: {catboost_auc:.4f}")

0:	test: 0.5076014	best: 0.5076014 (0)	total: 172ms	remaining: 1m 26s
100:	test: 0.4852418	best: 0.5681454 (47)	total: 2.9s	remaining: 11.5s
200:	test: 0.4940434	best: 0.5681454 (47)	total: 5.29s	remaining: 7.87s
300:	test: 0.5011558	best: 0.5681454 (47)	total: 7.78s	remaining: 5.14s
400:	test: 0.4739065	best: 0.5681454 (47)	total: 10.3s	remaining: 2.54s
499:	test: 0.4802632	best: 0.5681454 (47)	total: 12.8s	remaining: 0us

bestTest = 0.5681454481
bestIteration = 47

Shrink model to first 48 iterations.
CatBoost ROC-AUC: 0.5681
