In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import make_scorer, roc_auc_score

# Load the training features, the training target column and the test features.
train_features = pd.read_csv('DOTA2_TRAIN_features.csv')
train_targets = pd.read_csv('DOTA2_TRAIN_targets.csv')['radiant_win']
test_features = pd.read_csv('DOTA2_TEST_features.csv')

# Keep the match identifiers: the test ones are written to the submission
# file later, but match_id itself must not be used as a model feature.
train_match_ids = train_features['match_id']
test_match_ids = test_features['match_id']

# Remove the match_id column from both feature matrices.
train_features = train_features.drop('match_id', axis=1, errors='ignore')
test_features = test_features.drop('match_id', axis=1, errors='ignore')

# Replace missing values with 0.
train_features = train_features.fillna(0)
test_features = test_features.fillna(0)

# One-hot encode categorical (object/category dtype) columns.
train_features = pd.get_dummies(train_features)
test_features = pd.get_dummies(test_features)

# get_dummies is applied to each frame independently, so a category that
# occurs in only one of them produces a different set (or order) of columns.
# Force the test matrix onto the training columns: dummy columns missing from
# the test set are filled with 0, columns unseen during training are dropped.
test_features = test_features.reindex(columns=train_features.columns, fill_value=0)

# Model choice and hyperparameter tuning.
# A fixed random_state makes the forest (and therefore the grid-search
# ranking and the reported score) reproducible between runs.
model = RandomForestClassifier(random_state=42)
param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20]}
# Use the built-in 'roc_auc' scorer: it scores predicted probabilities.
# make_scorer(roc_auc_score) with default arguments feeds the hard labels
# from predict() into roc_auc_score, which does not measure ranking quality.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='roc_auc')
grid_search.fit(train_features, train_targets)

best_model = grid_search.best_estimator_

# Cross-validation of the selected configuration.
# NOTE: the hyperparameters were chosen on this same data, so this estimate
# is optimistically biased; treat it as an upper bound.
cross_val_auc = cross_val_score(best_model, train_features, train_targets, cv=5, scoring='roc_auc')
print(f"Cross-validated AUC: {cross_val_auc.mean():.4f}")

# Predict on the test data: keep the second column of predict_proba, i.e. the
# probability of the class listed second in best_model.classes_
# (presumably radiant_win == 1 -- confirm against the target encoding).
positive_class_column = 1
test_predictions = best_model.predict_proba(test_features)[:, positive_class_column]

# Build the submission table (one row per test match) and write it to disk
# without the DataFrame index.
submission_columns = {
    'match_id': test_match_ids,
    'radiant_win': test_predictions,
}
submission_df = pd.DataFrame(submission_columns)
submission_df.to_csv('submission.csv', index=False)


Cross-validated AUC: 0.6989
