# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
import catboost as cb

# Read only the three predictor columns and the label column from the CSV
# file, then discard every row that contains a missing value.
columns_to_load = ["Age", "WaistCirc", "BMI", "MetabolicSyndrome"]
data = pd.read_csv("Metabolic Syndrome.csv", usecols=columns_to_load)
data = data.dropna()

# Preview the first rows and show the per-column count of missing values
# (all zeros after the dropna above).
print(data.head())
print(data.isnull().sum())

# Separate the feature columns from the label column.
predicts = data[['Age', 'WaistCirc', 'BMI']]
target = data['MetabolicSyndrome']
print(predicts)
print(target)
print(predicts.shape)

# Split into training (80%) and test (20%) subsets.
# random_state pins the shuffle so repeated runs train and evaluate on the
# same rows; stratify keeps the label's class proportions equal in both
# subsets.
A_train, A_test, y_train, y_test = train_test_split(
    predicts,
    target,
    train_size=0.8,
    random_state=42,
    stratify=target,
)
print(A_train.shape)
print(A_test.shape)
# Train a random forest with stopping criteria: tree depth is capped at 15
# and a node needs at least 10 samples before it may be split further.
# random_state fixes the forest's internal randomness so scores are repeatable.
random_forest = RandomForestClassifier(
    max_depth=15,
    min_samples_split=10,
    random_state=42,
).fit(A_train, y_train)

# f1_score is documented as f1_score(y_true, y_pred, ...), so the ground-truth
# labels are passed first and the predictions second.
y_preds_d = random_forest.predict(A_train)
print('F1 мера для тренировочных данных ', f1_score(y_train, y_preds_d, average='macro'))

y_pred = random_forest.predict(A_test)
print('F1 мера для тестовых данных ', f1_score(y_test, y_pred, average='macro'))

# Unfitted forest used as the base estimator of the grid search; the fixed
# random_state makes the cross-validation scores repeatable between runs.
random_forest = RandomForestClassifier(random_state=42)

# Candidate hyperparameter values; the search tries every combination.
params_grid = {
    "max_depth": [12, 18],
    "min_samples_leaf": [3, 10],
    # minimum number of samples in a node for it to be split further
    "min_samples_split": [6, 12],
}

# 4-fold cross-validated search, ranked by macro-averaged F1.
grid_search_random_forest = GridSearchCV(
    estimator=random_forest,
    param_grid=params_grid,
    scoring="f1_macro",
    cv=4,
)

grid_search_random_forest.fit(A_train, y_train)
best_model = grid_search_random_forest.best_estimator_

# f1_score is documented as f1_score(y_true, y_pred, ...), so the ground-truth
# labels are passed first and the predictions second.
y_preds_d = best_model.predict(A_train)
print('F1 мера для тренировочных данных ', f1_score(y_train, y_preds_d, average='macro'))

y_pred = best_model.predict(A_test)
print('F1 мера для тестовых данных ', f1_score(y_test, y_pred, average="macro"))

from catboost.utils import get_gpu_device_count

# Train on GPU device 0 when CatBoost can see a GPU; otherwise fall back to
# CatBoost's default (CPU) training instead of failing at fit time.
catboost_params = {"iterations": 3000}
if get_gpu_device_count() > 0:
    catboost_params.update(task_type="GPU", devices='0')

model_catboost_clf = cb.CatBoostClassifier(**catboost_params)
model_catboost_clf.fit(A_train, y_train)

# Prediction is requested on the CPU regardless of where training ran.
# f1_score is documented as f1_score(y_true, y_pred, ...), so the ground-truth
# labels are passed first and the predictions second.
y_preds_t = model_catboost_clf.predict(A_train, task_type="CPU")
print('F1 мера для тренировочных данных ', f1_score(y_train, y_preds_t, average='macro'))

y_preds = model_catboost_clf.predict(A_test, task_type="CPU")
print('F1 мера для тестовых данных ', f1_score(y_test, y_preds, average="macro"))