# Ноутбук для обучения модели
---

In [2]:
import numpy as np

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

import pickle

In [3]:
# Load the Iris dataset bundled with scikit-learn; it is used as the training data.
data = datasets.load_iris()

In [4]:
# Feature matrix and class labels taken from the loaded dataset object.
x_data, y_data = data.data, data.target

In [5]:
# Split into training and test subsets.
# random_state pins the shuffle so the metrics reported below are reproducible
# between runs; stratify=y_data keeps the class proportions of the labels the
# same in both subsets. test_size is left at the scikit-learn default.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, random_state=42, stratify=y_data
)

In [6]:
# Sanity check: show the shapes of the training features and training labels.
print(x_train.shape, y_train.shape)

(112, 4) (112,)


### Обучение

In [7]:
# Create the baseline random forest (shallow trees, fixed seed).
# Fit it on the training split only, so that the held-out test rows are never
# seen by a fitted model before evaluation.
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(x_train, y_train)

RandomForestClassifier(max_depth=2, random_state=0)

In [None]:
# Train with GridSearchCV over several forest sizes and tree depths.
param_grid = dict(
    n_estimators=[3, 5, 10, 15, 20, 30, 50, 75, 100],
    max_depth=[2, 5, 7, 9],
)

# cv=5: every parameter combination is scored with 5-fold cross-validation
# on the training split.
grid_clf = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5)
grid_clf.fit(x_train, y_train)


### Валидация

In [None]:
# Display the best estimator selected by the grid search (shows its parameters).
grid_clf.best_estimator_

In [None]:
# Predict class labels for the held-out test split with the fitted grid search.
y_pred = grid_clf.predict(x_test)

In [None]:
# scikit-learn metric functions take the ground truth first and the predictions
# second: metric(y_true, y_pred). Keep that order so the call matches the
# documented signature and the other metric calls in this notebook.
f1 = f1_score(y_test, y_pred, average='macro')
acc = accuracy_score(y_test, y_pred)

In [None]:
# Macro-averaged precision and recall on the test split, computed up front so
# the report below is just formatting.
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

# Report all four metrics with four decimal places.
print(f"Accuracy: {acc : .4f}")
print(f"F1-Score: {f1 : .4f}")
print(f"Точность (Precision): {precision : .4f}")
print(f"Полнота (Recall): {recall : .4f}")

### Сериализация

In [None]:
# Persist the fitted grid-search object to disk as a pickle file.
filename = "random_forest_model.pkl"
model = grid_clf

# Serialize to bytes first, then write them out in binary mode.
with open(filename, "wb") as model_file:
    model_file.write(pickle.dumps(model))

In [None]:
# Read the pickle back from disk into a separate object.
# NOTE: unpickling executes arbitrary code; only load files from a trusted source.
with open(filename, "rb") as model_file:
    new_model = pickle.loads(model_file.read())

In [119]:
# Check that saving worked: predict with the reloaded model and print
# macro F1 followed by accuracy on the test split.
y_pred = new_model.predict(x_test)
for metric_value in (
    f1_score(y_test, y_pred, average="macro"),
    accuracy_score(y_test, y_pred),
):
    print(metric_value)

0.972632731253421
0.9736842105263158
