In [None]:
from mylib import base, dataset
from mylib.model_classification import ModelClass
from mylib.model_base import ModelBase

from dotenv import dotenv_values
from pathlib import Path
from pprint import pprint, pformat

import pandas as pd
import pandas.api.types as pd_types

import numpy as np

import plotly.express as plotly_px

import joblib

import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
import warnings
# Install a global 'ignore' filter so that no warnings are shown in the notebook output.
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 50) # Show at most 50 columns when a DataFrame is displayed
#pd.set_option('display.max_rows', 50) # (disabled) would cap the number of displayed rows at 50
pd.options.display.float_format = '{:.5f}'.format # Render floats with five digits after the decimal point
pd.options.mode.use_inf_as_na = True # Make pandas treat infinities (inf) as missing values (NA)
# NOTE(review): `mode.use_inf_as_na` appears to be deprecated in recent pandas releases —
# confirm against the pinned pandas version (the warning itself is hidden by the filter above).

# Render inline figures as vector (SVG) images
%config InlineBackend.figure_format = 'svg'

# Draw matplotlib figures inside the Jupyter Notebook
%matplotlib inline

# 1. Загрузка конфигов

In [None]:
# Load the parameters: read the "settings" dotenv file into a plain dict
# and wrap it in the project's Settings object.
settings_dict = dict(dotenv_values("settings"))
settings = base.Settings(settings_dict)

# Store RANDOM_STATE back as an int (presumably it is read from the file as text).
settings.enviroment["RANDOM_STATE"] = int(
    settings.enviroment["RANDOM_STATE"]
)

# Parallelism and verbosity values forwarded to the model-fitting helper later on.
n_jobs, verbose = -1, 3

In [None]:
# Display the environment mapping held by the settings object for inspection.
# NOTE(review): if the dotenv file contains credentials, this cell's output will
# expose them — clear the output before sharing the notebook.
settings.enviroment

# 1. Загрузка датасета

In [None]:
# Load the parameters object whose file name is configured under
# PARAMS_FILENAME_AFTER_EDA_AFTER_OUTLIERS, from the result folder.
params = joblib.load(
    Path(settings.result_folder)
    / settings.enviroment["PARAMS_FILENAME_AFTER_EDA_AFTER_OUTLIERS"]
)

In [None]:
def _load_artifact(settings, template_key, variant):
    """Load one joblib artifact from the result folder.

    Args:
        settings: Settings object exposing ``result_folder`` and the
            ``enviroment`` mapping.
        template_key: Key in ``settings.enviroment`` whose value is a
            %-style file-name template.
        variant: Value substituted into the template (e.g. a scaler name).

    Returns:
        Whatever object ``joblib.load`` reads from the resolved path.
    """
    filename = settings.enviroment[template_key] % variant
    return joblib.load(Path(settings.result_folder, filename))


# Targets are stored once, under the "common" variant.
y_train = _load_artifact(settings, "y_Train_FILENAME_TEMPLATE", "common")
y_test = _load_artifact(settings, "y_Test_FILENAME_TEMPLATE", "common")

# Feature matrices: "wo_scaler" (presumably unscaled — confirm) plus one pair per scaler.
X_train = _load_artifact(settings, "X_Train_FILENAME_TEMPLATE", "wo_scaler")
X_test = _load_artifact(settings, "X_Test_FILENAME_TEMPLATE", "wo_scaler")

X_train_StandardScaler = _load_artifact(settings, "X_Train_FILENAME_TEMPLATE", "StandardScaler")
X_test_StandardScaler = _load_artifact(settings, "X_Test_FILENAME_TEMPLATE", "StandardScaler")

X_train_MinMaxScaler = _load_artifact(settings, "X_Train_FILENAME_TEMPLATE", "MinMaxScaler")
X_test_MinMaxScaler = _load_artifact(settings, "X_Test_FILENAME_TEMPLATE", "MinMaxScaler")

X_train_MaxAbsScaler = _load_artifact(settings, "X_Train_FILENAME_TEMPLATE", "MaxAbsScaler")
X_test_MaxAbsScaler = _load_artifact(settings, "X_Test_FILENAME_TEMPLATE", "MaxAbsScaler")

X_train_RobustScaler = _load_artifact(settings, "X_Train_FILENAME_TEMPLATE", "RobustScaler")
X_test_RobustScaler = _load_artifact(settings, "X_Test_FILENAME_TEMPLATE", "RobustScaler")

# 2. Модель KNeighborsClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Base name for the KNN model; a scaler suffix is appended to it when the grid search is created.
knn_model_name = "KNN"

In [None]:
# Grid search over KNeighborsClassifier hyper-parameters on the standard-scaled
# training features, scored with F1.
# NOTE(review): judging by its name, the helper loads a previously saved search
# or fits a new one — confirm in mylib.model_base.
knn_grid_search_f1 = ModelBase.load_or_create_and_fit_GridSearchCV(
    knn_model_name + "_StandardScaler",
    KNeighborsClassifier,
    {
        'n_neighbors': range(5, 21),  # 5..20 inclusive
        'p': [1, 2, 3],               # power parameter of the Minkowski distance
    },
    X_train_StandardScaler, y_train, settings,
    n_jobs=n_jobs,
    verbose=verbose,
    scoring='f1',
)

# The original name suggested a random-forest search scored by accuracy, which
# this is not; keep it as an alias so any later cell that uses it still works.
rfc_grid_search_accuracy = knn_grid_search_f1

pprint(knn_grid_search_f1.best_params_)
print(knn_grid_search_f1.best_score_)