In [None]:
!pip install umap-learn
# !pip install numpy
# !pip install pandas
# !pip install keras
# !pip install scikit-learn
# !pip install opencv-python

Collecting umap-learn
  Downloading umap_learn-0.5.6-py3-none-any.whl (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.7/85.7 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting pynndescent>=0.5 (from umap-learn)
  Downloading pynndescent-0.5.12-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pynndescent, umap-learn
Successfully installed pynndescent-0.5.12 umap-learn-0.5.6


In [None]:
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential, Model
from keras.callbacks import ModelCheckpoint
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from keras.layers import BatchNormalization
from keras.layers import Concatenate
from keras.layers import GlobalAveragePooling2D
from keras.layers import Conv2DTranspose
from keras.layers import Resizing

import tensorflow as tf
import keras
import numpy as np
from scipy.stats import rankdata

In [None]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer
import umap

def pca_features(data: np.ndarray, n_components: int = 10) -> np.ndarray:
    """
    Project every sample of ``data`` onto its leading principal components.

    Each element of ``data`` is flattened to a 1-D vector before a fresh PCA
    model is fitted on the whole set.

    :param data: iterable of arrays (one per sample) exposing ``.flatten()``
    :param n_components: number of principal components to keep
    :return: array with one row per sample and ``n_components`` columns
    """
    reducer = PCA(n_components=n_components)
    flat_rows = []
    for sample in data:
        flat_rows.append(sample.flatten())
    return reducer.fit_transform(np.array(flat_rows))


def t_sne_features(data: np.ndarray, n_components: int = 10):
    """
    Embed every sample of ``data`` with t-SNE.

    Each element of ``data`` is flattened to a 1-D vector; the embedding uses
    the exact (non-approximate) method, random initialisation, automatic
    learning rate and a fixed perplexity of 3.

    :param data: iterable of arrays (one per sample) exposing ``.flatten()``
    :param n_components: dimensionality of the embedding
    :return: array with one row per sample and ``n_components`` columns
    """
    embedder = TSNE(
        n_components=n_components,
        learning_rate='auto',
        init='random',
        method='exact',
        perplexity=3,
    )
    flat_rows = np.array([sample.flatten() for sample in data])
    return embedder.fit_transform(flat_rows)

def umap_features(data: np.ndarray, n_components: int = 10):
    """
    Embed every sample of ``data`` with UMAP.

    Each element of ``data`` is flattened to a 1-D vector before fitting.

    :param data: iterable of arrays (one per sample) exposing ``.flatten()``
    :param n_components: dimensionality of the embedding; forwarded to
        ``umap.UMAP`` (previously this argument was accepted but ignored, so
        the library default was always used)
    :return: array with one row per sample and ``n_components`` columns
    """
    flattened_data = np.array([img.flatten() for img in data])
    data_processed = umap.UMAP(n_components=n_components).fit_transform(flattened_data)
    return data_processed

In [None]:
def rank_based_transform(x, k=0.5):
    """
    Rank-based logit transform of a vector.

    Observed (non-NaN) values are replaced by their ordinal ranks, the ranks
    are rescaled to the open interval (0, 1) as
    ``(rank - k) / (n - 2 * k + 1)`` with ``n`` the number of observed values,
    and the logit ``log(p / (1 - p))`` of the rescaled rank is returned.

    Missing values stay NaN in the output and do not take part in the ranking,
    so ``n`` and the ranks always refer to the same set of observations.

    :param x: array-like of numbers; it is flattened before ranking
    :param k: offset used when rescaling ranks (0.5 by default)
    :return: 1-D float array of the same size as ``x``
    """
    values = np.asarray(x, dtype=float).ravel()
    observed = ~np.isnan(values)
    num_samp = np.sum(observed)

    transformed = np.full(values.shape, np.nan)
    if num_samp == 0:
        # Nothing to rank: either empty input or all values are missing.
        return transformed

    # Rank only the observed entries so that the denominator (based on
    # num_samp) is consistent with the largest possible rank.
    ranks = (rankdata(values[observed], method='ordinal').astype(float) - k) / (num_samp - 2 * k + 1)
    transformed[observed] = np.log(ranks / (1 - ranks))
    return transformed


def data_standardization(arr: np.ndarray) -> np.ndarray:
    """
    Standardize ``arr``: subtract its mean and divide by its standard deviation.

    Both statistics are computed over all elements of ``arr``.

    :param arr: numeric array
    :return: array of the same shape as ``arr``
    """
    center = arr.mean()
    spread = arr.std()
    centered = arr - center
    return centered / spread

In [None]:
import os
import cv2 as cv
import pandas as pd

def load_images_from_folder(folder: str) -> np.ndarray:
    """
    Load every readable image found in ``folder`` as a grayscale float32 array.

    Files are visited in sorted file-name order. ``os.listdir`` returns entries
    in arbitrary order, while the rest of the notebook matches images to table
    rows by position, so a deterministic order is required.
    NOTE(review): the sort is lexicographic ("10.png" sorts before "2.png");
    confirm that the file names are zero-padded or otherwise sort correctly.

    Entries that OpenCV cannot decode (``cv.imread`` returns ``None``) are
    skipped silently.

    :param folder: directory containing the image files
    :return: NumPy array built from the list of loaded float32 images
    """

    images = []
    for filename in sorted(os.listdir(folder)):
        img = cv.imread(os.path.join(folder, filename), cv.IMREAD_GRAYSCALE)
        if img is not None:
            images.append(np.asarray(img).astype(np.float32))
    return np.asarray(images)

In [None]:
# Number of components produced by each dimensionality-reduction method and
# the number of plants (rows) kept from the feature matrix and phenotype table.
n_pca, n_t_sne, n_umap = 5, 2, 5
n_plants_use = 200

folder_images = "/content"
images = load_images_from_folder(folder_images)

# Extract features from the images.
# NOTE(review): the reducers are fitted on all loaded images, i.e. before the
# train/test split is made — confirm this is intended.
pca_features_ = pca_features(images, n_components=n_pca)
t_sne_features_ = t_sne_features(images, n_components=n_t_sne)
# umap_features_ = umap_features(images, n_components=n_umap)

# Per-image feature vector: PCA columns followed by t-SNE columns, truncated
# to the first n_plants_use rows.
# NOTE(review): `images` itself is not truncated here — verify that it has the
# same number of rows as `total_features` and `df_wheat`.
total_features = np.concatenate((pca_features_, t_sne_features_), axis=1)[:n_plants_use]

# Phenotype table, truncated to the first n_plants_use rows.
df_wheat = pd.read_csv("/content/wheat_pheno_num_sync.csv")[:n_plants_use]

# Extract features from the markers themselves (loaded as a plain NumPy array).
markers_df = pd.read_csv("/content/markers_poly_filtered_sync.csv").to_numpy()

# Split the data into train/validation/test: fraction of samples held out for testing.
test_percentage = 0.1

In [None]:
# импутирование данных
from sklearn.impute import SimpleImputer

# Number of neighbours used by the KNN imputer.
n_neighbors = 5

# Two target phenotypes (grain yield and plant height) for the first n_plants_use plants.
labels = df_wheat[["Урожайность.зерна..г.", "Высота.растений..см"]].to_numpy()[:n_plants_use]
# labels = df_wheat[["Урожайность.зерна..г.", "Бурая.ржавчина..."]].to_numpy()[:n_plants_use]
# labels = df_wheat[["Урожайность.зерна..г.", "Желтая.ржавчина..."]].to_numpy()[:n_plants_use]

# The phenotype table contains gaps, so the labels are imputed
# (the simple mean imputer is kept below as a commented-out alternative).
# imp = SimpleImputer(missing_values=np.nan, strategy='mean')
imp = KNNImputer(n_neighbors=n_neighbors, weights='uniform')
labels = imp.fit_transform(labels.reshape(-1, 2))

# Train/test split for the imputed-labels scenario.
# The indices are drawn from the number of label rows rather than from the
# number of loaded images: `labels` and `total_features` are truncated to
# n_plants_use rows, so sampling from a possibly longer `images` array could
# produce out-of-range indices.
# NOTE(review): the split is not seeded, so it differs between runs.
n_samples = labels.shape[0]
test_indices = np.random.choice(n_samples, int(n_samples * test_percentage), replace=False)
train_indices = np.setdiff1d(np.arange(n_samples), test_indices)

[[139.  85.]
 [266.  95.]
 [252. 100.]
 ...
 [156.  95.]
 [171.  90.]
 [240.  nan]]
[[139.  85.]
 [266.  95.]
 [252. 100.]
 ...
 [156.  95.]
 [171.  90.]
 [240.  89.]]


In [None]:
# Drop the plants whose target phenotypes contain missing values.
# df_wheat_no_nan = df_wheat[df_wheat["Урожайность.зерна..г."].notnull() & df_wheat["Высота.растений..см"].notnull()]
# df_wheat_no_nan = df_wheat[df_wheat["Урожайность.зерна..г."].notnull() & df_wheat["Бурая.ржавчина..."].notnull()]
df_wheat_no_nan = df_wheat[df_wheat["Урожайность.зерна..г."].notnull() & df_wheat["Желтая.ржавчина..."].notnull()]
images = images[df_wheat_no_nan.index]
total_features = total_features[df_wheat_no_nan.index]

# The labels must come from the filtered table: taking them from `df_wheat`
# keeps the NaN rows and leaves `labels` longer than (and misaligned with)
# the filtered `images` / `total_features`.
# labels = df_wheat_no_nan[["Урожайность.зерна..г.", "Высота.растений..см"]].to_numpy()
# labels = df_wheat_no_nan[["Урожайность.зерна..г.", "Бурая.ржавчина..."]].to_numpy()
labels = df_wheat_no_nan[["Урожайность.зерна..г.", "Желтая.ржавчина..."]].to_numpy()

# Every sample must have an image, a feature vector and a label row.
assert len(images) == len(total_features) == len(labels), "images/features/labels are misaligned"

# Train/test split for the filtered-data scenario.
# NOTE(review): the split is not seeded, so it differs between runs.
n_samples = labels.shape[0]
test_indices = np.random.choice(n_samples, size=int(n_samples * test_percentage), replace=False)
train_indices = np.setdiff1d(np.arange(n_samples), test_indices)
print(test_indices)
print(train_indices)

[  1 139   9  11  28  69  37  66   6 140  94 109  30 158   5  34  48 124
 199 131]
[  0   2   3   4   7   8  10  12  13  14  15  16  17  18  19  20  21  22
  23  24  25  26  27  29  31  32  33  35  36  38  39  40  41  42  43  44
  45  46  47  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63
  64  65  67  68  70  71  72  73  74  75  76  77  78  79  80  81  82  83
  84  85  86  87  88  89  90  91  92  93  95  96  97  98  99 100 101 102
 103 104 105 106 107 108 110 111 112 113 114 115 116 117 118 119 120 121
 122 123 125 126 127 128 129 130 132 133 134 135 136 137 138 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 159 160 161 162
 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198]


In [None]:
# Normalize the labels with the rank-based logit transform.

# Each phenotype (column) is transformed independently ...
labels_1 = rank_based_transform(labels[:, 0])
labels_2 = rank_based_transform(labels[:, 1])

# ... and the two transformed columns are put back side by side.
labels = np.column_stack((labels_1, labels_2))

In [None]:
# Normalize the labels with the classic formula: (value - mean) / standard deviation.

# Each phenotype (column) is standardized independently ...
labels_1 = data_standardization(labels[:, 0])
labels_2 = data_standardization(labels[:, 1])

# ... and the two standardized columns are put back side by side.
labels = np.column_stack((labels_1, labels_2))

In [None]:
# Training subset: images, labels and precomputed feature vectors.
train_images = images[train_indices]
train_labels = labels[train_indices]
train_dict = total_features[train_indices]

# Held-out test subset, sliced the same way.
test_images = images[test_indices]
test_labels = labels[test_indices]
test_dict = total_features[test_indices]

In [None]:
from dataclasses import dataclass, field

# Pure neural-network model.
@dataclass
class SimpleCNNModel:
    """
    Configuration and data holder that builds a two-input Keras regression model.

    The network takes an image and a vector of precomputed features, extracts
    image features with a small CNN, concatenates them with the feature vector
    and predicts ``n_trait`` values with a linear layer.

    The array fields default to fresh empty arrays created per instance
    (``default_factory``): array defaults written directly in the class body
    would be shared by all instances and are rejected as mutable defaults by
    newer Python versions.
    """
    n_epochs: int = 5
    n_row: int = 200
    n_col: int = 200
    input_channels: int = 1
    random_seed: int = 1234567890
    n_dict_features: int = 30
    n_trait: int = 2

    data_train: np.ndarray = field(default_factory=lambda: np.asarray([]))
    features_train: np.ndarray = field(default_factory=lambda: np.asarray([]))
    labels_train: np.ndarray = field(default_factory=lambda: np.asarray([]))

    data_test: np.ndarray = field(default_factory=lambda: np.asarray([]))
    features_test: np.ndarray = field(default_factory=lambda: np.asarray([]))
    labels_test: np.ndarray = field(default_factory=lambda: np.asarray([]))

    def build(self, hp: dict):
        """
        Build the network with the Keras functional API.

        :param hp: hyperparameters of the network configuration. Keys read:
            ``first_conv2d_*`` / ``second_conv2d_*`` (``out_channels``,
            ``kernel_size``, ``activation``), ``need_extra_conv2d`` with
            ``extra_conv2d_*``, ``need_batch_norm_after_first_conv2d``,
            ``need_batch_norm_after_second_conv2d``, ``need_deconv_block``,
            ``use_gap_1_or_flatten_0``, and, for the flatten head only,
            ``num_feature_output`` and ``dense_output_activation``.
        :return: uncompiled ``keras.Model`` with inputs ``[img_input, dict_input]``
        :raises ValueError: if ``hp['use_gap_1_or_flatten_0']`` is neither 0 nor 1
        """

        inp_node = Input((self.n_row, self.n_col, self.input_channels), name="img_input")

        # The shape must be a tuple; the original set literal only worked by accident.
        inp_node_dict = Input((self.n_dict_features,), name="dict_input")

        # --- first convolution stage (optionally two stacked convolutions) ---
        conv_node_1 = Conv2D(hp['first_conv2d_out_channels'],
                             kernel_size=(hp['first_conv2d_kernel_size'], hp['first_conv2d_kernel_size']),
                             padding='same',
                             strides=(1, 1),
                             activation=hp['first_conv2d_activation'], name="conv_map_1")(inp_node)
        if hp['need_extra_conv2d']:
            conv_node_1 = Conv2D(hp['extra_conv2d_out_channels'],
                                 kernel_size=(hp['extra_conv2d_kernel_size'], hp['extra_conv2d_kernel_size']),
                                 padding='same',
                                 strides=(1, 1),
                                 activation=hp['extra_conv2d_activation'], name="conv_map_extra")(conv_node_1)

        if hp['need_batch_norm_after_first_conv2d']:
            batch_node_1 = BatchNormalization()(conv_node_1)
            mp_node_1 = MaxPooling2D(pool_size=(2, 2))(batch_node_1)
        else:
            mp_node_1 = MaxPooling2D(pool_size=(2, 2))(conv_node_1)

        # --- second convolution stage ---
        conv_node_2 = Conv2D(hp['second_conv2d_out_channels'],
                             kernel_size=(hp['second_conv2d_kernel_size'], hp['second_conv2d_kernel_size']),
                             padding='same',
                             strides=(1, 1),
                             activation=hp['second_conv2d_activation'], name="conv_map_2")(mp_node_1)

        if hp['need_batch_norm_after_second_conv2d']:
            batch_node_2 = BatchNormalization()(conv_node_2)
            mp_node_2 = MaxPooling2D(pool_size=(2, 2), name="max_pool_map")(batch_node_2)
        else:
            mp_node_2 = MaxPooling2D(pool_size=(2, 2), name="max_pool_map")(conv_node_2)

        # --- optional decoder: two stride-2 transposed convolutions, each
        # concatenated (channel axis) with the matching pre-pooling conv output ---
        if hp['need_deconv_block']:
            deconv_node_2 = Conv2DTranspose(
                hp['second_conv2d_out_channels'],
                kernel_size=(hp['second_conv2d_kernel_size'], hp['second_conv2d_kernel_size']),
                padding='same',
                strides=(2, 2),
                activation=hp['second_conv2d_activation'],
                name="deconv_2"
            )(mp_node_2)
            concat_node_2 = Concatenate(name="concat_2", axis=3)([deconv_node_2, conv_node_2])
            conv_node_deconv_2 = Conv2D(
                hp['second_conv2d_out_channels'],
                kernel_size=(hp['second_conv2d_kernel_size'], hp['second_conv2d_kernel_size']),
                padding='same',
                strides=(1, 1),
                activation=hp['second_conv2d_activation'],
                name="conv_deconv_2"
            )(concat_node_2)
            deconv_node_1 = Conv2DTranspose(
                hp['first_conv2d_out_channels'],
                kernel_size=(hp['first_conv2d_kernel_size'], hp['first_conv2d_kernel_size']),
                padding='same',
                strides=(2, 2),
                activation=hp['first_conv2d_activation'],
                name="deconv_1"
            )(conv_node_deconv_2)
            concat_node_1 = Concatenate(name="concat_1", axis=3)([deconv_node_1, conv_node_1])
            # The decoder output replaces the pooled map as input of the feature head.
            mp_node_2 = Conv2D(
                hp['first_conv2d_out_channels'],
                kernel_size=(hp['first_conv2d_kernel_size'], hp['first_conv2d_kernel_size']),
                padding='same',
                strides=(1, 1),
                activation=hp['first_conv2d_activation'],
                name="conv_deconv_1"
            )(concat_node_1)

        # --- image feature head: flatten + dense, or global average pooling ---
        if hp['use_gap_1_or_flatten_0'] == 0:
            flatten_node = Flatten(name='flatten')(mp_node_2)
            dense_node = Dense(hp['num_feature_output'], activation=hp['dense_output_activation'],
                               name="img_feature_output")(flatten_node)
        elif hp['use_gap_1_or_flatten_0'] == 1:
            dense_node = GlobalAveragePooling2D(name="img_feature_output")(mp_node_2)
        else:
            # Without this branch an unexpected value surfaced later as a NameError.
            raise ValueError(
                f"hp['use_gap_1_or_flatten_0'] must be 0 or 1, got {hp['use_gap_1_or_flatten_0']!r}"
            )

        # Join the precomputed feature vector with the CNN image features.
        concatenate_features = Concatenate(name="concat_features")([inp_node_dict, dense_node])

        out = Dense(self.n_trait, activation='linear', name="cnn_multioutput")(concatenate_features)

        model = Model(inputs=[inp_node, inp_node_dict], outputs=out, name="regression_model")

        return model

In [None]:
class ComboDataPool(keras.utils.Sequence):
    """
    Batch provider that serves ``([images, features], labels)`` batches to Keras.

    The three inputs are sliced in parallel along their first axis, so they
    are expected to hold the same number of samples in the same order.
    """

    def __init__(self, images, features, labels, batch_size: int, max_len: int = -1):
        """
        :param images: image samples (sliceable along the first axis, with ``.shape``)
        :param features: per-sample feature vectors
        :param labels: per-sample targets
        :param batch_size: number of samples per batch
        :param max_len: keep only the first ``max_len`` samples; a negative
            value or ``None`` (the default is -1) keeps all samples.
            Previously the default of -1 was used directly as a slice bound
            and silently dropped the last sample.
        """
        super().__init__()
        self.batch_size = batch_size
        stop = None if max_len is None or max_len < 0 else max_len
        self.images = images[:stop]
        self.features = features[:stop]
        self.labels = labels[:stop]

    def __len__(self):
        """Number of batches; the last batch may be smaller than ``batch_size``."""
        return int(np.ceil(self.images.shape[0] / self.batch_size))

    def __getitem__(self, idx):
        """Return batch number ``idx`` as ``([images, features], labels)``."""
        batch = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        batch_data = [self.images[batch], self.features[batch]]
        batch_labels = self.labels[batch]

        return batch_data, batch_labels

In [None]:
from itertools import product, islice
from sklearn.model_selection import KFold
from keras.callbacks import EarlyStopping, ModelCheckpoint


class ComboModelTuner:
    """
    Cross-validated training and hyperparameter search for ``SimpleCNNModel``.

    The class is used as a namespace: every member is a static method.
    """

    # File-name templates ({0} is the save-folder name) of the metric pickles.
    # The order inside each tuple matches the order of the metric lists
    # returned by ``fit_cv``: train MAE, train MSE, validation MAE, validation MSE.
    _EPOCHEND_FILE_TEMPLATES = ("mae_trains_{0}_epochend", "mse_trains_{0}_epochend",
                                "mae_valid_{0}_epochend", "mse_valid_{0}_epochend")
    _PER_EPOCH_FILE_TEMPLATES = ("mae_trains_{0}", "mse_trains_{0}",
                                 "mae_valid_{0}", "mse_valid_{0}")

    @staticmethod
    @tf.function
    def fit_loss_mae(y_true: np.ndarray, y_pred: np.ndarray) -> tf.Tensor:
        """
        MAE loss for training with the standard ``.fit()`` method.

        The mean absolute error is computed separately for each of the two
        traits (columns 0 and 1) and the two values are averaged.
        """
        error = y_true - y_pred
        abs_error_1, abs_error_2 = tf.abs(error[:, 0]), tf.abs(error[:, 1])
        result_1, result_2 = tf.reduce_mean(abs_error_1), tf.reduce_mean(abs_error_2)
        return (result_1 + result_2) / 2

    @staticmethod
    @tf.function
    def fit_loss_mse(y_true: np.ndarray, y_pred: np.ndarray) -> tf.Tensor:
        """
        MSE loss for training with the standard ``.fit()`` method.

        The mean squared error is computed separately for each of the two
        traits (columns 0 and 1) and the two values are averaged.
        """
        error = y_true - y_pred
        # The first term must use column 0 only; squaring the whole error
        # tensor mixed both traits into the "first trait" term.
        squared_error_1, squared_error_2 = tf.square(error[:, 0]), tf.square(error[:, 1])
        result_1, result_2 = tf.reduce_mean(squared_error_1), tf.reduce_mean(squared_error_2)
        return (result_1 + result_2) / 2

    @staticmethod
    @tf.function
    def custom_loss_mae(y_true: np.ndarray, y_pred: np.ndarray) -> tf.Tensor:
        """
        MAE loss for training with a user-implemented training loop.

        Same computation as ``fit_loss_mae``; kept as a separate function
        because its name is used as the metric key in the training history.
        """
        error = y_true - y_pred
        abs_error_1, abs_error_2 = tf.abs(error[:, 0]), tf.abs(error[:, 1])
        result_1, result_2 = tf.reduce_mean(abs_error_1), tf.reduce_mean(abs_error_2)
        return (result_1 + result_2) / 2

    @staticmethod
    @tf.function
    def custom_loss_mse(y_true: np.ndarray, y_pred: np.ndarray) -> tf.Tensor:
        """
        MSE loss for training with a user-implemented training loop.

        Same computation as ``fit_loss_mse``; kept as a separate function
        because its name is used as the metric key in the training history.
        """
        error = y_true - y_pred
        # See fit_loss_mse: the first term must be restricted to column 0.
        squared_error_1, squared_error_2 = tf.square(error[:, 0]), tf.square(error[:, 1])
        result_1, result_2 = tf.reduce_mean(squared_error_1), tf.reduce_mean(squared_error_2)
        return (result_1 + result_2) / 2

    @staticmethod
    def _build_callbacks(early_stop: bool, model_checkpoint: bool) -> list:
        """Create the optional early-stopping and checkpoint callbacks."""
        callbacks = []
        if early_stop:
            callbacks.append(EarlyStopping(monitor="loss", min_delta=0.001, patience=2, verbose=1))
        if model_checkpoint:
            callbacks.append(
                ModelCheckpoint(filepath="checkpoints/model_no_df_{epoch:02d}-{val_loss:.2f}.keras",
                                save_best_only=True, monitor="loss", verbose=1))
        return callbacks

    @staticmethod
    def _fold_arrays(model: 'SimpleCNNModel', idx):
        """Return ``([images, features], labels)`` of the training data restricted to ``idx``."""
        return [model.data_train[idx], model.features_train[idx]], model.labels_train[idx]

    @staticmethod
    def _fit_fold(model_keras, model: 'SimpleCNNModel', tr_idx, val_idx,
                  total_hp: dict, callbacks: list, data_generator: bool):
        """
        Train ``model_keras`` on one cross-validation fold and return the Keras history.

        With ``data_generator=True`` the training part of the fold is served
        through ``ComboDataPool``. The pool is built from the fold's training
        indices only; the earlier version fed the whole global training set
        (validation fold included) and ignored the callbacks.
        """
        validation_data = ComboModelTuner._fold_arrays(model, val_idx)
        x_train, y_train = ComboModelTuner._fold_arrays(model, tr_idx)

        if data_generator:
            # max_len is passed explicitly so that every training sample of the fold is kept.
            fold_pool = ComboDataPool(images=x_train[0],
                                      features=x_train[1],
                                      labels=y_train,
                                      batch_size=total_hp["batch_size_ll"],
                                      max_len=len(tr_idx))
            return model_keras.fit(fold_pool,
                                   epochs=total_hp["num_epochs_ll"],
                                   validation_data=validation_data,
                                   callbacks=callbacks)

        return model_keras.fit(x=x_train,
                               y=y_train,
                               batch_size=total_hp["batch_size_ll"],
                               epochs=total_hp["num_epochs_ll"],
                               validation_data=validation_data,
                               callbacks=callbacks)

    @staticmethod
    def _evaluate_fold(model_keras, model: 'SimpleCNNModel', idx):
        """Evaluate on the training data restricted to ``idx``; returns ``[loss (MAE), metric (MSE)]``."""
        x_eval, y_eval = ComboModelTuner._fold_arrays(model, idx)
        return model_keras.evaluate(x=x_eval, y=y_eval)

    @staticmethod
    def custom_cv(total_hp: dict, cnn_hp: dict, model: 'SimpleCNNModel',
                  splits_num: int = 10,
                  early_stop: bool = True, model_checkpoint: bool = True,
                  data_generator: bool = False) -> tuple:
        """
        K-fold cross-validation using the ``fit_loss_*`` loss and metric.

        NOTE(review): a single Keras model is built once and keeps training
        across folds (weights are not reset between folds), so validation
        scores of later folds are computed on samples the model has already
        seen. Confirm whether this is intended.

        :param total_hp: training hyperparameters; keys ``batch_size_ll`` and ``num_epochs_ll``
        :param cnn_hp: network hyperparameters passed to ``model.build``
        :param model: model description holding the training arrays
        :param splits_num: number of folds
        :param early_stop: add the early-stopping callback
        :param model_checkpoint: add the checkpoint callback
        :param data_generator: feed training data through ``ComboDataPool``
        :return: ``([train MAE, train MSE, validation MAE, validation MSE], keras_model)``
            where every list has one value per fold
        """
        model_keras = model.build(cnn_hp)
        callbacks = ComboModelTuner._build_callbacks(early_stop, model_checkpoint)

        model_keras.compile(optimizer=keras.optimizers.SGD(learning_rate=0.0001),
                            loss=ComboModelTuner.fit_loss_mae,
                            metrics=[ComboModelTuner.fit_loss_mse])

        mae_per_fold_tr, mse_per_fold_tr = [], []
        mae_per_fold_vd, mse_per_fold_vd = [], []

        kfold = KFold(n_splits=splits_num, shuffle=True)
        for j, (tr_idx, val_idx) in enumerate(kfold.split(model.features_train)):
            ComboModelTuner._fit_fold(model_keras, model, tr_idx, val_idx,
                                      total_hp, callbacks, data_generator)

            # evaluate() returns [loss, metric] = [MAE, MSE]; the two were
            # previously stored in each other's list.
            scores = ComboModelTuner._evaluate_fold(model_keras, model, tr_idx)
            mae_per_fold_tr.append(scores[0])
            mse_per_fold_tr.append(scores[1])

            scores = ComboModelTuner._evaluate_fold(model_keras, model, val_idx)
            mae_per_fold_vd.append(scores[0])
            mse_per_fold_vd.append(scores[1])
            print(f"Fold #{j + 1} finished succesfully")

        return [mae_per_fold_tr, mse_per_fold_tr, mae_per_fold_vd, mse_per_fold_vd], model_keras

    @staticmethod
    def fit_cv(total_hp: dict, cnn_hp: dict, model: 'SimpleCNNModel',
               splits_num: int = 10,
               early_stop: bool = True, model_checkpoint: bool = True,
               data_generator: bool = False) -> tuple:
        """
        K-fold cross-validation using the ``custom_loss_*`` loss and metric.

        In addition to the end-of-fold scores, the per-epoch training history
        of all folds is concatenated into flat lists.

        NOTE(review): a single Keras model is built once and keeps training
        across folds (weights are not reset between folds). Confirm whether
        this is intended.

        :param total_hp: training hyperparameters; keys ``batch_size_ll`` and ``num_epochs_ll``
        :param cnn_hp: network hyperparameters passed to ``model.build``
        :param model: model description holding the training arrays
        :param splits_num: number of folds
        :param early_stop: add the early-stopping callback
        :param model_checkpoint: add the checkpoint callback
        :param data_generator: feed training data through ``ComboDataPool``
        :return: ``([end_of_fold_metrics, per_epoch_metrics], keras_model)``; both
            metric groups are ordered train MAE, train MSE, validation MAE, validation MSE
        """
        model_keras = model.build(cnn_hp)
        callbacks = ComboModelTuner._build_callbacks(early_stop, model_checkpoint)

        model_keras.compile(optimizer=keras.optimizers.SGD(learning_rate=0.0001),
                            loss=ComboModelTuner.custom_loss_mae,
                            metrics=[ComboModelTuner.custom_loss_mse])

        mae_per_fold_tr, mse_per_fold_tr = [], []
        mae_per_fold_vd, mse_per_fold_vd = [], []

        mae_per_fold_tr_epochend, mse_per_fold_tr_epochend = [], []
        mae_per_fold_vd_epochend, mse_per_fold_vd_epochend = [], []

        kfold = KFold(n_splits=splits_num, shuffle=True)
        for j, (tr_idx, val_idx) in enumerate(kfold.split(model.features_train)):
            history = ComboModelTuner._fit_fold(model_keras, model, tr_idx, val_idx,
                                                total_hp, callbacks, data_generator)

            scores = ComboModelTuner._evaluate_fold(model_keras, model, tr_idx)
            mae_per_fold_tr_epochend.append(scores[0])
            mse_per_fold_tr_epochend.append(scores[1])

            scores = ComboModelTuner._evaluate_fold(model_keras, model, val_idx)
            mae_per_fold_vd_epochend.append(scores[0])
            mse_per_fold_vd_epochend.append(scores[1])

            # Append this fold's per-epoch curves to the matching accumulator
            # (the train curves were previously built from the wrong lists).
            mae_per_fold_tr.extend(history.history["loss"])
            mse_per_fold_tr.extend(history.history["custom_loss_mse"])
            mae_per_fold_vd.extend(history.history["val_loss"])
            mse_per_fold_vd.extend(history.history["val_custom_loss_mse"])
            print(f"Fold #{j + 1} finished succesfully")

        return [[mae_per_fold_tr_epochend, mse_per_fold_tr_epochend, mae_per_fold_vd_epochend, mse_per_fold_vd_epochend],
                [mae_per_fold_tr, mse_per_fold_tr, mae_per_fold_vd, mse_per_fold_vd]], model_keras

    @staticmethod
    def _open_metric_files(save_folder_name: str) -> list:
        """
        Open the eight metric pickle files for writing.

        The ``metrics/<save_folder_name>`` directory is created if missing.

        :return: two lists of open binary files (end-of-fold group, per-epoch
            group), ordered like the metric groups returned by ``fit_cv``
        """
        os.makedirs(f"metrics/{save_folder_name}", exist_ok=True)

        opened, groups = [], []
        try:
            for templates in (ComboModelTuner._EPOCHEND_FILE_TEMPLATES,
                              ComboModelTuner._PER_EPOCH_FILE_TEMPLATES):
                group = []
                for template in templates:
                    file_name = template.format(save_folder_name)
                    fl = open(f"metrics/{save_folder_name}/{file_name}.pickle", "wb")
                    opened.append(fl)
                    group.append(fl)
                groups.append(group)
        except OSError:
            # Do not leak the handles that were already opened.
            for fl in opened:
                fl.close()
            raise
        return groups

    @staticmethod
    def _close_metric_files(metric_files: list) -> None:
        """Close every file returned by ``_open_metric_files``."""
        for group in metric_files:
            for fl in group:
                fl.close()

    @staticmethod
    def _dump_test_scores(test_mae: list, test_mse: list, save_folder_name: str) -> None:
        """Write the collected test MAE / MSE lists to their pickle files."""
        with open(f"metrics/{save_folder_name}/mae_test_{save_folder_name}.pickle", "wb") as fl:
            pickle.dump(test_mae, fl)
        with open(f"metrics/{save_folder_name}/mse_test_{save_folder_name}.pickle", "wb") as fl:
            pickle.dump(test_mse, fl)

    @staticmethod
    def _sample_hyperparameters(hps_cnn: dict) -> dict:
        """
        Draw one random hyperparameter combination.

        Parameters with a single candidate take that candidate. Candidate
        lists that contain booleans or strings are sampled as categories.
        Purely numeric lists are treated as an integer range and sampled
        uniformly from ``min`` to ``max`` inclusive (the upper bound used to
        be excluded, so the maximum could never be drawn).
        """
        sampled = {}
        for param, candidates in hps_cnn.items():
            if len(candidates) > 1:
                if any(isinstance(x, (bool, str)) for x in candidates):
                    sampled[param] = candidates[np.random.randint(len(candidates))]
                else:
                    sampled[param] = int(np.random.randint(low=min(candidates), high=max(candidates) + 1))
            else:
                sampled[param] = candidates[0]
        return sampled

    @staticmethod
    def _run_trial(cnn_hp: dict, model_path: str, metric_files: list,
                   train_images_: np.ndarray, train_features_: np.ndarray, train_labels_: np.ndarray,
                   test_images_: np.ndarray, test_features_: np.ndarray, test_labels_: np.ndarray,
                   early_stop: bool, model_checkpoint: bool):
        """
        Train one hyperparameter combination with 5-fold CV, save the model,
        append its CV metrics to ``metric_files`` and return the test scores
        ``[MAE, MSE]`` from ``evaluate``.
        """
        num_epochs = 5

        model = SimpleCNNModel(n_epochs=num_epochs,
                               n_row=200,
                               n_col=200,
                               input_channels=1,
                               random_seed=1234567890,
                               # taken from the data instead of a hard-coded 7
                               n_dict_features=train_features_.shape[1],
                               n_trait=2,
                               data_train=train_images_,
                               labels_train=train_labels_,
                               features_train=train_features_,
                               data_test=test_images_,
                               features_test=test_features_,
                               labels_test=test_labels_)

        metrics, model_keras = ComboModelTuner.fit_cv(
            total_hp={"batch_size_ll": 64, "num_epochs_ll": num_epochs},
            cnn_hp=cnn_hp,
            splits_num=5,
            model=model,
            early_stop=early_stop,
            model_checkpoint=model_checkpoint)

        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        model_keras.save(model_path)

        # metrics and metric_files share the same [group][metric] layout.
        for group_values, group_files in zip(metrics, metric_files):
            for values, fl in zip(group_values, group_files):
                pickle.dump(values, fl)

        # Error of the model on the test set.
        return model_keras.evaluate([test_images_, test_features_], test_labels_)

    @staticmethod
    def random_hyper_tuning(iters_num: int, hps_cnn: dict,
                            train_images_: np.ndarray, train_features_: np.ndarray, train_labels_: np.ndarray,
                            test_images_: np.ndarray, test_features_: np.ndarray, test_labels_: np.ndarray,
                            early_stop: bool = True, model_checkpoint: bool = False,
                            save_folder_name: str = "gg"):
        """
        Random search: train ``iters_num`` randomly sampled hyperparameter combinations.

        CV metrics are appended to pickle files under ``metrics/<save_folder_name>``,
        trained models are saved under ``/content/model_saves/<save_folder_name>``
        and the test scores of all iterations are pickled at the end.

        :param iters_num: number of random combinations to try
        :param hps_cnn: mapping of hyperparameter name to its candidate list / bounds
        :param save_folder_name: sub-folder name used for metrics and saved models
        """
        test_mae, test_mse = [], []

        metric_files = ComboModelTuner._open_metric_files(save_folder_name)
        try:
            for iter_ in range(iters_num):
                # Assemble a random hyperparameter combination within the given bounds.
                print(f"Random Tuning iter #{iter_} started")

                cnn_hp_comb = ComboModelTuner._sample_hyperparameters(hps_cnn)
                print(cnn_hp_comb)

                # evaluate() (not predict()) yields the [MAE, MSE] test scores.
                scores_test = ComboModelTuner._run_trial(
                    cnn_hp_comb,
                    f"/content/model_saves/{save_folder_name}/rand_cv_trained_model_iter{iter_}.keras",
                    metric_files,
                    train_images_, train_features_, train_labels_,
                    test_images_, test_features_, test_labels_,
                    early_stop, model_checkpoint)

                test_mae.append(scores_test[0])
                test_mse.append(scores_test[1])

                print(f"Random Tuning iter #{iter_} finished successfully")
        finally:
            ComboModelTuner._close_metric_files(metric_files)

        ComboModelTuner._dump_test_scores(test_mae, test_mse, save_folder_name)

    @staticmethod
    def grid_hyper_tuning(iters_num: int, hps_cnn: dict,
                          train_images_: np.ndarray, train_features_: np.ndarray, train_labels_: np.ndarray,
                          test_images_: np.ndarray, test_features_: np.ndarray, test_labels_: np.ndarray,
                          early_stop: bool = True, model_checkpoint: bool = False,
                          save_folder_name: str = "gg"):
        """
        Grid search: train the first ``iters_num`` combinations of the Cartesian
        product of all candidate lists in ``hps_cnn`` (in ``itertools.product`` order).

        CV metrics are appended to pickle files under ``metrics/<save_folder_name>``,
        trained models are saved under ``/content/model_saves/<save_folder_name>``
        and the test scores of all iterations are pickled at the end.

        :param iters_num: maximum number of grid combinations to train
        :param hps_cnn: mapping of hyperparameter name to its candidate list
        :param save_folder_name: sub-folder name used for metrics and saved models
        """
        cnn_hp_combos = (dict(zip(hps_cnn.keys(), values)) for values in product(*hps_cnn.values()))

        test_mae, test_mse = [], []

        metric_files = ComboModelTuner._open_metric_files(save_folder_name)
        try:
            for i, tmp_hps_cnn in enumerate(islice(cnn_hp_combos, iters_num)):
                scores_test = ComboModelTuner._run_trial(
                    tmp_hps_cnn,
                    f"/content/model_saves/{save_folder_name}/grid_cv_trained_model_iter{i}.keras",
                    metric_files,
                    train_images_, train_features_, train_labels_,
                    test_images_, test_features_, test_labels_,
                    early_stop, model_checkpoint)

                test_mae.append(scores_test[0])
                test_mse.append(scores_test[1])
                print(f"Grid Tuning iter #{i + 1} finished successfully")
        finally:
            ComboModelTuner._close_metric_files(metric_files)

        ComboModelTuner._dump_test_scores(test_mae, test_mse, save_folder_name)

In [None]:
# Define the hyperparameter grid for the CNN model and run the search
import pickle

model_hp = {# сначала идут параметры сверточной части модели
            'first_conv2d_out_channels': [32, 64],
            'first_conv2d_kernel_size': [3, 5, 7],
            'first_conv2d_activation': ['tanh', 'relu'],
            'need_extra_conv2d': [False, True],
            'extra_conv2d_out_channels': [32, 64],
            'extra_conv2d_kernel_size': [3, 5, 7],
            'extra_conv2d_activation': ['tanh', 'relu'],
            'need_batch_norm_after_first_conv2d': [True, False],
            'second_conv2d_kernel_size': [3, 5],
            'second_conv2d_out_channels': [64, 128],
            'second_conv2d_activation': ['tanh', 'relu'],
            'need_batch_norm_after_second_conv2d': [True, False],
            'dense_output_activation': ['sigmoid', 'linear'],
            'use_gap_1_or_flatten_0': [1, 0],
            'need_deconv_block': [False, True],
            'num_feature_output': [64, 128, 256],
        }

import os

# Single source of truth for the run's folder name: the tuner writes its models and
# metrics under this name, and the split indices below are stored under the same name.
# Keeping it in one place prevents the call and the path from drifting apart.
SAVE_FOLDER_NAME = "crop_yellow_rust"

# Alternative strategy (random search), kept for reference:
# ComboModelTuner.random_hyper_tuning(10, model_hp,
#                                     train_images_=train_images,
#                                     train_labels_=train_labels,
#                                     train_features_=train_dict,
#                                     test_images_=test_images,
#                                     test_labels_=test_labels,
#                                     test_features_=test_dict,
#                                     save_folder_name="height_crop_test")

# Run the grid search over `model_hp` with early stopping enabled and model
# checkpointing disabled.
# NOTE(review): the leading positional 3 looks like the number of grid iterations
# to evaluate — confirm against grid_hyper_tuning's signature.
ComboModelTuner.grid_hyper_tuning(3, model_hp,
                                  train_images_=train_images,
                                  train_labels_=train_labels,
                                  train_features_=train_dict,
                                  test_images_=test_images,
                                  test_labels_=test_labels,
                                  test_features_=test_dict,
                                  early_stop=True,
                                  model_checkpoint=False,
                                  save_folder_name=SAVE_FOLDER_NAME)

# Save the train/test split indices. Two objects are pickled back to back into the
# same file: the training indices first, then the test indices, so a reader must call
# pickle.load twice in that order. The file is binary pickle despite its .txt extension.
indices_path = f"/content/indices/{SAVE_FOLDER_NAME}/train_test_split.txt"
# Create the target directory if needed so the write cannot fail with
# FileNotFoundError after the (long) tuning run has already completed.
os.makedirs(os.path.dirname(indices_path), exist_ok=True)
with open(indices_path, "wb") as fl:
    pickle.dump(train_indices, fl)
    pickle.dump(test_indices, fl)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fold #1 finished succesfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 4: early stopping
Fold #2 finished succesfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 5: early stopping
Fold #3 finished succesfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 5: early stopping
Fold #4 finished succesfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 5: early stopping
Fold #5 finished succesfully
Grid Tuning iter #1 finished successfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fold #1 finished succesfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 3: early stopping
Fold #2 finished succesfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 5: early stopping
Fold #3 finished succesfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 5: early stopping
Fold #4 finished succesfully
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 4: early stopping
Fold #5 finished succesfully
Grid Tuning iter

In [None]:
# архивирование и загрузка истории обучения модели + индексирования образцов на локальную машину
!cd /contents
!zip -r model_checkpoints.zip model_saves/crop_yellow_rust
!zip -r metrics.zip metrics/crop_yellow_rust
!zip -r train_test_indices.zip indices/crop_yellow_rust

from google.colab import files
files.download('model_checkpoints.zip')
files.download('train_test_indices.zip')
files.download('metrics.zip')

/bin/bash: line 1: cd: /contents: No such file or directory
  adding: model_saves/crop_yellow_rust/ (stored 0%)
  adding: model_saves/crop_yellow_rust/grid_cv_trained_model_iter0.keras (deflated 27%)
  adding: model_saves/crop_yellow_rust/grid_cv_trained_model_iter2.keras (deflated 27%)
  adding: model_saves/crop_yellow_rust/grid_cv_trained_model_iter1.keras (deflated 27%)
  adding: metrics/crop_yellow_rust/ (stored 0%)
  adding: metrics/crop_yellow_rust/mae_trains_crop_yellow_rust.pickle (deflated 66%)
  adding: metrics/crop_yellow_rust/crop_brown_rust/ (stored 0%)
  adding: metrics/crop_yellow_rust/crop_brown_rust/train_test_split.txt (deflated 73%)
  adding: metrics/crop_yellow_rust/mse_trains_crop_yellow_rust_epochend.pickle (deflated 43%)
  adding: metrics/crop_yellow_rust/mae_trains_crop_yellow_rust_epochend.pickle (deflated 44%)
  adding: metrics/crop_yellow_rust/mse_valid_crop_yellow_rust_epochend.pickle (deflated 42%)
  adding: metrics/crop_yellow_rust/mae_valid_crop_yellow_ru

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>