In [67]:
import gc
import os
import random
import time

import numpy as np
import pandas as pd
from PIL import Image, ImageFile

import tensorflow as tf
import efficientnet.tfkeras as ef
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array
import tensorflow.keras.models as M
from tensorflow.keras import backend as K
import tensorflow.keras.layers as L
from tensorflow.keras import optimizers
from tensorflow.keras.models import load_model
from tensorflow.keras import utils
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder

In [37]:
# Print the versions of the main libraries so the environment can be reproduced.
import matplotlib
import sklearn
print("pandas version: ", pd.__version__)
print("numpy version: ", np.__version__)
print("sklearn version: ", sklearn.__version__)
print("matplotlib version: ", matplotlib.__version__)
print("tensorflow version: ", tf.__version__)
# NOTE(review): the efficientnet version below is a hard-coded string, it is not
# read from the installed package, so it can silently go out of date.
print("efficientnet version: 1.1.1")

pandas version:  1.2.2
numpy version:  1.19.5
sklearn version:  0.23.2
matplotlib version:  3.3.2
tensorflow version:  2.4.1
efficientnet version: 1.1.1


###### Подробнее про библиотеку efficientnet: https://pypi.org/project/efficientnet/

In [None]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all at start-up.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
# NOTE(review): `ImageFile` and `Image` are presumably PIL.ImageFile / PIL.Image;
# both names must be in scope for this cell to run.
# Load truncated image files instead of failing on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Remove the upper limit on image size (pixel count).
Image.MAX_IMAGE_PIXELS = None

In [40]:
# Seed every random number generator used in this notebook with the same value.
seed = 42
random.seed(seed)         # Python standard-library RNG
np.random.seed(seed)      # NumPy global RNG
tf.random.set_seed(seed)  # TensorFlow global seed

In [41]:
# Custom metric helpers used to build the F1 score.
def recall_m(y_true, y_pred):
    """Recall of `y_pred` against `y_true`, computed with Keras backend ops.

    Both the product `y_true * y_pred` and `y_true` itself are clipped to
    [0, 1] and rounded before being summed, so predictions are effectively
    thresholded at 0.5. `K.epsilon()` guards the division when there are no
    positives.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_positives = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual_positives) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision of `y_pred` against `y_true`, computed with Keras backend ops.

    The product `y_true * y_pred` and `y_pred` itself are clipped to [0, 1]
    and rounded before being summed, so predictions are effectively
    thresholded at 0.5. `K.epsilon()` guards the division when nothing is
    predicted positive.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of `precision_m` and `recall_m`.

    `K.epsilon()` is added to the denominator so the result is 0 rather than
    NaN when both precision and recall are 0.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

Фильтрую датафрейм: оставляю только строки, для которых изображение есть в каталоге `images`.

In [23]:
# Names of all ".jpg" files found directly inside the "images" directory.
list_images = [filename for filename in os.listdir("images") if filename.endswith(".jpg")]

In [42]:
# Load the training table and keep only the rows whose image file is present.
train = pd.read_csv('train.csv')
# Image file name expected for each row: "<guid>.jpg".
train['image_id'] = train['guid'] + '.jpg'
# Vectorised membership test against a set: one hash lookup per row instead of
# scanning the whole `list_images` list for every row.
train['have_image'] = train['image_id'].isin(set(list_images)).astype('int64')
train = train.query("have_image==1").reset_index(drop=True)

In [61]:
# Training configuration shared by the cells below.
IMG_SIZE = 512                            # images are resized to IMG_SIZE x IMG_SIZE
input_shape = (IMG_SIZE, IMG_SIZE, 3)     # height, width, channels
AUTO = tf.data.experimental.AUTOTUNE
batch_size = 8
num_epochs = 30                           # upper bound on epochs per fit() call
LR = 7e-5                                 # learning rate passed to the optimizer

In [62]:
# Scale 8-bit pixel values from [0, 255] to [0, 1]. The scale factor depends on
# the pixel value range, not on the image side length (IMG_SIZE).
# NOTE(review): any inference pipeline that feeds models trained with this
# generator must apply the same scaling.
datagen = ImageDataGenerator(rescale=1./255)

In [63]:
def create_datagen(df, shuffle=True):
    """Create a batch iterator over the images listed in `df`.

    Uses the module-level `datagen` to read files from the "images" directory.

    Args:
        df: DataFrame with an "image_id" column (file names) and a "typology"
            column (class labels).
        shuffle: whether to shuffle the rows between epochs. Defaults to True
            (the previous hard-coded behaviour); pass False when the order of
            the produced batches must match the order of `df`, e.g. to align
            predictions with rows.

    Returns:
        The iterator returned by `datagen.flow_from_dataframe`, configured for
        IMG_SIZE x IMG_SIZE images, `batch_size` items per batch and
        categorical (one-hot) labels.
    """
    return datagen.flow_from_dataframe(
        dataframe=df,
        directory="images",
        x_col="image_id",
        y_col="typology",
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=shuffle,
        seed=seed)

Загружу общедоступные веса EfficientNetB6, предобученные на ImageNet; на основе этой архитектуры достроим нейронную сеть. Подробнее про архитектуру: https://paperswithcode.com/paper/fixing-the-train-test-resolution-discrepancy-2

In [70]:
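# One-time step (kept for reference): download the ImageNet weights and store them
# locally as "EfficientNetB6.h5", the file that architecture_NN() loads.
#base_model = ef.EfficientNetB6(weights="imagenet", include_top=False, input_shape=input_shape)
#base_model.save("EfficientNetB6.h5")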

In [64]:
def architecture_NN(n_classes=15, bottleneck_units=50):
    """Build the classifier: a frozen EfficientNetB6 backbone plus a dense head.

    Args:
        n_classes: width of the final softmax layer (default 15).
        bottleneck_units: width of the ReLU "bottleneck" dense layer placed
            right before the softmax layer (default 50).

    Returns:
        An uncompiled Keras Model that takes inputs of shape `input_shape`
        and outputs `n_classes` softmax probabilities.
    """
    inp = L.Input(shape=input_shape)
    # `ef` is the alias under which efficientnet.tfkeras is imported in this
    # notebook. The weights are read from the local "EfficientNetB6.h5" file.
    base_model = ef.EfficientNetB6(weights="EfficientNetB6.h5", include_top=False, input_shape=input_shape)
    # Freeze the backbone: only the dense head below is trained.
    base_model.trainable = False
    x = base_model(inp)
    x = L.GlobalAveragePooling2D()(x)
    bn = L.Dense(bottleneck_units, activation='relu', kernel_initializer='he_uniform')(x)
    out = L.Dense(n_classes, activation='softmax', kernel_initializer='glorot_uniform')(bn)
    return tf.keras.models.Model(inputs=inp, outputs=out)

In [65]:
# Build a throwaway instance of the network just to print its layer/parameter summary.
architecture_NN().summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_22 (InputLayer)        [(None, 512, 512, 3)]     0         
_________________________________________________________________
efficientnet-b6 (Functional) (None, 16, 16, 2304)      40960136  
_________________________________________________________________
global_average_pooling2d_5 ( (None, 2304)              0         
_________________________________________________________________
dense_10 (Dense)             (None, 300)               691500    
_________________________________________________________________
dense_11 (Dense)             (None, 15)                4515      
Total params: 41,656,151
Trainable params: 696,015
Non-trainable params: 40,960,136
_________________________________________________________________


Время обучения на GPU 3060 — около 3 часов.

In [None]:
# Stratified 8-fold training: for every fold a fresh network is trained and two
# models are saved - the full classifier and its "bottleneck" feature extractor.
start_time = time.time()
skf = StratifiedKFold(n_splits=8, shuffle=True, random_state=seed)
for fold, (idxT, idxV) in enumerate(skf.split(train, train['typology'])):
    # Drop Keras global state left by previously built models so that memory
    # does not keep growing while one large network is created per fold.
    K.clear_session()
    gc.collect()

    train_generator = create_datagen(train.iloc[idxT])
    valid_generator = create_datagen(train.iloc[idxV])

    # architecture_NN() returns a single classifier model. The bottleneck model
    # is derived from it: same input, output taken from the penultimate layer
    # (the dense layer right before the softmax), so both share trained weights.
    model_label = architecture_NN()
    model_bn = tf.keras.models.Model(inputs=model_label.input,
                                     outputs=model_label.layers[-2].output)
    model_label.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(LR), metrics=[f1_m])

    # A new callback per fold, so no early-stopping state carries over between folds.
    es = EarlyStopping(monitor='val_f1_m', patience=6, restore_best_weights=True, verbose=1, mode='max')
    history = model_label.fit(train_generator,
                              validation_data=valid_generator,
                              callbacks=[es],
                              epochs=num_epochs)

    # NOTE(review): the two target directories differ only in letter case
    # ("Model_img" vs "model_img"); on a case-sensitive file system these are two
    # different directories - confirm which one downstream code reads from.
    os.makedirs("Model_img", exist_ok=True)
    os.makedirs("model_img", exist_ok=True)
    model_label.save(f"Model_img/model_predict_with_bottle_neck_{fold}.h5")
    model_bn.save(f"model_img/model_bottle_neck_{fold}.h5")
print(f"hours: {(time.time()-start_time)/3600}")

Found 3804 validated image filenames belonging to 15 classes.
Found 544 validated image filenames belonging to 15 classes.
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Restoring model weights from the end of the best epoch.
Epoch 00026: early stopping
Found 3804 validated image filenames belonging to 15 classes.
Found 544 validated image filenames belonging to 15 classes.
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Restoring model weights from the end of the best epoch.
Epoch 00011: early stopping
Found 3804 validated image filenames belonging to 15 classes.
Found 544 validated image filenames belonging to 15 classes.
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoc

Found 3804 validated image filenames belonging to 15 classes.
Found 544 validated image filenames belonging to 15 classes.
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Restoring model weights from the end of the best epoch.
Epoch 00017: early stopping
Found 3805 validated image filenames belonging to 15 classes.
Found 543 validated image filenames belonging to 15 classes.
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Restoring model weights from the end of the best epoch.
Epoch 00022: early stopping
Found 3805 validated image filenames belonging to 15 classes.
Found 543 validated image filenames belonging to 15 classes.
Epoch 1/70
Ep

Found 3805 validated image filenames belonging to 15 classes.
Found 543 validated image filenames belonging to 15 classes.
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Restoring model weights from the end of the best epoch.
Epoch 00019: early stopping
Found 3805 validated image filenames belonging to 15 classes.
Found 543 validated image filenames belonging to 15 classes.
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70