Скачиваем датасет с Я.диска

In [1]:
#pragma dataset init Renopaty --size 4Gb

import requests
from urllib.parse import urlencode
from io import BytesIO
from zipfile import ZipFile

base_url = 'https://cloud-api.yandex.net/v1/disk/public/resources/download?'
public_key = 'https://disk.yandex.ru/d/uHaTRuAlbdbpdQ'

final_url = base_url + urlencode(dict(public_key=public_key))
response = requests.get(final_url)
download_url = response.json()['href']
response = requests.get(download_url)

dist_path = '/home/jupyter/mnt/datasets/Renopaty/'
zipfile = ZipFile(BytesIO(response.content))
zipfile.extractall(path=dist_path)

In [2]:
import keras
import tensorflow as tf
from tensorflow.keras.layers import Layer, Input, Dense, Flatten, Lambda, Conv2D, MaxPooling2D
from tensorflow.keras import applications, losses, optimizers, metrics, Model
from tensorflow.keras.applications.resnet_v2 import preprocess_input
import os
from PIL import Image
import numpy as np
import random
import seaborn as sns
import matplotlib.pyplot as plt

2024-04-10 19:47:32.040252: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-10 19:47:33.494034: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
test_paths = '/home/jupyter/datasphere/datasets/Renopaty/Test'
train_paths = '/home/jupyter/datasphere/datasets/Renopaty/Train'

In [4]:
for i in range(5):
  test_paths = f'/home/jupyter/datasphere/datasets/Renopaty/Test/{i}'
  train_paths = f'/home/jupyter/datasphere/datasets/Renopaty/Train/{i}'
  print(len(os.listdir(train_paths)), f'images for training in class {i}')
  print(len(os.listdir(test_paths)), f' images for testing in class {i}')

6408 images for training in class 0
712  images for testing in class 0
2532 images for training in class 1
281  images for testing in class 1
3502 images for training in class 2
389  images for testing in class 2
2879 images for training in class 3
319  images for testing in class 3
2709 images for training in class 4
300  images for testing in class 4


In [5]:
data_dir = '/home/jupyter/datasphere/datasets/Renopaty/Train'
batch_size = 48 
image_size = (380,380)

In [6]:
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,                  # Путь к изображениям
  validation_split = 0.2,    # Устанавливаем разделение на обучающую и проверочную выборки (10%)
  subset = "training",       # Помечаем, что выборка обучающая (90%)
  seed = 123,                # Дополнительное случайное начальное число для перетасовки и преобразований
  image_size = image_size,   # Размер изображений, который был задан ранее
  batch_size = batch_size)   # Размер батча, который был задан ранее

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,                  # Путь к изображениям
  validation_split = 0.2,    # Устанавливаем разделение на обучающую и проверочную выборки (10%)
  subset = "validation",     # Помечаем, что выборка проверочная (10%)
  seed = 123,                # Дополнительное случайное начальное число для перетасовки и преобразований
  image_size = image_size,   # Размер изображений, который был задан ранее
  batch_size = batch_size)   # Размер батча, который был задан ранее
  
                             # Определяем имена классов:
class_names = train_ds.class_names
print(class_names)

Found 18030 files belonging to 5 classes.
Using 14424 files for training.


2024-04-10 19:47:47.531229: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31136 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:8c:00.0, compute capability: 7.0


Found 18030 files belonging to 5 classes.
Using 3606 files for validation.
['0', '1', '2', '3', '4']


In [7]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE) # Предварительная обучающая выборка
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)     # Предварительная тестовая выборка

print(train_ds)

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 380, 380, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


In [8]:
Resnet = tf.keras.applications.ResNet152V2(
    include_top=False,
    weights='imagenet',
    input_shape=(380, 380, 3),
)

Input = tf.keras.Input(shape=(380, 380, 3))
x = preprocess_input(Input)
x = Resnet(x,training=True)
out_conv = tf.keras.layers.Conv2D(filters=5, kernel_size=1, strides=(1, 1), activation='selu', padding='same', name='out_conv')(x)
gmp = tf.keras.layers.GlobalMaxPool2D()(out_conv)
out = Flatten()(gmp)
Resnet_model = Model(inputs=Input, outputs=[out], name='4Classes')


In [9]:
Resnet_model.summary()

Model: "4Classes"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 380, 380, 3)]     0         
                                                                 
 tf.math.truediv (TFOpLambda  (None, 380, 380, 3)      0         
 )                                                               
                                                                 
 tf.math.subtract (TFOpLambd  (None, 380, 380, 3)      0         
 a)                                                              
                                                                 
 resnet152v2 (Functional)    (None, 12, 12, 2048)      58331648  
                                                                 
 out_conv (Conv2D)           (None, 12, 12, 5)         10245     
                                                                 
 global_max_pooling2d (Globa  (None, 5)                0  

In [10]:
def plot_graph(train_acc, val_acc, train_loss, val_loss):
  '''
  Функция выведет на экран графики точности и ошибки на проверочной и обучающей выборках.
  '''
  sns.set(style='darkgrid', palette='dark') 
  plt.figure(figsize=(16, 10))
  plt.subplot(2, 2, 1)
  plt.title('Точность', fontweight='bold')
  plt.plot(train_acc, label='Точность на обучащей выборке')
  plt.plot(val_acc, label='Точность на проверочной выборке')
  plt.xlabel('Эпоха обучения')
  plt.ylabel('Доля верных ответов')
  plt.legend()

  plt.subplot(2, 2, 2)
  plt.title('Ошибка', fontweight='bold')
  plt.plot(train_loss, label='Ошибка на обучающей выборке')
  plt.plot(val_loss, label='Ошибка на проверочной выборке')
  plt.xlabel('Эпоха обучения')
  plt.ylabel('Ошибка')
  plt.legend()

  plt.show()

In [11]:
base_learning_rate = 0.0001                                                                        
# Компилируем модель:
Resnet_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),  # Оптимизатор Adam c заданным ранее шагом обучения
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # Ошибка SparseCategoricalCrossentropy
              metrics=['accuracy'])                                                  # Метрика Accuracy

In [12]:
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, CSVLogger

In [13]:
import gc
gc.collect()

20581

In [None]:
epochs = 15    

#Задаем коллбеки  
history_path = '/home/jupyter/datasphere/project/model_history_log2.csv'
weights_path = '/home/jupyter/datasphere/project/MobileNetV1.h5'

csv_logger = CSVLogger(history_path, append=True)
# уменьшение lr
reduceLROnPlateau = ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=3, verbose=1)
# сохранение весов
model_checkpoint = ModelCheckpoint(weights_path, monitor='val_loss', save_best_only=True, verbose=1)


Retina_Resnet = Resnet_model.fit(train_ds,                 # Обучающая выборка
                        epochs=epochs,            # Количество эпох обучения, заданное ранее
                        validation_data=val_ds,
                        callbacks=[csv_logger,
                                   reduceLROnPlateau,
                                   model_checkpoint])   # Проверочная выборка

                                                    # После того, как пройдет обучение, выведем графики точности и ошибки:
plot_graph(Retina_Resnet.history['accuracy'],
           Retina_Resnet.history['val_accuracy'],
           Retina_Resnet.history['loss'],
           Retina_Resnet.history['val_loss'])

2024-04-10 19:47:58.491236: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [14424]
	 [[{{node Placeholder/_4}}]]
2024-04-10 19:47:58.491852: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [14424]
	 [[{{node Placeholder/_4}}]]


Epoch 1/15


2024-04-10 19:48:39.030721: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8900
2024-04-10 19:48:42.641882: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7f4c5b851c40 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-04-10 19:48:42.641929: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
2024-04-10 19:48:42.793146: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-04-10 19:48:43.764722: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




2024-04-10 19:54:09.098013: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [3606]
	 [[{{node Placeholder/_4}}]]
2024-04-10 19:54:09.098414: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [3606]
	 [[{{node Placeholder/_0}}]]



Epoch 1: val_loss improved from inf to 1.18135, saving model to /home/jupyter/datasphere/project/MobileNetV1.h5
Epoch 2/15
Epoch 2: val_loss improved from 1.18135 to 1.04591, saving model to /home/jupyter/datasphere/project/MobileNetV1.h5
Epoch 3/15
Epoch 3: val_loss improved from 1.04591 to 1.04223, saving model to /home/jupyter/datasphere/project/MobileNetV1.h5
Epoch 4/15
Epoch 4: val_loss did not improve from 1.04223
Epoch 5/15
Epoch 5: val_loss did not improve from 1.04223
Epoch 6/15
Epoch 6: ReduceLROnPlateau reducing learning rate to 6.999999823165126e-05.

Epoch 6: val_loss did not improve from 1.04223
Epoch 7/15
 57/301 [====>.........................] - ETA: 3:35 - loss: 0.4552 - accuracy: 0.8249

In [None]:
test_dir = '/home/jupyter/datasphere/datasets/Renopaty/Test/'

In [None]:
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  test_dir,                  # Путь к изображениям
  validation_split = 0.9999,    # Устанавливаем разделение на обучающую и проверочную выборки (10%)
  subset = "training",       # Помечаем, что выборка обучающая (90%)
  seed = 123,                # Дополнительное случайное начальное число для перетасовки и преобразований
  image_size = image_size,   # Размер изображений, который был задан ранее
  batch_size = batch_size)   # Размер батча, который был задан ранее

In [None]:
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)     # Предварительная тестовая выборка

In [None]:
#Проверка модели на тестовых картинках не участвовавших в обучении
scores = Resnet_model.evaluate(val_ds, verbose=1)