In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, models

2025-03-21 23:11:21.403149: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742573481.415966   24079 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742573481.419823   24079 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-21 23:11:21.433537: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Report whether TensorFlow can see at least one physical GPU.
gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
    print("GPU доступен.")
else:
    print("GPU недоступен.")

GPU доступен.


In [3]:
import os

# Single seed shared by every random number generator used in the notebook.
SEED = 42

# NOTE(review): this variable is set after TensorFlow has been imported and
# already queried for GPUs, so it probably no longer affects this process.
# It is kept for anything launched from here that inherits the environment.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Restrict TensorFlow itself to the first GPU through its own API.
_gpus = tf.config.list_physical_devices('GPU')
if _gpus:
    try:
        tf.config.set_visible_devices(_gpus[:1], 'GPU')
    except RuntimeError as err:
        # Raised when the runtime is already initialized; visibility is then fixed.
        print(f"Could not restrict visible GPUs: {err}")

# Seeds Python's `random`, NumPy and TensorFlow together, so weight
# initialization and shuffling are repeatable across kernel restarts.
tf.keras.utils.set_random_seed(SEED)

In [4]:
# Side length (in pixels) of the square images fed to the networks.
SIZE = 512
# Square target shape derived from SIZE.
SHAPE = (SIZE,) * 2

In [5]:
import json
import cv2

def get_image_pool(sources):
    """Load annotated PNG images and their normalized bounding boxes.

    Every ``<name>.png`` found directly inside one of ``sources`` must have a
    sibling ``<name>.json`` file with the keys ``skip``, ``scale`` and ``box``.
    Entries whose ``skip`` value is truthy are ignored.

    Args:
        sources: iterable of directory paths to scan (non-recursively).

    Returns:
        A tuple ``(images, boxes)`` of NumPy arrays. ``images`` holds the
        grayscale images resized to ``SHAPE``; ``boxes`` holds one
        ``(x_min, x_max, y_min, y_max)`` row per image, divided by the
        original image size so values are relative to the image extent.

    Raises:
        OSError: if OpenCV cannot decode one of the PNG files.
    """
    # Collect extension-less paths. Sorting makes the sample order (and thus
    # any positional train/validation split) independent of the arbitrary
    # order returned by os.listdir.
    targets = []
    for source in sources:
        for file_name in sorted(os.listdir(source)):
            stem, ext = os.path.splitext(file_name)
            # splitext only strips the final extension, so dots elsewhere in
            # the path (e.g. "./data") do not truncate the target name.
            if ext == '.png' and os.path.isfile(os.path.join(source, file_name)):
                targets.append(os.path.join(source, stem))

    actual_shape = SHAPE
    images = []
    boxes = []
    for target in targets:
        json_fn = target + '.json'
        image_fn = target + '.png'
        with open(json_fn) as f:
            image_info = json.load(f)
        if image_info['skip']:
            continue

        raw = cv2.imread(image_fn)
        if raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"cv2 could not read image: {image_fn}")
        image = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)

        origin_shape = image.shape
        adapt_scale = (actual_shape[0] / origin_shape[0], actual_shape[1] / origin_shape[1])
        # Presumably the annotation was drawn on a copy of the image scaled by
        # this factor; dividing by it maps the box back to original pixels.
        scale = image_info['scale']

        # `box` is read as two corner points, each indexed as [x, y].
        box = image_info['box']
        xs = (box[0][0], box[1][0])
        ys = (box[0][1], box[1][1])
        y_min = (min(ys) / scale * adapt_scale[0]) / SIZE
        y_max = (max(ys) / scale * adapt_scale[0]) / SIZE
        x_min = (min(xs) / scale * adapt_scale[1]) / SIZE
        x_max = (max(xs) / scale * adapt_scale[1]) / SIZE

        # cv2.resize takes (width, height); SHAPE is square so the order is moot.
        images.append(cv2.resize(image, SHAPE))
        boxes.append((x_min, x_max, y_min, y_max))
    return np.array(images), np.array(boxes)

In [6]:
# Dataset root is taken from the environment so no local path is hard-coded.
base_img_dir = os.environ["BASE_IMG_DIR"]

image_dirs = [
    os.path.join(base_img_dir, subdir)
    for subdir in ('vindr-spinexr-train', 'vindr-spinexr-test')
]
images, boxes = get_image_pool(image_dirs)
# Every image must have exactly one box, otherwise training pairs are misaligned.
assert len(images) == len(boxes), "images and boxes must have the same length"
images.shape, images[0].shape

((1853, 512, 512), (512, 512))

In [7]:
# Features are the grayscale images, targets are the normalized boxes.
X = images
y = boxes

In [8]:
def calculate_iou(y_true, y_pred):
    """Compute the intersection-over-union of ground-truth and predicted boxes.

    Both tensors are split into four equal parts along the last axis, read in
    the order ``(x_min, x_max, y_min, y_max)``.

    Returns:
        A tensor with the IoU of each box pair; the split keeps the last axis.
    """
    # Unpack the four coordinates of each box.
    t_x0, t_x1, t_y0, t_y1 = tf.split(y_true, 4, axis=-1)
    p_x0, p_x1, p_y0, p_y1 = tf.split(y_pred, 4, axis=-1)

    # Sides of the overlap rectangle, clamped at zero for disjoint boxes.
    overlap_w = tf.maximum(0.0, tf.minimum(t_x1, p_x1) - tf.maximum(t_x0, p_x0))
    overlap_h = tf.maximum(0.0, tf.minimum(t_y1, p_y1) - tf.maximum(t_y0, p_y0))
    overlap = overlap_w * overlap_h

    # Areas of the ground-truth and predicted rectangles.
    area_true = (t_x1 - t_x0) * (t_y1 - t_y0)
    area_pred = (p_x1 - p_x0) * (p_y1 - p_y0)

    # Union = sum of both areas minus the part counted twice.
    union = area_true + area_pred - overlap

    # Epsilon keeps the division defined when the union is zero.
    return overlap / (union + tf.keras.backend.epsilon())

@tf.keras.utils.register_keras_serializable()
class IoUMetric(tf.keras.metrics.Metric):
    """Streaming mean IoU between true and predicted bounding boxes.

    Accumulates the sum of per-sample IoU values (from ``calculate_iou``) and
    the number of samples seen; ``result`` is their ratio.
    """

    def __init__(self, name="iou", **kwargs):
        super().__init__(name=name, **kwargs)
        # Running sum of IoU values over all samples seen so far.
        self.iou = self.add_weight(name="iou", initializer="zeros")
        # Running count of samples seen so far.
        self.total_samples = self.add_weight(name="total_samples", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add one batch to the running totals.

        ``sample_weight`` is accepted for API compatibility but is not used.
        """
        iou = calculate_iou(y_true, y_pred)
        self.iou.assign_add(tf.reduce_sum(iou))
        self.total_samples.assign_add(tf.cast(tf.shape(y_true)[0], tf.float32))

    def result(self):
        """Return the mean IoU, or 0 when no sample has been seen yet."""
        # divide_no_nan yields 0 instead of NaN for the 0 / 0 case.
        return tf.math.divide_no_nan(self.iou, self.total_samples)

    def reset_state(self):
        """Zero both accumulators (the reset hook named in current Keras docs)."""
        self.iou.assign(0.0)
        self.total_samples.assign(0.0)

    def reset_states(self):
        """Legacy alias kept for code that still calls the old hook name."""
        self.reset_state()

In [9]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler

def create_model(input_shape):
    """Build the small convolutional box regressor.

    Three Conv2D(3x3, ReLU, same padding) + MaxPooling2D(2x2) stages with 32,
    64 and 128 filters, followed by Flatten, an L2-regularized Dense(128,
    ReLU) layer and a Dense(4, sigmoid) output.

    Args:
        input_shape: shape of one input sample, passed to ``layers.Input``.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    stack = [layers.Input(shape=input_shape)]
    # Convolutional feature extractor: filters double at every stage.
    for n_filters in (32, 64, 128):
        stack.append(layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
        stack.append(layers.MaxPooling2D((2, 2)))
    # Regression head producing four values in [0, 1].
    stack.append(layers.Flatten())
    stack.append(layers.Dense(128, activation='relu', kernel_regularizer='l2'))
    stack.append(layers.Dense(4, activation='sigmoid'))
    return models.Sequential(stack)

# Single-channel (grayscale) square input.
input_shape = (SIZE, SIZE, 1)
custom_model = create_model(input_shape)
# Metrics tracked during training; passed to compile() below so the list is
# the single source of truth instead of being rebuilt inline.
metrics = [
    'mae',
    IoUMetric()
]
custom_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=metrics)

I0000 00:00:1742573545.353381   24079 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5468 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:2b:00.0, compute capability: 8.6


In [10]:
def lr_scheduler(epoch, lr):
    """Learning-rate schedule: unchanged before epoch 10, then scaled by 0.8.

    Args:
        epoch: epoch index the rate is requested for.
        lr: current learning rate.

    Returns:
        ``lr`` itself while ``epoch < 10``, otherwise ``lr * 0.8``.
    """
    return lr if epoch < 10 else lr * 0.8
# Train the custom CNN; Keras holds out 20% of the samples for validation.
history = custom_model.fit(
    X,
    y,
    epochs=5,
    batch_size=8,
    validation_split=0.2,
    callbacks=[LearningRateScheduler(lr_scheduler)],
)

Epoch 1/5


I0000 00:00:1742573547.220128   24219 service.cc:148] XLA service 0x7fcbd000d640 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742573547.220150   24219 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2025-03-21 23:12:27.241787: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1742573547.360377   24219 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-03-21 23:12:27.391998: W external/local_xla/xla/service/gpu/nvptx_compiler.cc:930] The NVIDIA driver's CUDA version is 12.4 which is older than the PTX compiler version 12.5.82. Because the driver is older than the PTX compiler version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


[1m  2/186[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 79ms/step - iou: 0.0564 - loss: 152.4325 - mae: 0.5215  

I0000 00:00:1742573552.421808   24219 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 89ms/step - iou: 0.3799 - loss: 45.1310 - mae: 0.1452 - val_iou: 0.6140 - val_loss: 4.6203 - val_mae: 0.0536 - learning_rate: 0.0010
Epoch 2/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 69ms/step - iou: 0.5680 - loss: 4.0296 - mae: 0.0549 - val_iou: 0.6188 - val_loss: 2.7031 - val_mae: 0.0458 - learning_rate: 0.0010
Epoch 3/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 69ms/step - iou: 0.6160 - loss: 2.4371 - mae: 0.0483 - val_iou: 0.5920 - val_loss: 1.7898 - val_mae: 0.0488 - learning_rate: 0.0010
Epoch 4/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 68ms/step - iou: 0.6025 - loss: 1.6430 - mae: 0.0494 - val_iou: 0.6349 - val_loss: 1.2713 - val_mae: 0.0426 - learning_rate: 0.0010
Epoch 5/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 69ms/step - iou: 0.6323 - loss: 1.1901 - mae: 0.0447 - val_iou: 0.6574 - val_loss: 0.9799 - val_

In [11]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models

# Transfer-learning baseline: frozen ImageNet ResNet50 backbone plus a small
# regression head. The input size follows SIZE so it stays in sync with the
# resized dataset instead of repeating the literal 512.
# NOTE(review): the Keras docs ask for resnet50.preprocess_input on inputs of
# this backbone; raw pixel values are fed here — confirm whether intended.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))
base_model.trainable = False
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(4, activation='sigmoid')
])
metrics = [
    'mae',
    IoUMetric()
]
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=metrics)
# Replicate the grayscale channel three times to match the 3-channel input.
# NOTE: this rebinds X (previously the single-channel array), so the custom
# CNN fit cell needs `X, y = images, boxes` re-run before it can run again.
X = np.stack((images,)*3, axis=-1)

X.shape, X[0].shape, images[0].shape

((1853, 512, 512, 3), (512, 512, 3), (512, 512))

In [12]:
# Train the ResNet50-based model with the same settings as the custom CNN.
history = model.fit(
    X,
    y,
    epochs=5,
    batch_size=8,
    validation_split=0.2,
    callbacks=[LearningRateScheduler(lr_scheduler)],
)

2025-03-21 23:13:41.903935: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1165492224 exceeds 10% of free system memory.
2025-03-21 23:13:42.867525: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1165492224 exceeds 10% of free system memory.


Epoch 1/5





[1m185/186[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 71ms/step - iou: 0.3018 - loss: 0.6300 - mae: 0.1376




[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - iou: 0.3022 - loss: 0.6295 - mae: 0.1373




[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 136ms/step - iou: 0.3027 - loss: 0.6291 - mae: 0.1371 - val_iou: 0.6492 - val_loss: 0.4896 - val_mae: 0.0395 - learning_rate: 0.0010
Epoch 2/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 87ms/step - iou: 0.5099 - loss: 0.5040 - mae: 0.0627 - val_iou: 0.6896 - val_loss: 0.4872 - val_mae: 0.0341 - learning_rate: 0.0010
Epoch 3/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 87ms/step - iou: 0.5848 - loss: 0.4949 - mae: 0.0482 - val_iou: 0.7137 - val_loss: 0.4859 - val_mae: 0.0305 - learning_rate: 0.0010
Epoch 4/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 86ms/step - iou: 0.6142 - loss: 0.4932 - mae: 0.0438 - val_iou: 0.6619 - val_loss: 0.4868 - val_mae: 0.0354 - learning_rate: 0.0010
Epoch 5/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 85ms/step - iou: 0.6168 - loss: 0.4932 - mae: 0.0438 - val_iou: 0.6939 - val_loss: 0.4863 - val_

In [13]:
import matplotlib.pyplot as plt
def show_res(image, model_, do_color_reshape=False):
    """Predict a bounding box for ``image`` and plot it next to the original.

    Args:
        image: grayscale image (it is passed to ``cv2.COLOR_GRAY2BGR``).
        model_: model whose ``predict`` returns one
            ``(x_min, x_max, y_min, y_max)`` row with values relative to the
            image size.
        do_color_reshape: when True the resized image is replicated into
            three channels; otherwise a single channel axis is appended.

    Returns:
        The ``matplotlib.pyplot`` module, with the figure already drawn.
    """
    origin_shape = image.shape
    height, width = origin_shape[0], origin_shape[1]

    image_reshaped = cv2.resize(image, SHAPE)
    if do_color_reshape:
        # Stacking already creates the channel axis: (H, W, 3).
        image_reshaped = np.stack((image_reshaped,) * 3, axis=-1)
    else:
        # Single-channel input: (H, W) -> (H, W, 1).
        image_reshaped = np.expand_dims(image_reshaped, axis=-1)
    # Add the batch axis expected by predict().
    batch = np.expand_dims(image_reshaped, axis=0)

    x_min, x_max, y_min, y_max = model_.predict(batch)[0]
    # Scale the relative coordinates back to pixels of the original image.
    top_left = (int(x_min * width), int(y_min * height))
    bottom_right = (int(x_max * width), int(y_max * height))

    overlay_points = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    # Thickness scales with the image but must stay at least 1 pixel.
    thickness = max(1, int(min(origin_shape) / 100))
    cv2.rectangle(overlay_points, top_left, bottom_right, color=(255, 0, 0), thickness=thickness)
    # Blend with the untouched image so the box is drawn semi-transparently.
    with_box = cv2.addWeighted(overlay_points, 0.5, cv2.cvtColor(image, cv2.COLOR_GRAY2BGR), 0.5, 0)

    plt.figure(figsize=(12, 6))
    plots = 2
    plt.subplot(1, plots, 1)
    plt.imshow(image, cmap='gray')
    plt.axis('off')
    plt.subplot(1, plots, 2)
    plt.imshow(with_box)
    plt.axis('off')
    plt.tight_layout()
    return plt

In [14]:
# File name of the demo image is taken from the environment.
test_image_name = os.environ["TEST_IMAGE_NAME"]
test_image_path = os.path.join(base_img_dir, 'vindr-spinexr-test', test_image_name)
test_image_bgr = cv2.imread(test_image_path)
if test_image_bgr is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read test image: {test_image_path}")
test_image = cv2.cvtColor(test_image_bgr, cv2.COLOR_BGR2GRAY)

In [15]:
# Hidden so that images from the private dataset do not end up in the public repository.
# show_res(test_image, custom_model)

In [16]:
# Hidden so that images from the private dataset do not end up in the public repository.
# show_res(test_image, model, True)