In [2]:
import torch
from torch import nn

import glob
import os
from tqdm import tqdm
from datetime import datetime
import json

from argparse import ArgumentParser

from itertools import combinations

import torchvision
from torchvision.transforms import v2
from torchvision import tv_tensors
from torchvision import models

import segmentation_models_pytorch as smp

import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import CSVLogger

from torchmetrics import classification
from torchmetrics import segmentation

from sklearn.model_selection import train_test_split
from sklearn import metrics

import numpy as np

import pandas as pd

In [18]:
confusion = classification.ConfusionMatrix(task='multiclass', num_classes=5)
target = torch.tensor([2, 1, 0, 0])
preds = torch.tensor([2, 1, 0, 1])
confusion.update(preds, target)

target = torch.tensor([3, 4, 2, 1])
preds = torch.tensor([4, 3, 0, 1])
confusion.update(preds, target)

confusion.compute()


tensor([[1, 1, 0, 0, 0],
        [0, 2, 0, 0, 0],
        [1, 0, 1, 0, 0],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 1, 0]])

In [16]:

torchmetrics.segmentation

AttributeError: module 'torchmetrics' has no attribute 'segmentation'

In [31]:
iou = segmentation.DiceScore(average='none', num_classes=5, input_format='index')

target = torch.tensor([[2, 1, 0, 1]])
preds = torch.tensor([[2, 1, 0, 1]])
iou.update(preds, target)

target = torch.tensor([[3, 4, 2, 1]])
preds = torch.tensor([[4, 3, 0, 1]])
#iou.update(preds, target)
iou.compute()

tensor([1., 1., 1., 0., 0.])

In [32]:
target.reshape(-1)

tensor([3, 4, 2, 1])

# Работа с данными

In [3]:
# datasets
class SegmentationDataset(torch.utils.data.Dataset):
    def __init__(self, path_to_dataset_root:str, samples_df:pd.DataFrame, channel_indices:list, transforms:v2._transform.Transform, device:torch.device):
        '''
        In:
            path_to_dataset_root - путь до корневой папки с датасетом
            samples_df - pandas.DataFrame с информацией о файлах
            channel_indices - список с номерами каналов мультиспектрального изображения
            transforms - аугментация изображений
        '''
        super().__init__()
        self.path_to_dataset_root = path_to_dataset_root
        self.samples_df = samples_df
        self.channel_indices = channel_indices
        self.transforms = transforms
        self.device = device

    def __len__(self):
        return len(self.samples_df)

    def __getitem__(self, idx):
        sample = self.samples_df.iloc[idx]

        file_name = sample['file_name']

        path_to_image = os.path.join(self.path_to_dataset_root, 'images', f'{file_name}.npy')
        path_to_labels = os.path.join(self.path_to_dataset_root, 'labels', f'{file_name}.npy')

        image = torch.as_tensor(np.load(path_to_image), dtype=torch.int16)[self.channel_indices]
        #image = np.load(path_to_image)
        # метки читаем как одноканальное изображение
        label = np.load(path_to_labels)
        label = np.where(label >= 0, label, 0)
        #label = torch.as_tensor(np.load(path_to_labels), dtype=torch.uint8).long()
        label = torch.as_tensor(label, dtype=torch.uint8).long()
        
        
        image = tv_tensors.Image(image, device=self.device)
        label = tv_tensors.Mask(label, device=self.device)

        transforms_dict = {'image':image, 'mask':label}
        transformed = self.transforms(transforms_dict)
        return transformed['image'], transformed['mask']#, image

# Описание нейронных сетей

In [60]:
def compute_pred_mask(pred):
    '''
    Определение маски классов на основе сгенерированной softmax маски
    '''
    #pred = pred.detach()
    _, pred_mask = pred.max(dim=1)
    return pred_mask#.cpu().numpy()

class SegmentationModule(L.LightningModule):
    def __init__(self, model:nn.Module, criterion:nn.Module, optimizer_cfg:dict, metrics_dict:dict, name2class_idx_dict:dict) -> None:
        '''
        Модуль Lightning для обучения сегментационной сети
        In:
            model - нейронная сеть
            criterion - функция потерь
            
            name2class_idx_dict - словарь с отображением {class_name(str): class_idx(int)}
        '''
        super().__init__()
        self.model = model
        self.criterion = criterion
        self.optimizer_cfg = optimizer_cfg
        self.metrics_dict = metrics_dict
        
        self.name2class_idx_dict = name2class_idx_dict
        # словарь, выполняющий обратное отображение class_idx в class_name
        self.class_idx2name_dict = {v:k for k, v in name2class_idx_dict.items()}
        
    def configure_optimizers(self):
        optimizer = self.optimizer_cfg['optmizer'](self.parameters(), **self.optimizer_cfg['optimizer_params'])
        ret_dict = {'optimizer': optimizer}
        if self.optimizer_cfg['lr_scheduler'] is not None:
            scheduler = self.optimizer_cfg['lr_scheduler'](optimizer, **self.optimizer_cfg['lr_scheduler'])
            ret_dict['lr_scheduler'] = scheduler
        
        return ret_dict

    def compute_metrics(self, pred_labels, true_labels, mode):
        metrics_names_list = self.metrics_dict[mode].keys()
        for metric_name in metrics_names_list:
            if 'dice' in metric_name.lower():
                self.metrics_dict[mode][metric_name].update(pred_labels, true_labels)
            else:
                self.metrics_dict[mode][metric_name].update(pred_labels.reshape(-1), true_labels.reshape(-1))
        
    
    def training_step(self, batch, batch_idx):
        data, true_labels = batch
        pred = self.model(data)
        loss = self.criterion(pred, true_labels)
        # вычисление сгенерированной маски
        pred_labels = compute_pred_mask(pred)
        #true_labels = true_labels.detach().cpu().numpy()
        
        self.compute_metrics(pred_labels=pred_labels, true_labels=true_labels, mode='train')

        # т.к. мы вычисляем общую ошибку на всей эпохе, то записываем в лог только значение функции потерь
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        data, true_labels = batch
        pred = self.model(data)
        loss = self.criterion(pred, true_labels)
        pred_labels = compute_pred_mask(pred)
        self.compute_metrics(pred_labels=pred_labels, true_labels=true_labels, mode='val')
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        return loss
    
    def log_metrics(self, mode):
        for metric_name, metric in self.metrics_dict[mode].items():
            metric_val = metric.compute()
            if 'confusion' in metric_name.lower():
                disp_name = f'{mode}_{metric_name}'
                self.log(disp_name, metric_val, on_step=False, on_epoch=True, prog_bar=False)
            else:
                for i, value in enumerate(metric_val):
                    class_name = self.class_idx2name_dict[i]
                    disp_name = f'{mode}_{metric_name}_{class_name}'
                    self.log(disp_name, value, on_step=False, on_epoch=True, prog_bar=True)
                disp_name = f'{mode}_{metric_name}_mean'
                self.log(disp_name, metric_val.mean(), on_step=False, on_epoch=True, prog_bar=True)
            self.metrics_dict[mode][metric_name].reset()

    def on_train_epoch_end(self):
        '''
        Декодирование результатов тренировочной эпохи и запись их в лог
        '''
        self.log_metrics(mode='train')
 
    def on_validation_epoch_end(self):
        '''
        Декодирование результатов тестовой эпохи и запись их в лог
        (работает точно также, как и )
        '''
        self.log_metrics(mode='val')

In [58]:
lst = [0.8390797972679138,0.6945005655288696,0.28147169947624207,0.16609252989292145,0.36803194880485535,0.7777265906333923,0.0514686144888401,0.4750562012195587,0.5050733685493469,0.26440125703811646,0.8035321235656738]
np.mean(lst)

np.float64(0.47513042695143004)

# Фабрики для создания моделей по конфигурациям

In [10]:
def create_model(config_dict, segmentation_nns_factory_dict):
    model_name = config_dict['segmentation_nn']['nn_architecture']
    # создаем нейронную сеть из фабрики
    model = segmentation_nns_factory_dict[model_name](**config_dict['segmentation_nn']['params'])
    multispecter_bands_indices = config_dict['multispecter_bands_indices']
    in_channels = len(multispecter_bands_indices)
    # замена входного слоя, если кол-во каналов изображения не равно трем
    if in_channels != 3:
        # получаем входной слой, специфический для конкретной нейронной сети
        input_conv = model.get_submodule(
            config_dict['segmentation_nn']['input_layer_params']['layer_path']
            )
        new_input_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=input_conv.out_channels,
            kernel_size=input_conv.kernel_size,
            #stride=conv1.stride,
            stride=config_dict['segmentation_nn']['input_layer_params']['stride'],
            #padding=conv1.padding,
            padding=config_dict['segmentation_nn']['input_layer_params']['padding'],
            dilation=input_conv.dilation,
            groups=input_conv.groups,
            bias=input_conv.bias is not None
        )
        if config_dict['segmentation_nn']['params']['encoder_weights'] is not None:
            new_weight = torch.cat([input_conv.weight.mean(dim=1).unsqueeze(1)]*in_channels, dim=1)
            input_conv.weight = nn.Parameter(new_weight)
            if input_conv.bias is not None:
                new_input_conv.bias = input_conv.bias
        # перезаписываем входной слой исходя из специфики оригинальной сети
        model.set_submodule(
            config_dict['segmentation_nn']['input_layer_params']['layer_path'],
            new_input_conv
            )
        
    return model

# Конфигурации

In [82]:
segmentation_nns_factory_dict = {
    'unet': smp.Unet,
    'fpn': smp.FPN,
}

criterion_factory_dict = {
    'crossentropy': nn.CrossEntropyLoss,
}


config_dict = {
    'segmentation_nn': {
        'nn_architecture': 'fpn',
        'params': {
            'encoder_name': "efficientnet-b0",
            'encoder_depth': 5,
            'encoder_weights': "imagenet",
            'decoder_use_norm': "batchnorm",
            'decoder_channels': (256, 128, 64, 32, 16),
            'decoder_attention_type': None,
            'decoder_interpolation': "nearest",
            'in_channels': 3,
            'classes': 11,
            'activation': None,
            'aux_params': None,
        },
        'input_layer_params': {
            'layer_path': 'encoder._conv_stem',
            'stride': (2, 2),
            'padding': (1, 1),
            'layers_num': 1,
        }
    },
    'multispecter_bands_indices': [1, 2, 3, 4, 5, 6, 7, ],
    'input_image_size': 96,
    'loss': {
        'type': 'crossentropy',
        'params': {'weight': 'classes'}
    },
    'device': 'cuda:0',
    'path_to_dataset_root': r'C:\Users\admin\python_programming\DATA\MULTISPECTRAL_DATA_FOR_TRAINIG_NEW'
}







# Создание модели

In [83]:
path_to_dataset_root = config_dict['path_to_dataset_root']

path_to_dataset_info_csv = os.path.join(path_to_dataset_root, 'data_info_table.csv')
path_to_surface_classes_json = os.path.join(path_to_dataset_root, 'surface_classes.json')

input_image_size = config_dict['input_image_size']
multispecter_bands_indices = config_dict['multispecter_bands_indices']
device = config_dict['device']

# чтение списка имен классов поверхностей
with open(path_to_surface_classes_json) as fd:
    surface_classes_list = json.load(fd)
# чтение таблицы с информацией о каждом изображении в выборке
images_df = pd.read_csv(path_to_dataset_info_csv)

path_to_partition_json = os.path.join(path_to_dataset_root, 'dataset_partition.json')
# чтение словаря со списками квадратов, находящихся в обучающей и тестовой выборке
with open(path_to_partition_json) as fd:
    partition_dict = json.load(fd)

# формирование pandas DataFrame-ов с информацией об изображениях обучающей и тестовой выборках
train_images_df = []
for train_square in partition_dict['train_squares']:
    train_images_df.append(images_df[images_df['square_id']==train_square])
train_images_df = pd.concat(train_images_df, ignore_index=True)

test_images_df = []
for test_square in partition_dict['test_squares']:
    test_images_df.append(images_df[images_df['square_id']==test_square])
test_images_df = pd.concat(test_images_df, ignore_index=True)

#train_images_df, test_images_df = train_test_split(images_df, test_size=0.3, random_state=0)

class_num = images_df['class_num'].iloc[0]

# формирование словаря, отображающейго имя класса поверхности в индекс класса
class_name2idx_dict = {n:i for i, n in enumerate(surface_classes_list)}

# вычисление распределений пикселей в классах поверхностей 
classes_pixels_distribution_df = images_df[surface_classes_list]
classes_pixels_num = classes_pixels_distribution_df.sum()
classes_weights = classes_pixels_num / classes_pixels_num.sum()
classes_weights = classes_weights[surface_classes_list].to_numpy().astype(np.float32)

input_image_size = config_dict['input_image_size']
train_transforms = v2.Compose(
    [v2.Resize((input_image_size,input_image_size), antialias=True),v2.ToDtype(torch.float32, scale=True)])
test_transforms = v2.Compose(
    [v2.Resize((input_image_size,input_image_size), antialias=True),v2.ToDtype(torch.float32, scale=True)])

# если ф-ция потерь перекрестная энтропия, то проверяем, есть ли там веса классов
if config_dict['loss']['type'] == 'crossentropy':
    # если в параметрах функции потерь стоит строка 'classes', надо передать в функцию вектор весов классов
    if 'weight' in config_dict['loss']['params']:
        if config_dict['loss']['params']['weight'] == 'classes':
            config_dict['loss']['params']['weight'] = torch.tensor(classes_weights)

# создание функции потерь
criterion = criterion_factory_dict[config_dict['loss']['type']](**config_dict['loss']['params'])

model = create_model(config_dict, segmentation_nns_factory_dict)
model = model.to(device)

# создаем датасеты и даталоадеры
train_dataset = SegmentationDataset(path_to_dataset_root=path_to_dataset_root, samples_df=train_images_df, channel_indices=multispecter_bands_indices, transforms=train_transforms, device=device)
test_dataset = SegmentationDataset(path_to_dataset_root=path_to_dataset_root, samples_df=test_images_df, channel_indices=multispecter_bands_indices, transforms=test_transforms, device=device)
#train_dataset = SegmentationDatasetApplSurf(path_to_dataset_root=path_to_dataset_root, samples_df=test_images_df, channel_indices=channel_indices, name2class_idx_dict=class_name2idx_dict, applicable_surfaces_dict=applicable_surfaces_dict, transforms=test_transforms, device=device)
#test_dataset = SegmentationDatasetApplSurf(path_to_dataset_root=path_to_dataset_root, samples_df=test_images_df, channel_indices=channel_indices, name2class_idx_dict=class_name2idx_dict, applicable_surfaces_dict=applicable_surfaces_dict, transforms=test_transforms, device=device)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16)

# тестовое чтение данных
for data, labels in test_loader:
    break
    pred = model(data)
    loss = criterion(pred, labels)

# тестовая обработка данных нейронной сетью
ret = model(data)
print(data.shape, ret.shape)

createion_time_str = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
nn_arch_str = config_dict["segmentation_nn"]["nn_architecture"]
nn_encoder_str = config_dict["segmentation_nn"]["params"]["encoder_name"]
model_name = f'{nn_arch_str}_{nn_encoder_str} {createion_time_str}'
model_name

torch.Size([16, 7, 96, 96]) torch.Size([16, 11, 96, 96])


'fpn_efficientnet-b0 2025-09-04-18-27-31'

# Обучение

In [75]:
epoch_num = 100

print('#############################')
print(model_name)
print('#############################')
print()

# создаем список словарей с информацией о вычисляемых метриках с помощью multiclass confusion matrix
# см. подробнее ддокументацию к функции compute_metric_from_confusion
metrics_dict = {
    'train': {
        'iou': classification.JaccardIndex(task='multiclass', average='none', num_classes=len(class_name2idx_dict)).to(device),
        'precision': classification.Precision(task='multiclass', average='none', num_classes=len(class_name2idx_dict)).to(device),
        'recall': classification.Precision(task='multiclass', average='none', num_classes=len(class_name2idx_dict)).to(device),
    },
    'val': {
        'iou': classification.JaccardIndex(task='multiclass', average='none', num_classes=len(class_name2idx_dict)).to(device),
        'precision': classification.Precision(task='multiclass', average='none', num_classes=len(class_name2idx_dict)).to(device),
        'recall': classification.Precision(task='multiclass', average='none', num_classes=len(class_name2idx_dict)).to(device),
    }
}

optimizer_cfg = {
    'optmizer': torch.optim.Adam,
    'optimizer_params':{},
    'lr_scheduler': None,
    'lr_scheduler_params': {},

}

# Создаем модуль Lightning
segmentation_module = SegmentationModule(model, criterion, optimizer_cfg, metrics_dict, class_name2idx_dict)

# задаем путь до папки с логгерами и создаем логгер, записывающий результаты в csv
path_to_saving_dir = 'saving_dir'
logger = CSVLogger(
    save_dir = path_to_saving_dir,
    name=model_name, 
    flush_logs_every_n_steps=1,
    )

# создаем объект, записывающий в чекпоинт лучшую модель
path_to_save_model_dir = os.path.join(path_to_saving_dir, model_name)
os.makedirs(path_to_save_model_dir, exist_ok=True)
checkpoint_callback = ModelCheckpoint(
    mode="max",
    filename=model_name+"-{epoch:02d}-{val_iou_mean:.3}",
    dirpath=path_to_save_model_dir, 
    save_top_k=1, monitor="val_iou_mean"
    )

trainer = L.Trainer(logger=logger,
        max_epochs=epoch_num, 
        callbacks=[checkpoint_callback],
        accelerator = 'gpu'
        )

# сохраняем конфигурацию
path_to_config = os.path.join(path_to_save_model_dir, 'config.json')
with open(path_to_config, 'w', encoding='utf-8') as fd:
    json.dump(config_dict, fd, indent=4)

trainer.fit(segmentation_module , train_loader, test_loader)

#############################
unet_efficientnet-b0 2025-09-03-19-54-39
#############################



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\admin\miniconda3\envs\deep_learning\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\admin\python_programming\MultispectralSegmentation\saving_dir\unet_efficientnet-b0 2025-09-03-19-54-39 exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Unet             | 6.3 M  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
6.3 M     Trainable params
0         Non-trainable params
6.3 M     Total params
25.016    Total estimated model params size (MB)
324       Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\admin\miniconda3\envs\deep_learning\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


                                                                           

c:\Users\admin\miniconda3\envs\deep_learning\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Epoch 99: 100%|██████████| 88/88 [01:01<00:00,  1.44it/s, v_num=0, val_loss=1.650, val_iou_UNLABELED=0.785, val_iou_buildings_territory=0.662, val_iou_natural_ground=0.0957, val_iou_natural_grow=0.114, val_iou_natural_wetland=0.227, val_iou_natural_wood=0.533, val_iou_quasi_natural_ground=0.0302, val_iou_quasi_natural_grow=0.421, val_iou_quasi_natural_wetland=0.0479, val_iou_transport=0.139, val_iou_water=0.812, val_iou_mean=0.352, val_precision_UNLABELED=0.923, val_precision_buildings_territory=0.845, val_precision_natural_ground=0.101, val_precision_natural_grow=0.408, val_precision_natural_wetland=0.260, val_precision_natural_wood=0.854, val_precision_quasi_natural_ground=0.040, val_precision_quasi_natural_grow=0.580, val_precision_quasi_natural_wetland=0.0489, val_precision_transport=0.646, val_precision_water=0.906, val_precision_mean=0.510, val_recall_UNLABELED=0.923, val_recall_buildings_territory=0.845, val_recall_natural_ground=0.101, val_recall_natural_grow=0.408, val_recall_

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 88/88 [01:01<00:00,  1.44it/s, v_num=0, val_loss=1.650, val_iou_UNLABELED=0.785, val_iou_buildings_territory=0.662, val_iou_natural_ground=0.0957, val_iou_natural_grow=0.114, val_iou_natural_wetland=0.227, val_iou_natural_wood=0.533, val_iou_quasi_natural_ground=0.0302, val_iou_quasi_natural_grow=0.421, val_iou_quasi_natural_wetland=0.0479, val_iou_transport=0.139, val_iou_water=0.812, val_iou_mean=0.352, val_precision_UNLABELED=0.923, val_precision_buildings_territory=0.845, val_precision_natural_ground=0.101, val_precision_natural_grow=0.408, val_precision_natural_wetland=0.260, val_precision_natural_wood=0.854, val_precision_quasi_natural_ground=0.040, val_precision_quasi_natural_grow=0.580, val_precision_quasi_natural_wetland=0.0489, val_precision_transport=0.646, val_precision_water=0.906, val_precision_mean=0.510, val_recall_UNLABELED=0.923, val_recall_buildings_territory=0.845, val_recall_natural_ground=0.101, val_recall_natural_grow=0.408, val_recall_

In [None]:
torch.optim