In [1]:
import pandas as pd
import numpy as np
import yaml
import os
import glob

import re

from tqdm import tqdm
import warnings
from matplotlib import pyplot as plt
%matplotlib inline

# Сбор результатов

In [47]:
paths_to_results_csv_list = glob.glob(r'Z:\mikhail_u\segmentation_results_DATCHIKI\*\*\version_0\metrics.csv')
applicable_surfaces = [
    "natural_ground",
    "natural_grow",
    "natural_wetland",
    "natural_wood",
]

non_applicable_surfaces = [
    "buildings_territory",
    "quasi_natural_ground",
    "quasi_natural_grow",
    "quasi_natural_wetland",
    "transport",
    "water",   
]

appl_num = len(applicable_surfaces)
non_appl_num = len(non_applicable_surfaces)

all_results = []
for path_to_metrics_csv in tqdm(paths_to_results_csv_list):
    split_path = path_to_metrics_csv.split(os.sep)
    nn_name = split_path[-3]
    if 'custom_' in nn_name:
        nn_name = nn_name.replace('custom_', '')

    nn_arch, band_stride = nn_name.split('__')
    nn_arch = nn_arch.split('_')
    decoder = nn_arch[0]
    encoder = '_'.join(nn_arch[1:])
    
    match = re.match(r"^b_(.*)_st_(.*)$", band_stride)
    if match:
        band = match.group(1)  # "substr1"
        stride = match.group(2)  # "substr2"

    stride = stride.split(' ')[0]

    decoder_encoder_bands_stride = {
        #'name': nn_name,
        'decoder': decoder,
        'encoder': encoder,
        'bands': band,
        'stride': stride,
    }

    decoder_encoder_bands_stride = pd.Series(data=decoder_encoder_bands_stride)

    metrics_df = pd.read_csv(path_to_metrics_csv)
    val_metrics = [c for c in metrics_df.columns if c.startswith('val_')]
    best_val_results = metrics_df[metrics_df['val_iou_mean']==metrics_df['val_iou_mean'].max()]
    best_val_results = best_val_results[val_metrics].iloc[0]
    # вычисление средних значений метрик для 
    iou_appl_metrics = [
        n for n in best_val_results.index
        if n.split('val_iou_')[-1] in applicable_surfaces]

    precision_appl_metrics = [
        n for n in best_val_results.index
        if n.split('val_precision_')[-1] in applicable_surfaces]

    recall_appl_metrics = [
        n for n in best_val_results.index
        if n.split('val_recall_')[-1] in applicable_surfaces]

    iou_non_appl_metrics = [
        n for n in best_val_results.index
        if n.split('val_iou_')[-1] in non_applicable_surfaces]

    precision_non_appl_metrics = [
        n for n in best_val_results.index
        if n.split('val_precision_')[-1] in non_applicable_surfaces]

    recall_non_appl_metrics = [
        n for n in best_val_results.index
        if n.split('val_recall_')[-1] in non_applicable_surfaces]
    mean_appl_iou = best_val_results[iou_appl_metrics].mean().item()
    mean_appl_precision = best_val_results[precision_appl_metrics].mean().item()
    mean_appl_recall = best_val_results[recall_appl_metrics].mean().item()

    mean_non_appl_iou = best_val_results[iou_non_appl_metrics].mean().item()
    mean_non_appl_precision = best_val_results[precision_non_appl_metrics].mean().item()
    mean_non_appl_recall = best_val_results[recall_non_appl_metrics].mean().item()
    #####################################
    mean_appl_surfaces = pd.Series(
        data={
            'mean_non_appl_iou': mean_non_appl_iou,
            'mean_non_appl_precision': mean_non_appl_precision,
            'mean_non_appl_recall': mean_non_appl_recall,
            'mean_appl_iou': mean_appl_iou,
            'mean_appl_precision': mean_appl_precision,
            'mean_appl_recall': mean_appl_recall,
            }
    )
    #appl_results = 
    results_entry = pd.concat([decoder_encoder_bands_stride, mean_appl_surfaces, best_val_results])

    all_results.append(results_entry)

all_results = pd.DataFrame(all_results)
all_results.to_csv('saving_dir/all_results2.csv', index=False)

100%|██████████| 240/240 [00:23<00:00, 10.41it/s]


In [46]:
path_to_metrics_csv

'Z:\\mikhail_u\\segmentation_results_DATCHIKI\\z_other\\att_unet_efficientnet-b0_cross_agg_2conv 2025-09-21T23-34-22\\version_0\\metrics.csv'

# Преобразование результатов

In [None]:
all_results_df = pd.read_csv(r'saving_dir\all_results2.csv')
encoders_list = all_results_df['encoder'].unique()
decoders_list = all_results_df['decoder'].unique()
encoders_list = ['tu-cspdarknet53', 'efficientnet-b2', 'tu-maxvit_tiny_rw_224']
decoders_list = ['fcn1', 'unet++', 'unet', 'fpn']
saving_metrics_names = ['iou', 'precision', 'recall']

bands_names_list = ['rgb', '10m', '10-20m', 'full_sp']
strides_list = [1, 2]

right_order_classes = [
    
    'buildings_territory',
    'transport',
    'water',
    'quasi_natural_ground',
    'quasi_natural_grow',
    'quasi_natural_wetland',
    'natural_ground',
    'natural_grow',
    'natural_wetland',
    'natural_wood',
    'UNLABELED',
    'mean_non_appl',
    'mean_appl',
    'mean',
    ]

save_only_mean_values = False
save_only_class_reslts = False

# итерирование по декодерам
for decoder_name in decoders_list:
    decoder_df = all_results_df[all_results_df['decoder']==decoder_name]
    for saving_metric_name in saving_metrics_names:
        # получение имен метрик выполняется следующим образом:
        # выполняется поиск в именах колонок имени сохраняемой метрики: 'iou', 'precision', 'recall'
        metric_names = np.array([c for c in all_results_df.columns if saving_metric_name in c])
        class_names_args = []
        for n in right_order_classes:
            for i, col in enumerate(metric_names):
                if n == 'mean' and not save_only_class_reslts:
                    #print(n, col)
                    if n in col and 'appl' not in col:
                        class_names_args.append(i)
                        break
                elif not save_only_mean_values:
                    if n in col:
                        class_names_args.append(i)
                        break

        metric_names = metric_names[class_names_args].tolist()
        columns_multiindex = pd.MultiIndex.from_product([bands_names_list, strides_list])
        rows_multiindex = pd.MultiIndex.from_product([metric_names, encoders_list])

        new_results_df = pd.DataFrame(columns=columns_multiindex, index=rows_multiindex)
        for metric_name in metric_names:
            for encoder_name in encoders_list:
                #condition =  & decoder_df['encoder']==encoder_name
                encoder_df = decoder_df[decoder_df['encoder']==encoder_name]
                row_index = (metric_name, encoder_name)
                
                for band_name in bands_names_list:
                    for stride in strides_list:
                        condition = (encoder_df['bands'] == band_name) & (encoder_df['stride'] == stride)
                        band_stride_df = encoder_df[condition]
                        metric_val = band_stride_df[metric_name].iloc[0]
                        col_index = (band_name, stride)
                        new_results_df.loc[row_index, col_index] = np.round(metric_val, decimals=3).item()
        saving_name = f'{decoder_name}_{saving_metric_name}'
        if save_only_mean_values:
            saving_name = f'{saving_name}_mean'
        if save_only_mean_values:
            saving_name = f'{saving_name}_classes'
        saving_name = f'{saving_name}.csv'
        path_to_save = os.path.join('saving_dir', saving_name)

        new_results_df.to_csv(path_to_save)

new_results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,rgb,rgb,10m,10m,10-20m,10-20m,full_sp,full_sp
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,1,2,1,2,1,2
val_recall_buildings_territory,tu-cspdarknet53,0.845,0.845,0.842,0.853,0.829,0.849,0.853,0.84
val_recall_buildings_territory,efficientnet-b2,0.846,0.856,0.85,0.866,0.842,0.843,0.872,0.879
val_recall_buildings_territory,tu-maxvit_tiny_rw_224,0.865,0.875,0.877,0.901,0.853,0.832,0.861,0.872
val_recall_transport,tu-cspdarknet53,0.493,0.429,0.519,0.465,0.472,0.443,0.477,0.481
val_recall_transport,efficientnet-b2,0.397,0.397,0.527,0.393,0.475,0.363,0.478,0.41
val_recall_transport,tu-maxvit_tiny_rw_224,0.458,0.422,0.495,0.446,0.469,0.459,0.46,0.399
val_recall_water,tu-cspdarknet53,0.777,0.822,0.907,0.926,0.918,0.906,0.922,0.906
val_recall_water,efficientnet-b2,0.787,0.783,0.9,0.909,0.924,0.915,0.923,0.918
val_recall_water,tu-maxvit_tiny_rw_224,0.862,0.788,0.917,0.897,0.922,0.916,0.921,0.92
val_recall_quasi_natural_ground,tu-cspdarknet53,0.054,0.02,0.011,0.07,0.054,0.0,0.005,0.039


In [None]:
for path_to_metric in tqdm(paths_to_results_csv_list):
    path_to_metrics_dir, _ = os.path.split(path_to_metric)
    path_to_train_confusion = os.path.join(path_to_metrics_dir, 'train_confusion_matrices.csv')
    path_to_val_confusion = os.path.join(path_to_metrics_dir, 'val_confusion_matrices.csv')
    val_confusion_df = pd.read_csv(path_to_val_confusion)
    train_confusion_df = pd.read_csv(path_to_train_confusion)
    metrics_df = pd.read_csv(path_to_metric)
    
    epochs_list = val_confusion_df['epoch'].unique()
    classes_list = val_confusion_df['classes'].unique()

    # замещаем результаты recall
    for class_name in classes_list:
        train_recall = f'train_recall_{class_name}'
        val_recall = f'val_recall_{class_name}'
        metrics_df[train_recall] = [np.nan for i in range(len(metrics_df))]
        metrics_df[val_recall] = [np.nan for i in range(len(metrics_df))]

    
    for epoch in epochs_list:
        train_epoch_confusion = train_confusion_df[train_confusion_df['epoch']==epoch]
        val_epoch_confusion = val_confusion_df[val_confusion_df['epoch']==epoch]
        if epoch==0:
            val_epoch_confusion = val_epoch_confusion.iloc[len(classes_list):]

        epoch_metrics_df = metrics_df[metrics_df['epoch']==epoch]
        train_index = epoch_metrics_df[~epoch_metrics_df['train_precision_mean'].isna()].index[0].item()
        val_index = epoch_metrics_df[~epoch_metrics_df['val_precision_mean'].isna()].index[0].item()

        train_recall_list = []
        val_recall_list = []
        for class_name in classes_list:
            val_class_value = val_epoch_confusion[val_epoch_confusion['classes']==class_name][class_name]
            val_recall = val_class_value/val_epoch_confusion[val_epoch_confusion['classes']==class_name][classes_list].sum(axis=1)
            val_recall = val_recall.item()

            train_class_value = train_epoch_confusion[train_epoch_confusion['classes']==class_name][class_name]
            train_recall = train_class_value/train_epoch_confusion[train_epoch_confusion['classes']==class_name][classes_list].sum(axis=1)
            train_recall = train_recall.item()
            train_recall_list.append(train_recall)
            val_recall_list.append(val_recall)

            train_recall_name = f'train_recall_{class_name}'
            val_recall_name = f'val_recall_{class_name}'
            
            metrics_df.loc[val_index, val_recall_name] = val_recall
            metrics_df.loc[train_index, train_recall_name] = train_recall

        mean_train_recall = np.mean(train_recall_list).item()
        mean_val_recall = np.mean(val_recall_list).item()
        metrics_df.loc[train_index, 'train_recall_mean'] = mean_train_recall
        metrics_df.loc[val_index, 'val_recall_mean'] = mean_val_recall
    metrics_df.to_csv(path_to_metric, index=False)

train_recall_name

100%|██████████| 240/240 [52:09<00:00, 13.04s/it]


'train_recall_water'

In [None]:
metrics_names = ['val_iou', 'val_precision', 'val_recall']
iou_appl_metrics = [
    n for n in best_val_results.index
    if n.split('val_iou_')[-1] in applicable_surfaces]

precision_appl_metrics = [
    n for n in best_val_results.index
    if n.split('val_precision_')[-1] in applicable_surfaces]

recall_appl_metrics = [
    n for n in best_val_results.index
    if n.split('val_recall_')[-1] in applicable_surfaces]

iou_non_appl_metrics = [
    n for n in best_val_results.index
    if n.split('val_iou_')[-1] in non_applicable_surfaces]

precision_non_appl_metrics = [
    n for n in best_val_results.index
    if n.split('val_precision_')[-1] in non_applicable_surfaces]

recall_non_appl_metrics = [
    n for n in best_val_results.index
    if n.split('val_recall_')[-1] in non_applicable_surfaces]
mean_appl_iou = best_val_results[iou_appl_metrics].mean().item()
mean_appl_precision = best_val_results[precision_appl_metrics].mean().item()
mean_appl_recall = best_val_results[recall_appl_metrics].mean().item()

mean_non_appl_iou = best_val_results[iou_non_appl_metrics].mean().item()
mean_non_appl_precision = best_val_results[precision_non_appl_metrics].mean().item()
mean_non_appl_recall = best_val_results[recall_non_appl_metrics].mean().item()
mean_non_appl_iou, mean_non_appl_precision, mean_non_appl_recall
mean_appl_iou, mean_appl_precision, mean_appl_recall


(0.44228480011224747, 0.6340819597244263, 0.6340819597244263)

# Анализ результатов

In [70]:
path_to_all_results = r'saving_dir\all_results2.csv'
encoders_list = ['tu-cspdarknet53', 'efficientnet-b2', 'tu-maxvit_tiny_rw_224']
decoders_list = ['fcn1', 'unet++', 'unet', 'fpn']
saving_metrics_names = ['iou', 'precision', 'recall']

bands_names_list = ['rgb', '10m', '10-20m', 'full_sp']
strides_list = [1, 2]

right_order_classes = [
    'mean',
    'mean_non_appl',
    'mean_appl',
    'buildings_territory',
    'transport',
    'water',
    'quasi_natural_ground',
    'quasi_natural_grow',
    'quasi_natural_wetland',
    'natural_ground',
    'natural_grow',
    'natural_wetland',
    'natural_wood',
    'UNLABELED',
    ]

all_results_df = pd.read_csv(path_to_all_results)

# Фильтрация энкодеров и декодеров
#
# составляем булеву таблицу, где строки - строки исходной таблицы, а столбцы - булевы значения 
# выражающие наличие/отсутствие энкодера из списка допустимых.
# отбор данных - операция any (логическое ИЛИ), выполненная вдоль строк
encoder_condition = pd.DataFrame({e:all_results_df['encoder']==e for e in encoders_list}).any(axis='columns')
# составляем булеву таблицу, где строки - строки исходной таблицы, а столбцы - булевы значения 
# выражающие наличие/отсутствие энкодера из списка допустимых.
# отбор данных - операция any (логическое ИЛИ), выполненная вдоль строк
decoder_condition = pd.DataFrame({d:all_results_df['decoder']==d for d in decoders_list}).any(axis='columns')
condition = encoder_condition & decoder_condition
all_results_df = all_results_df[condition]
all_results_df = all_results_df.round(decimals=3)

for decoder_name in decoders_list:
    decoder_df = all_results_df[all_results_df['decoder']==decoder_name]
    decoder_df['encoder'] = decoder_df['encoder'].apply(lambda x: {n: f'{j}_{n}' for j, n in enumerate(encoders_list)}[x])
    decoder_df['bands'] = decoder_df['bands'].apply(lambda x: {n: f'{j}_{n}' for j, n in enumerate(bands_names_list)}[x])
    decoder_df = decoder_df.sort_values(by=['encoder', 'bands', 'stride'])
    # общие имена: IoU, Precision, Recall
    for saving_metric_name in saving_metrics_names:
        # получение имен метрик выполняется следующим образом:
        # выполняется поиск в именах колонок имени сохраняемой метрики: 'iou', 'precision', 'recall'
        metric_names = np.array([c for c in all_results_df.columns if saving_metric_name in c])
        class_names_args = []
        for n in right_order_classes:
            for i, col in enumerate(metric_names):
                if n == 'mean':
                    #print(n, col)
                    if n in col and 'appl' not in col:
                        class_names_args.append(i)
                        break
                
                elif n in col:
                    class_names_args.append(i)
                    break

        metric_names = metric_names[class_names_args].tolist()
        metric_df = decoder_df[['decoder', 'encoder', 'bands', 'stride'] + metric_names]
   
        path_to_save = os.path.join('saving_dir', '0_right_order', f'{decoder_name}_{saving_metric_name}.csv')
        metric_df.to_csv(path_to_save, index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  decoder_df['encoder'] = decoder_df['encoder'].apply(lambda x: {n: f'{j}_{n}' for j, n in enumerate(encoders_list)}[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  decoder_df['bands'] = decoder_df['bands'].apply(lambda x: {n: f'{j}_{n}' for j, n in enumerate(bands_names_list)}[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

In [71]:
decoder_df

Unnamed: 0,decoder,encoder,bands,stride,mean_non_appl_iou,mean_non_appl_precision,mean_non_appl_recall,mean_appl_iou,mean_appl_precision,mean_appl_recall,...,val_recall_mean,val_recall_natural_ground,val_recall_natural_grow,val_recall_natural_wetland,val_recall_natural_wood,val_recall_quasi_natural_ground,val_recall_quasi_natural_grow,val_recall_quasi_natural_wetland,val_recall_transport,val_recall_water
150,fpn,0_tu-cspdarknet53,0_rgb,1,0.465,0.645,0.568,0.477,0.646,0.603,...,0.605,0.546,0.351,0.588,0.927,0.054,0.61,0.63,0.493,0.777
151,fpn,0_tu-cspdarknet53,0_rgb,2,0.47,0.644,0.554,0.456,0.623,0.585,...,0.591,0.499,0.365,0.551,0.923,0.02,0.605,0.605,0.429,0.822
146,fpn,0_tu-cspdarknet53,1_10m,1,0.531,0.66,0.628,0.459,0.642,0.579,...,0.629,0.48,0.385,0.516,0.936,0.011,0.635,0.856,0.519,0.907
147,fpn,0_tu-cspdarknet53,1_10m,2,0.521,0.678,0.613,0.467,0.65,0.579,...,0.621,0.542,0.299,0.541,0.935,0.07,0.607,0.758,0.465,0.926
144,fpn,0_tu-cspdarknet53,2_10-20m,1,0.503,0.683,0.587,0.479,0.66,0.595,...,0.612,0.609,0.301,0.521,0.95,0.054,0.616,0.634,0.472,0.918
145,fpn,0_tu-cspdarknet53,2_10-20m,2,0.497,0.64,0.588,0.47,0.615,0.622,...,0.623,0.657,0.347,0.547,0.937,0.0,0.642,0.689,0.443,0.906
148,fpn,0_tu-cspdarknet53,3_full_sp,1,0.509,0.648,0.596,0.483,0.633,0.621,...,0.626,0.581,0.356,0.615,0.931,0.005,0.559,0.762,0.477,0.922
149,fpn,0_tu-cspdarknet53,3_full_sp,2,0.505,0.718,0.6,0.453,0.615,0.582,...,0.615,0.542,0.257,0.595,0.934,0.039,0.708,0.625,0.481,0.906
230,fpn,1_efficientnet-b2,0_rgb,1,0.484,0.656,0.573,0.433,0.589,0.57,...,0.595,0.488,0.3,0.583,0.907,0.048,0.626,0.731,0.397,0.787
231,fpn,1_efficientnet-b2,0_rgb,2,0.475,0.646,0.556,0.39,0.563,0.506,...,0.563,0.242,0.362,0.506,0.916,0.017,0.57,0.713,0.397,0.783


In [None]:
path_to_all_results = r'saving_dir\all_results2.csv'
encoders_list = ['tu-cspdarknet53', 'efficientnet-b2', 'tu-maxvit_tiny_rw_224']
decoders_list = ['fcn1', 'unet++', 'unet', 'fpn']
saving_metrics_names = ['iou', 'precision', 'recall']

bands_names_list = ['rgb', '10m', '10-20m', 'full_sp']
strides_list = [1, 2]

right_order_classes = [
    'mean',
    'mean_non_appl',
    'mean_appl',
    'buildings_territory',
    'transport',
    'water',
    'quasi_natural_ground',
    'quasi_natural_grow',
    'quasi_natural_wetland',
    'natural_ground',
    'natural_grow',
    'natural_wetland',
    'natural_wood',
    'UNLABELED',
    ]

all_results_df = pd.read_csv(path_to_all_results)

# Фильтрация энкодеров и декодеров
#
# составляем булеву таблицу, где строки - строки исходной таблицы, а столбцы - булевы значения 
# выражающие наличие/отсутствие энкодера из списка допустимых.
# отбор данных - операция any (логическое ИЛИ), выполненная вдоль строк
encoder_condition = pd.DataFrame({e:all_results_df['encoder']==e for e in encoders_list}).any(axis='columns')
# составляем булеву таблицу, где строки - строки исходной таблицы, а столбцы - булевы значения 
# выражающие наличие/отсутствие энкодера из списка допустимых.
# отбор данных - операция any (логическое ИЛИ), выполненная вдоль строк
decoder_condition = pd.DataFrame({d:all_results_df['decoder']==d for d in decoders_list}).any(axis='columns')
condition = encoder_condition & decoder_condition
all_results_df = all_results_df[condition]
all_results_df = all_results_df.round(decimals=3)

saving_matrics_dfs = {name: pd.DataFrame() for name in saving_metrics_names}

for decoder_name in decoders_list:
    decoder_df = all_results_df[all_results_df['decoder']==decoder_name]
    decoder_df['encoder'] = decoder_df['encoder'].apply(lambda x: {n: f'{j}_{n}' for j, n in enumerate(encoders_list)}[x])
    decoder_df['bands'] = decoder_df['bands'].apply(lambda x: {n: f'{j}_{n}' for j, n in enumerate(bands_names_list)}[x])
    decoder_df = decoder_df.sort_values(by=['encoder', 'bands', 'stride'])
    # общие имена: IoU, Precision, Recall
    for saving_metric_name in saving_metrics_names:
        decoder_and_metric = f'{saving_metric_name}_{decoder_name}'
        # получение имен метрик выполняется следующим образом:
        # выполняется поиск в именах колонок имени сохраняемой метрики: 'iou', 'precision', 'recall'
        metric_names = np.array([c for c in all_results_df.columns if saving_metric_name in c])
        class_names_args = []
        for n in right_order_classes:
            for i, col in enumerate(metric_names):
                if n == 'mean':
                    #print(n, col)
                    if n in col and 'appl' not in col:
                        class_names_args.append(i)
                        break
                
                elif n in col:
                    class_names_args.append(i)
                    break

        metric_names = metric_names[class_names_args].tolist()
        metric_df = decoder_df[['decoder', 'encoder', 'bands', 'stride'] + metric_names]
   
        path_to_save = os.path.join('saving_dir', '0_right_order', f'{decoder_name}_{saving_metric_name}.csv')
        #metric_df.to_csv(path_to_save, index=False)

decoder_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  decoder_df['encoder'] = decoder_df['encoder'].apply(lambda x: {n: f'{j}_{n}' for j, n in enumerate(encoders_list)}[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  decoder_df['bands'] = decoder_df['bands'].apply(lambda x: {n: f'{j}_{n}' for j, n in enumerate(bands_names_list)}[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

Unnamed: 0,decoder,encoder,bands,stride,mean_non_appl_iou,mean_non_appl_precision,mean_non_appl_recall,mean_appl_iou,mean_appl_precision,mean_appl_recall,...,val_recall_mean,val_recall_natural_ground,val_recall_natural_grow,val_recall_natural_wetland,val_recall_natural_wood,val_recall_quasi_natural_ground,val_recall_quasi_natural_grow,val_recall_quasi_natural_wetland,val_recall_transport,val_recall_water
150,fpn,0_tu-cspdarknet53,0_rgb,1,0.465,0.645,0.568,0.477,0.646,0.603,...,0.605,0.546,0.351,0.588,0.927,0.054,0.61,0.63,0.493,0.777
151,fpn,0_tu-cspdarknet53,0_rgb,2,0.47,0.644,0.554,0.456,0.623,0.585,...,0.591,0.499,0.365,0.551,0.923,0.02,0.605,0.605,0.429,0.822
146,fpn,0_tu-cspdarknet53,1_10m,1,0.531,0.66,0.628,0.459,0.642,0.579,...,0.629,0.48,0.385,0.516,0.936,0.011,0.635,0.856,0.519,0.907
147,fpn,0_tu-cspdarknet53,1_10m,2,0.521,0.678,0.613,0.467,0.65,0.579,...,0.621,0.542,0.299,0.541,0.935,0.07,0.607,0.758,0.465,0.926
144,fpn,0_tu-cspdarknet53,2_10-20m,1,0.503,0.683,0.587,0.479,0.66,0.595,...,0.612,0.609,0.301,0.521,0.95,0.054,0.616,0.634,0.472,0.918
145,fpn,0_tu-cspdarknet53,2_10-20m,2,0.497,0.64,0.588,0.47,0.615,0.622,...,0.623,0.657,0.347,0.547,0.937,0.0,0.642,0.689,0.443,0.906
148,fpn,0_tu-cspdarknet53,3_full_sp,1,0.509,0.648,0.596,0.483,0.633,0.621,...,0.626,0.581,0.356,0.615,0.931,0.005,0.559,0.762,0.477,0.922
149,fpn,0_tu-cspdarknet53,3_full_sp,2,0.505,0.718,0.6,0.453,0.615,0.582,...,0.615,0.542,0.257,0.595,0.934,0.039,0.708,0.625,0.481,0.906
230,fpn,1_efficientnet-b2,0_rgb,1,0.484,0.656,0.573,0.433,0.589,0.57,...,0.595,0.488,0.3,0.583,0.907,0.048,0.626,0.731,0.397,0.787
231,fpn,1_efficientnet-b2,0_rgb,2,0.475,0.646,0.556,0.39,0.563,0.506,...,0.563,0.242,0.362,0.506,0.916,0.017,0.57,0.713,0.397,0.783


In [89]:
path_to_results_root = r'saving_dir\0_right_order'
saving_metrics_names = ['iou', 'precision', 'recall']

path_to_save_all_models_metrics_root = r'saving_dir\!all_models_in_one_table'
os.makedirs(path_to_save_all_models_metrics_root, exist_ok=True)

# [val_recall_mean	mean_non_appl_recall	mean_appl_recall	val_recall_buildings_territory	val_recall_transport	val_recall_water	val_recall_quasi_natural_ground	val_recall_quasi_natural_grow	val_recall_quasi_natural_wetland	val_recall_natural_ground	val_recall_natural_grow	val_recall_natural_wetland	val_recall_natural_wood	val_recall_UNLABELED]
for metric_name in saving_metrics_names:
    paths_to_metric_results = glob.glob(os.path.join(path_to_results_root, f'*{metric_name}*.csv'))
    
    metric_col_names = [f'val_{metric_name}_mean',
        f'mean_non_appl_{metric_name}',
        f'mean_appl_{metric_name}',
        f'val_{metric_name}_buildings_territory',
        f'val_{metric_name}_transport',
        f'val_{metric_name}_water',
        f'val_{metric_name}_quasi_natural_ground',
        f'val_{metric_name}_quasi_natural_grow',
        f'val_{metric_name}_quasi_natural_wetland',
        f'val_{metric_name}_natural_ground',
        f'val_{metric_name}_natural_grow',
        f'val_{metric_name}_natural_wetland',
        f'val_{metric_name}_natural_wood',
        f'val_{metric_name}_UNLABELED']

    results_dict = {}
    for path_to_result in paths_to_metric_results:
        result_df = pd.read_csv(path_to_result)
        model_name = result_df['decoder'].unique().item()
        results_dict[model_name] = result_df

    all_models_metric_df = result_df[['encoder', 'bands', 'stride']]
    
    for col_name in metric_col_names:
        for model_name, model_results_df in results_dict.items():
            col_model_metric_name = f'{col_name} {model_name}'
            all_models_metric_df[col_model_metric_name] = model_results_df[col_name]

    path_to_save_metric = os.path.join(path_to_save_all_models_metrics_root, f'{metric_name}.csv')
    all_models_metric_df.to_csv(path_to_save_metric, index=False)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_models_metric_df[col_model_metric_name] = model_results_df[col_name]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_models_metric_df[col_model_metric_name] = model_results_df[col_name]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_models_metric_df[col_model_metric_name] = model_results

In [87]:
model_results_df

Unnamed: 0,decoder,encoder,bands,stride,val_iou_mean,mean_non_appl_iou,mean_appl_iou,val_iou_buildings_territory,val_iou_transport,val_iou_water,val_iou_quasi_natural_ground,val_iou_quasi_natural_grow,val_iou_quasi_natural_wetland,val_iou_natural_ground,val_iou_natural_grow,val_iou_natural_wetland,val_iou_natural_wood,val_iou_UNLABELED
0,fcn1,0_tu-cspdarknet53,0_rgb,1,0.488,0.464,0.441,0.724,0.331,0.774,0.03,0.432,0.495,0.33,0.232,0.41,0.794,0.816
1,fcn1,0_tu-cspdarknet53,0_rgb,2,0.488,0.458,0.45,0.716,0.318,0.696,0.046,0.418,0.556,0.371,0.228,0.411,0.789,0.816
2,fcn1,0_tu-cspdarknet53,1_10m,1,0.521,0.518,0.454,0.722,0.357,0.805,0.046,0.498,0.677,0.327,0.24,0.46,0.788,0.817
3,fcn1,0_tu-cspdarknet53,1_10m,2,0.513,0.511,0.439,0.713,0.337,0.821,0.043,0.468,0.685,0.314,0.221,0.429,0.79,0.817
4,fcn1,0_tu-cspdarknet53,2_10-20m,1,0.518,0.498,0.472,0.726,0.335,0.838,0.043,0.473,0.575,0.457,0.159,0.469,0.803,0.817
5,fcn1,0_tu-cspdarknet53,2_10-20m,2,0.517,0.498,0.472,0.701,0.33,0.826,0.047,0.438,0.643,0.472,0.212,0.41,0.792,0.817
6,fcn1,0_tu-cspdarknet53,3_full_sp,1,0.519,0.498,0.477,0.71,0.356,0.831,0.072,0.414,0.608,0.472,0.235,0.414,0.785,0.817
7,fcn1,0_tu-cspdarknet53,3_full_sp,2,0.508,0.483,0.468,0.726,0.332,0.846,0.04,0.491,0.462,0.416,0.219,0.44,0.796,0.817
8,fcn1,1_efficientnet-b2,0_rgb,1,0.462,0.475,0.353,0.726,0.322,0.707,0.042,0.432,0.622,0.0,0.214,0.41,0.788,0.816
9,fcn1,1_efficientnet-b2,0_rgb,2,0.46,0.47,0.356,0.703,0.278,0.731,0.027,0.423,0.657,0.119,0.187,0.36,0.757,0.817


In [None]:
path_to_all_results = r'saving_dir\all_results.csv'
all_resuls_df = pd.read_csv(path_to_all_results)

metric_names = [
    'val_iou_mean',
    'mean_non_appl_iou',
    'mean_appl_iou',
    'val_precision_mean',
    'val_recall_mean',
    'val_iou_transport',
    'val_precision_transport',
    'val_recall_transport',]

for metric_name in metric_names:
    metric_df = all_resuls_df[['decoder', 'encoder', 'bands', 'stride', metric_name]]
    metric_df = metric_df.sort_values(by=metric_name, ascending=False)
    print(metric_name)
    print(metric_df.head(20))
    print('-------------------------------------------------')

In [None]:

decoders_list = all_resuls_df['decoder'].unique().tolist()

metric_names = [
    'val_iou_mean',
    'mean_non_appl_iou',
    'mean_appl_iou',
    'val_precision_mean',
    'val_recall_mean',
    ]

saving_dir = r'saving_dir/mean_metrics'
os.makedirs(saving_dir, exist_ok=True)

for decoder_name in decoders_list:
    condition = all_resuls_df['decoder'] == decoder_name
    decoder_df = all_resuls_df[condition][['decoder', 'encoder', 'bands', 'stride'] + metric_names]
    path_to_save = os.path.join(saving_dir, f'{decoder_name}_mean.csv')
    decoder_df.to_csv(path_to_save, index=False)

decoder_df[metric_names]

# Парето-анализ резльтатов

## Алгоритм построения множества Парето

In [None]:
points = np.random.rand(100, 11)
#------------------------------------------------------------------------
# список недоминируемых (потенциально "лучших") точек
# или точек, удовлетворяющих коритерию Парето
non_dominated_points = np.ones(points.shape[0], dtype=bool)
for i, point in enumerate(points):
    if non_dominated_points[i]:
        # поиск всех точек, которые хуже данной по критерию Парето:
        # 1. Есть хотя бы одно измерение, по которому данная точка лучше соответствующего измерения остальных
        # 2. Все остальные измерения других точек не лучше соответствующих измерений данной
        current_point_dominated_by = np.all(points <= point, axis=1) & np.any(points < point, axis=1)
        if np.any(current_point_dominated_by):
            # если хотя бы одна точка признана "лучше", чем данная по
            # критерию Парето, то она помечается как доминируемая и исключается из множества Парето
            non_dominated_points[i] = False
        else:
            # Если данная точка удовлетворяет критерию Парето, 
            # то ищем все точки, которые доминируются данной, т.е. "хуже " по критерию Парето 
            points_dominated_by_current = np.all(points >= point, axis=1) & np.any(points > point, axis=1)
            non_dominated_points[points_dominated_by_current] = False
            non_dominated_points[i] = True

points[non_dominated_points]

In [None]:
def build_pareto_front_df(df, objectives=None):
    """
    Build the Pareto front from a pandas DataFrame with four objectives (minimization).
    
    Parameters:
    df (pd.DataFrame): Input dataframe where rows are points.
    objectives (list): List of column names to use as objectives. If None, uses all columns.
    
    Returns:
    pd.DataFrame: The Pareto front points with original indices preserved.
    """
    if objectives is None:
        objectives = df.columns.tolist()
    
    points = df[objectives].values
    indices = df.index.values
    pareto_mask = np.ones(len(points), dtype=bool)
    
    for i, point in enumerate(points):
        if pareto_mask[i]:
            # Check if any other point dominates this point
            # All objectives <= current point and at least one objective < current point
            dominated = np.all(points <= point, axis=1) & np.any(points < point, axis=1)
            
            # If any point dominates 'point', mark it as non-Pareto
            if np.any(dominated):
                pareto_mask[i] = False
            else:
                # Mark all points dominated by 'point' as non-Pareto
                # All objectives >= current point and at least one objective > current point
                dominated_by_current = np.all(points >= point, axis=1) & np.any(points > point, axis=1)
                pareto_mask[dominated_by_current] = False
                pareto_mask[i] = True  # Ensure current point remains marked
    
    # Return dataframe with original indices
    return df.loc[indices[pareto_mask]]

# For maximization: invert the objectives
def build_pareto_front_maximize(df, objectives=None):
    if objectives is None:
        objectives = df.columns.tolist()
    
    # Invert objectives for maximization
    df_inverted = df.copy()
    df_inverted[objectives] = -df_inverted[objectives]
    
    # Find Pareto front on inverted objectives
    pareto_inverted = build_pareto_front_df(df_inverted, objectives)
    
    # Return original values
    return df.loc[pareto_inverted.index]

def chebyshev_selection(pareto_front, weights=None, objectives=None):
    """
    Select solution using Chebyshev distance minimization.
    
    Parameters:
    pareto_front (pd.DataFrame): Pareto front solutions
    weights (list): Weights for each objective
    objectives (list): Objective column names
    
    Returns:
    pd.Series: Selected solution with Chebyshev distance
    """
    if objectives is None:
        objectives = pareto_front.columns.tolist()
    
    if weights is None:
        weights = [1/len(objectives)] * len(objectives)
    
    # Ideal point (assuming minimization)
    ideal_point = pareto_front[objectives].min().values
    
    # Extract and normalize objectives
    obj_values = pareto_front[objectives].values
    min_vals = obj_values.min(axis=0)
    max_vals = obj_values.max(axis=0)
    ranges = max_vals - min_vals
    ranges[ranges == 0] = 1
    
    normalized = (obj_values - min_vals) / ranges
    normalized_ideal = (ideal_point - min_vals) / ranges
    
    # Calculate weighted Chebyshev distances
    weights_array = np.array(weights)
    weighted_normalized = normalized * weights_array
    weighted_ideal = normalized_ideal * weights_array
    
    chebyshev_distances = np.max(np.abs(weighted_normalized - weighted_ideal), axis=1)
    
    # Select solution with minimum Chebyshev distance
    best_idx = chebyshev_distances.argmin()
    best_solution = pareto_front.iloc[best_idx].copy()
    best_solution['chebyshev_distance'] = chebyshev_distances[best_idx]
    
    return best_solution

def find_knee_point(pareto_front, objectives=None):
    """
    Find the knee point on Pareto front (works best for 2 objectives).
    
    Parameters:
    pareto_front (pd.DataFrame): Pareto front solutions
    objectives (list): Objective column names (first 2 will be used)
    
    Returns:
    pd.Series: Knee point solution
    """
    if objectives is None:
        objectives = pareto_front.columns.tolist()[:2]
    elif len(objectives) < 2:
        raise ValueError("Need at least 2 objectives for knee point detection")
    
    # Use first two objectives for knee point detection
    points = pareto_front[objectives[:2]].values
    
    # Sort by first objective
    sorted_idx = np.argsort(points[:, 0])
    sorted_points = points[sorted_idx]
    
    # Calculate normalized distance from extreme points
    x_norm = (sorted_points[:, 0] - sorted_points[:, 0].min()) / (sorted_points[:, 0].max() - sorted_points[:, 0].min())
    y_norm = (sorted_points[:, 1] - sorted_points[:, 1].min()) / (sorted_points[:, 1].max() - sorted_points[:, 1].min())
    
    # Calculate distance from line connecting extremes
    distances = np.abs(y_norm - x_norm) / np.sqrt(2)
    
    # Find point with maximum distance (knee point)
    knee_idx = sorted_idx[distances.argmax()]
    best_solution = pareto_front.iloc[knee_idx].copy()
    best_solution['knee_distance'] = distances.max()
    
    return best_solution

def topsis_selection(pareto_front, weights=None, objectives=None):
    """
    Select solution using TOPSIS method.
    
    Parameters:
    pareto_front (pd.DataFrame): Pareto front solutions
    weights (list): Weights for each objective
    objectives (list): Objective column names
    
    Returns:
    pd.Series: Selected solution with TOPSIS score
    """
    if objectives is None:
        objectives = pareto_front.columns.tolist()
    
    if weights is None:
        weights = [1/len(objectives)] * len(objectives)
    
    # Extract and normalize decision matrix
    decision_matrix = pareto_front[objectives].values
    normalized_matrix = decision_matrix / np.sqrt((decision_matrix**2).sum(axis=0))
    
    # Apply weights
    weighted_matrix = normalized_matrix * weights
    
    # Ideal and negative ideal solutions (assuming minimization)
    ideal_best = weighted_matrix.min(axis=0)
    ideal_worst = weighted_matrix.max(axis=0)
    
    # Calculate distances
    dist_best = np.sqrt(((weighted_matrix - ideal_best)**2).sum(axis=1))
    dist_worst = np.sqrt(((weighted_matrix - ideal_worst)**2).sum(axis=1))
    
    # Calculate TOPSIS scores
    topsis_scores = dist_worst / (dist_best + dist_worst)
    
    # Select solution with highest TOPSIS score
    best_idx = topsis_scores.argmax()
    best_solution = pareto_front.iloc[best_idx].copy()
    best_solution['topsis_score'] = topsis_scores[best_idx]
    
    return best_solution

def weighted_sum_selection(pareto_front, weights=None, objectives=None, normalize=True):
    """
    Select a solution using weighted sum approach.
    
    Parameters:
    pareto_front (pd.DataFrame): Pareto front solutions
    weights (list): Weights for each objective (sum should be 1)
    objectives (list): Objective column names
    normalize (bool): Whether to normalize objectives
    
    Returns:
    pd.Series: Selected solution with additional score
    """
    if objectives is None:
        objectives = pareto_front.columns.tolist()
    
    if weights is None:
        weights = [1/len(objectives)] * len(objectives)  # Equal weights
    
    # Extract objective values
    obj_values = pareto_front[objectives].values
    
    if normalize:
        # Normalize objectives to [0,1] range (assuming minimization)
        min_vals = obj_values.min(axis=0)
        max_vals = obj_values.max(axis=0)
        # Avoid division by zero
        ranges = max_vals - min_vals
        ranges[ranges == 0] = 1
        normalized = (obj_values - min_vals) / ranges
    else:
        normalized = obj_values
    
    # Calculate weighted scores
    weights_array = np.array(weights)
    scores = normalized @ weights_array
    
    # Find solution with minimum score (for minimization)
    best_idx = scores.argmin()
    best_solution = pareto_front.iloc[best_idx].copy()
    best_solution['weighted_score'] = scores[best_idx]
    
    return best_solution

# проверка функций
path_to_results = r'saving_dir\all_results2.csv'
all_resuls_df = pd.read_csv(path_to_results)
decoder_name = 'unet'

metric_names = [
    #'val_iou_mean',
    'mean_non_appl_iou',
    'mean_appl_iou',
    'mean_non_appl_precision',
    'mean_appl_precision',
    'mean_non_appl_recall',
    'mean_appl_recall',
    #'val_precision_mean',
    #'val_recall_mean',
    ]

decoder_df = all_resuls_df[all_resuls_df['decoder']==decoder_name]

pareto_front_df = build_pareto_front_maximize(decoder_df, objectives=metric_names)

best_chebyshev = chebyshev_selection(pareto_front_df, objectives=metric_names)
best_topsis = topsis_selection(pareto_front_df, objectives=metric_names)
best_weighted_sum = weighted_sum_selection(pareto_front_df, objectives=metric_names)
best_chebyshev, best_topsis, best_weighted_sum
pareto_front_df

Unnamed: 0,decoder,encoder,bands,stride,mean_non_appl_iou,mean_non_appl_precision,mean_non_appl_recall,mean_appl_iou,mean_appl_precision,mean_appl_recall,...,val_recall_mean,val_recall_natural_ground,val_recall_natural_grow,val_recall_natural_wetland,val_recall_natural_wood,val_recall_quasi_natural_ground,val_recall_quasi_natural_grow,val_recall_quasi_natural_wetland,val_recall_transport,val_recall_water
40,unet,tu-cspdarknet53,10-20m,1,0.487887,0.660494,0.58172,0.474202,0.665529,0.594065,...,0.608708,0.484233,0.403305,0.565578,0.923143,0.066474,0.595535,0.596314,0.497979,0.882971
42,unet,tu-cspdarknet53,10m,1,0.509294,0.682907,0.5914,0.483956,0.64593,0.615319,...,0.621659,0.571801,0.359022,0.594439,0.936015,0.045794,0.632706,0.645225,0.503083,0.898256
43,unet,tu-cspdarknet53,10m,2,0.528317,0.704331,0.625192,0.471337,0.665577,0.58423,...,0.628833,0.493761,0.359133,0.543843,0.940182,0.059822,0.654927,0.792563,0.500568,0.907229
44,unet,tu-cspdarknet53,full_sp,1,0.510353,0.663927,0.611917,0.486039,0.64519,0.621922,...,0.635307,0.561593,0.365022,0.639993,0.92108,0.084111,0.638028,0.691624,0.507023,0.914943
46,unet,tu-cspdarknet53,rgb,1,0.473775,0.649568,0.563331,0.496632,0.647327,0.634881,...,0.613499,0.643262,0.377803,0.595569,0.922888,0.050178,0.598547,0.612837,0.477862,0.802242
106,unet,tu-seresnext50_32x4d,10m,1,0.518494,0.658309,0.617741,0.42741,0.670927,0.528948,...,0.604753,0.289701,0.338429,0.561158,0.926504,0.072044,0.594323,0.804271,0.445862,0.907196
178,unet,tu-mobilenetv4_hybrid_medium,10m,1,0.458918,0.63219,0.543868,0.34142,0.70246,0.420663,...,0.525049,0.045712,0.209639,0.465488,0.961814,0.045485,0.624521,0.508261,0.350477,0.90415
181,unet,tu-mobilenetv4_hybrid_medium,full_sp,2,0.441523,0.646416,0.520808,0.459572,0.66802,0.561464,...,0.563706,0.510662,0.268141,0.522549,0.944503,0.041463,0.639388,0.41238,0.263167,0.906552
200,unet,efficientnet-b2,10-20m,1,0.493495,0.653546,0.581254,0.486589,0.662082,0.605969,...,0.612778,0.580649,0.342465,0.564859,0.935904,0.048167,0.640256,0.562836,0.460527,0.923746
202,unet,efficientnet-b2,10m,1,0.504131,0.709583,0.575605,0.468077,0.635872,0.598698,...,0.607192,0.552291,0.381688,0.52636,0.934454,0.020473,0.565219,0.694967,0.434257,0.919811


In [31]:
path_to_results = r'saving_dir\all_results2.csv'
all_resuls_df = pd.read_csv(path_to_results)

encoders_list = ['tu-cspdarknet53',
    'efficientnet-b2',
    #'tu-seresnext50_32x4d',
    #'tu-mobilenetv4_hybrid_medium',
    #'densenet121',
    'tu-maxvit_tiny_rw_224']
decoders_list = [
    'fcn1',
    #'unet++',
    'unet',
    'fpn'
    ]

# Фильтрация энкодеров и декодеров
#
# составляем булеву таблицу, где строки - строки исходной таблицы, а столбцы - булевы значения 
# выражающие наличие/отсутствие энкодера из списка допустимых.
# отбор данных - операция any (логическое ИЛИ), выполненная вдоль строк
encoder_condition = pd.DataFrame({e:all_resuls_df['encoder']==e for e in encoders_list}).any(axis='columns')
# составляем булеву таблицу, где строки - строки исходной таблицы, а столбцы - булевы значения 
# выражающие наличие/отсутствие энкодера из списка допустимых.
# отбор данных - операция any (логическое ИЛИ), выполненная вдоль строк
decoder_condition = pd.DataFrame({d:all_resuls_df['decoder']==d for d in decoders_list}).any(axis='columns')
condition = encoder_condition & decoder_condition
all_resuls_df = all_resuls_df[condition]
all_resuls_df

Unnamed: 0,decoder,encoder,bands,stride,mean_non_appl_iou,mean_non_appl_precision,mean_non_appl_recall,mean_appl_iou,mean_appl_precision,mean_appl_recall,...,val_recall_mean,val_recall_natural_ground,val_recall_natural_grow,val_recall_natural_wetland,val_recall_natural_wood,val_recall_quasi_natural_ground,val_recall_quasi_natural_grow,val_recall_quasi_natural_wetland,val_recall_transport,val_recall_water
0,fcn1,tu-cspdarknet53,10-20m,1,0.498342,0.638583,0.613097,0.471981,0.630769,0.596358,...,0.626621,0.591311,0.219089,0.638387,0.936644,0.054200,0.682092,0.776871,0.438580,0.882932
1,fcn1,tu-cspdarknet53,10-20m,2,0.497622,0.701926,0.579892,0.471685,0.649926,0.592341,...,0.607083,0.546620,0.360539,0.534528,0.927677,0.051003,0.561106,0.708927,0.439071,0.882413
2,fcn1,tu-cspdarknet53,10m,1,0.517524,0.680980,0.616809,0.453593,0.670547,0.561873,...,0.616151,0.364905,0.359624,0.601920,0.921042,0.053427,0.643407,0.753455,0.483987,0.916663
3,fcn1,tu-cspdarknet53,10m,2,0.511100,0.670735,0.603167,0.438550,0.638057,0.547945,...,0.603611,0.375567,0.334627,0.549250,0.932334,0.052756,0.608516,0.738993,0.465577,0.905271
4,fcn1,tu-cspdarknet53,full_sp,1,0.498432,0.664138,0.601362,0.476517,0.663894,0.601751,...,0.622139,0.515086,0.413648,0.575327,0.902941,0.091434,0.513191,0.750320,0.473646,0.926632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227,fpn,efficientnet-b2,10m,2,0.486088,0.660451,0.577851,0.472729,0.658452,0.587316,...,0.604432,0.553652,0.336147,0.521005,0.938459,0.033469,0.552612,0.712460,0.393389,0.909092
228,fpn,efficientnet-b2,full_sp,1,0.511636,0.710644,0.609234,0.476315,0.635192,0.611133,...,0.629679,0.605150,0.349856,0.567598,0.921928,0.032541,0.579741,0.770342,0.477788,0.923114
229,fpn,efficientnet-b2,full_sp,2,0.496115,0.632606,0.587265,0.459895,0.624238,0.588292,...,0.609786,0.557849,0.295140,0.575956,0.924221,0.000000,0.637420,0.679587,0.409580,0.918437
230,fpn,efficientnet-b2,rgb,1,0.484329,0.656480,0.572501,0.433314,0.589116,0.569671,...,0.594922,0.488090,0.300356,0.583143,0.907095,0.047909,0.625754,0.731442,0.396758,0.786891


In [27]:
metric_names = set(all_resuls_df.columns) - set(['decoder', 'encoder', 'bands', 'stride'])
metric_names = list(metric_names)

pareto_metric_names = [
    'mean_non_appl_iou',
    'mean_appl_iou',
    'mean_non_appl_precision',
    'mean_appl_precision',
    'mean_non_appl_recall',
    'mean_appl_recall',
    ]

decoders_list = all_resuls_df['decoder'].unique().tolist()
for decoder_name in decoders_list:
    decoder_df = all_resuls_df[all_resuls_df['decoder']==decoder_name]
    pareto_front_df = build_pareto_front_maximize(decoder_df, objectives=pareto_metric_names)

    print(f'decoder:{len(decoder_df)};pareto_front:{len(pareto_front_df)}')
    #print(pareto_front_df[['decoder', 'encoder', 'bands', 'stride']+metric_names])
pareto_front_df

decoder:32;pareto_front:13
decoder:32;pareto_front:12
decoder:32;pareto_front:14
decoder:32;pareto_front:12


Unnamed: 0,decoder,encoder,bands,stride,mean_non_appl_iou,mean_non_appl_precision,mean_non_appl_recall,mean_appl_iou,mean_appl_precision,mean_appl_recall,...,val_recall_mean,val_recall_natural_ground,val_recall_natural_grow,val_recall_natural_wetland,val_recall_natural_wood,val_recall_quasi_natural_ground,val_recall_quasi_natural_grow,val_recall_quasi_natural_wetland,val_recall_transport,val_recall_water
136,unet++,tu-cspdarknet53,10-20m,1,0.502326,0.676109,0.592895,0.478333,0.669801,0.597806,...,0.616156,0.496711,0.383658,0.590568,0.920287,0.049714,0.602403,0.677595,0.504603,0.869813
138,unet++,tu-cspdarknet53,10m,1,0.526654,0.697683,0.615034,0.463729,0.643278,0.586408,...,0.624034,0.456103,0.390003,0.564029,0.935499,0.070703,0.62569,0.732568,0.493448,0.917847
139,unet++,tu-cspdarknet53,10m,2,0.514366,0.689069,0.598407,0.488401,0.643976,0.622135,...,0.628057,0.628857,0.349646,0.581581,0.928456,0.04796,0.62575,0.677232,0.472388,0.915346
140,unet++,tu-cspdarknet53,full_sp,1,0.507947,0.697385,0.586877,0.477396,0.632481,0.617149,...,0.619898,0.533348,0.383311,0.62173,0.930205,0.079367,0.589533,0.631681,0.475905,0.917468
142,unet++,tu-cspdarknet53,rgb,1,0.489304,0.659043,0.585139,0.484039,0.637123,0.626724,...,0.622428,0.568966,0.39792,0.612638,0.927371,0.042597,0.592929,0.790259,0.484962,0.774773
152,unet++,efficientnet-b2,10-20m,1,0.487369,0.677121,0.564686,0.48868,0.679182,0.609316,...,0.604948,0.503176,0.395081,0.607172,0.931837,0.048425,0.573007,0.499844,0.500326,0.920422
155,unet++,efficientnet-b2,10m,2,0.487168,0.689642,0.550492,0.47158,0.653412,0.608815,...,0.597055,0.61774,0.45021,0.416281,0.951029,0.040122,0.505153,0.570716,0.459129,0.909178
184,unet++,tu-maxvit_tiny_rw_224,10-20m,1,0.486854,0.719708,0.562651,0.459005,0.672686,0.566411,...,0.588195,0.461887,0.37189,0.489523,0.942346,0.038059,0.535489,0.539038,0.478568,0.922059
186,unet++,tu-maxvit_tiny_rw_224,10m,1,0.496788,0.68341,0.581784,0.484094,0.664288,0.597044,...,0.609796,0.557282,0.313017,0.579838,0.938037,0.060131,0.551565,0.569486,0.519698,0.907736
187,unet++,tu-maxvit_tiny_rw_224,10m,2,0.500464,0.670774,0.587605,0.46607,0.680651,0.567641,...,0.602396,0.451679,0.315833,0.560407,0.942646,0.05286,0.603291,0.581593,0.49802,0.915537


In [None]:
pareto_front_df = build_pareto_front_maximize(all_resuls_df, objectives=pareto_metric_names)
pareto_front_df

Unnamed: 0,decoder,encoder,bands,stride,mean_non_appl_iou,mean_non_appl_precision,mean_non_appl_recall,mean_appl_iou,mean_appl_precision,mean_appl_recall,...,val_recall_mean,val_recall_natural_ground,val_recall_natural_grow,val_recall_natural_wetland,val_recall_natural_wood,val_recall_quasi_natural_ground,val_recall_quasi_natural_grow,val_recall_quasi_natural_wetland,val_recall_transport,val_recall_water
1,fcn1,tu-cspdarknet53,10-20m,2,0.497622,0.701926,0.579892,0.471685,0.649926,0.592341,...,0.607083,0.54662,0.360539,0.534528,0.927677,0.051003,0.561106,0.708927,0.439071,0.882413
2,fcn1,tu-cspdarknet53,10m,1,0.517524,0.68098,0.616809,0.453593,0.670547,0.561873,...,0.616151,0.364905,0.359624,0.60192,0.921042,0.053427,0.643407,0.753455,0.483987,0.916663
4,fcn1,tu-cspdarknet53,full_sp,1,0.498432,0.664138,0.601362,0.476517,0.663894,0.601751,...,0.622139,0.515086,0.413648,0.575327,0.902941,0.091434,0.513191,0.75032,0.473646,0.926632
42,unet,tu-cspdarknet53,10m,1,0.509294,0.682907,0.5914,0.483956,0.64593,0.615319,...,0.621659,0.571801,0.359022,0.594439,0.936015,0.045794,0.632706,0.645225,0.503083,0.898256
43,unet,tu-cspdarknet53,10m,2,0.528317,0.704331,0.625192,0.471337,0.665577,0.58423,...,0.628833,0.493761,0.359133,0.543843,0.940182,0.059822,0.654927,0.792563,0.500568,0.907229
44,unet,tu-cspdarknet53,full_sp,1,0.510353,0.663927,0.611917,0.486039,0.64519,0.621922,...,0.635307,0.561593,0.365022,0.639993,0.92108,0.084111,0.638028,0.691624,0.507023,0.914943
46,unet,tu-cspdarknet53,rgb,1,0.473775,0.649568,0.563331,0.496632,0.647327,0.634881,...,0.613499,0.643262,0.377803,0.595569,0.922888,0.050178,0.598547,0.612837,0.477862,0.802242
66,fpn,densenet121,10m,1,0.530129,0.646972,0.630081,0.45399,0.636671,0.574043,...,0.627966,0.611729,0.251831,0.491914,0.940699,0.013924,0.705589,0.851571,0.435978,0.898391
90,fcn1,tu-maxvit_tiny_rw_224,10m,2,0.500459,0.669651,0.592935,0.477644,0.673265,0.58809,...,0.612729,0.512591,0.355218,0.554375,0.930178,0.033366,0.588761,0.674911,0.48513,0.911673
91,fcn1,tu-maxvit_tiny_rw_224,full_sp,2,0.489262,0.659615,0.574264,0.457069,0.682594,0.555095,...,0.590504,0.41073,0.302362,0.569595,0.937693,0.044093,0.591381,0.573972,0.431056,0.92123


In [29]:
best_chebyshev = chebyshev_selection(pareto_front_df, objectives=pareto_metric_names)
best_chebyshev

decoder                                                 fcn1
encoder                                tu-maxvit_tiny_rw_224
bands                                                full_sp
stride                                                     2
mean_non_appl_iou                                   0.489262
mean_non_appl_precision                             0.659615
mean_non_appl_recall                                0.574264
mean_appl_iou                                       0.457069
mean_appl_precision                                 0.682594
mean_appl_recall                                    0.555095
val_iou_UNLABELED                                   0.816932
val_iou_buildings_territory                          0.72419
val_iou_mean                                        0.507344
val_iou_natural_ground                              0.380477
val_iou_natural_grow                                0.199012
val_iou_natural_wetland                             0.449298
val_iou_natural_wood    

# Черновики

In [None]:
def gather_experimental_info(paths_to_results_csv_list):
    all_results = {}
    for path_to_results in tqdm(paths_to_results_csv_list):
        split_path = path_to_results.split(os.sep)
        path_to_root = os.path.join(*split_path[:-2])
        path_to_training_config = os.path.join(path_to_root, 'training_config.yaml')
        with open(path_to_training_config) as fd:
            training_config = yaml.load(fd, yaml.Loader)

        multispecter_bands_indices = training_config['multispecter_bands_indices']
        multispecter_bands_indices = tuple(multispecter_bands_indices)

        results_df = pd.read_csv(path_to_results)
        #best_idx = 
        best_results = results_df.loc[results_df['val_iou_mean'].argmax()]
        best_mean_iou = best_results['val_iou_mean']
        all_results[str(multispecter_bands_indices)] = best_mean_iou

    return all_results.sort_values(ascending=False)

In [None]:
paths_to_results_csv_list = glob.glob(r'z:\mikhail_u\best_bands_search\experiment_2025-0915T23-27-52\*\version_0\*.csv')
all_results = {}
for path_to_results in tqdm(paths_to_results_csv_list):
    split_path = path_to_results.split(os.sep)
    path_to_root = os.path.join(*split_path[:-2])
    path_to_training_config = os.path.join(path_to_root, 'training_config.yaml')
    with open(path_to_training_config) as fd:
        training_config = yaml.load(fd, yaml.Loader)

    multispecter_bands_indices = training_config['multispecter_bands_indices']
    multispecter_bands_indices = tuple(multispecter_bands_indices)

    results_df = pd.read_csv(path_to_results)
    #best_idx = 
    best_results = results_df.loc[results_df['val_iou_mean'].argmax()]
    best_mean_iou = best_results['val_iou_mean']
    all_results[str(multispecter_bands_indices)] = best_mean_iou

all_results = pd.Series(all_results).sort_values(ascending=False)
all_results.iloc[:10]

100%|██████████| 63/63 [00:12<00:00,  5.12it/s]


(1, 2, 3, 5, 7, 11, 12)          0.502728
(1, 2, 3, 7)                     0.502400
(1, 2, 3, 4, 7, 8, 12)           0.501387
(1, 2, 3, 5, 7, 12)              0.501343
(1, 2, 3, 4, 5, 6, 7, 11, 12)    0.500892
(1, 2, 3, 6, 7, 11, 12)          0.500331
(1, 2, 3, 5, 7, 8, 12)           0.500116
(1, 2, 3, 4, 7, 8)               0.499612
(1, 2, 3, 4, 6, 7, 11, 12)       0.499446
(1, 2, 3, 4, 6, 7, 12)           0.499181
dtype: float64

In [8]:
tuple(multispecter_bands_indices)

(1, 2, 3, 7, 11, 12)

In [4]:
all_results

(1, 2, 3, 7)                     0.502400
(1, 2, 3, 4, 7)                  0.494251
(1, 2, 3, 5, 7)                  0.492749
(1, 2, 3, 6, 7)                  0.486393
(1, 2, 3, 7, 8)                  0.486313
                                   ...   
(1, 2, 3, 4, 5, 6, 7, 8, 12)     0.488103
(1, 2, 3, 4, 5, 6, 7, 11, 12)    0.500892
(1, 2, 3, 4, 5, 7, 8, 11, 12)    0.492552
(1, 2, 3, 4, 6, 7, 8, 11, 12)    0.481136
(1, 2, 3, 5, 6, 7, 8, 11, 12)    0.488969
Length: 63, dtype: float64