In [9]:
import pandas as pd
import numpy as np
import yaml
import os
import glob

import re

from tqdm import tqdm
import warnings
from matplotlib import pyplot as plt
%matplotlib inline

# Сбор результатов

In [23]:
# Collect, for every trained network, the validation metrics of its best epoch
# (highest val_iou_mean) plus group averages, and save them as one CSV table.
paths_to_results_csv_list = glob.glob(r'Z:\mikhail_u\segmentation_results_DATCHIKI\*\*\version_0\metrics.csv')

# Surface classes are split into two groups; each group gets its own averaged metrics.
applicable_surfaces = [
    "natural_ground",
    "natural_grow",
    "natural_wetland",
    "natural_wood",
]

non_applicable_surfaces = [
    "buildings_territory",
    "quasi_natural_ground",
    "quasi_natural_grow",
    "quasi_natural_wetland",
    "transport",
    "water",
]

appl_num = len(applicable_surfaces)
non_appl_num = len(non_applicable_surfaces)


def parse_run_name(run_name):
    """Split a run directory name into its experiment parameters.

    Expected layout: ``<decoder>_<encoder>__b_<bands>_st_<stride>[ <suffix>]``,
    e.g. ``fcn1_tu-cspdarknet53__b_10-20m_st_1 2025-10-10T18-04-17``.

    Returns:
        dict with keys 'decoder', 'encoder', 'bands', 'stride' (all strings),
        or None when the name does not follow the layout.
    """
    parts = run_name.split('__')
    if len(parts) != 2:
        return None
    arch_part, band_stride_part = parts

    matched = re.match(r"^b_(.*)_st_(.*)$", band_stride_part)
    if matched is None:
        return None

    # The first '_'-separated token is the decoder; the rest (which may itself
    # contain underscores) is the encoder name.
    arch_tokens = arch_part.split('_')
    return {
        'decoder': arch_tokens[0],
        'encoder': '_'.join(arch_tokens[1:]),
        'bands': matched.group(1),
        # Everything after the first space is a suffix (a timestamp in the
        # directory names seen so far), not part of the stride.
        'stride': matched.group(2).split(' ')[0],
    }


def mean_surface_metric(best_row, metric, surfaces):
    """Average ``val_<metric>_<surface>`` over ``surfaces``.

    Only columns actually present in ``best_row`` take part; the result is NaN
    when none of them is present.
    """
    wanted = {f'val_{metric}_{surface}' for surface in surfaces}
    present = [name for name in best_row.index if name in wanted]
    return float(best_row[present].mean())


all_results = []
for path_to_metrics_csv in tqdm(paths_to_results_csv_list):
    # .../<run dir>/version_0/metrics.csv -> <run dir>
    nn_name = os.path.basename(os.path.dirname(os.path.dirname(path_to_metrics_csv)))
    nn_name = nn_name.replace('custom_', '')

    run_info = parse_run_name(nn_name)
    if run_info is None:
        # Previously an unparsable name silently reused the bands/stride of the
        # previous run; skip it explicitly instead.
        warnings.warn(f'Skipping {path_to_metrics_csv}: unexpected run directory name {nn_name!r}')
        continue

    metrics_df = pd.read_csv(path_to_metrics_csv)
    val_metrics = [c for c in metrics_df.columns if c.startswith('val_')]
    if not metrics_df['val_iou_mean'].notna().any():
        warnings.warn(f'Skipping {path_to_metrics_csv}: no val_iou_mean values logged')
        continue

    # First row that reaches the maximal validation mean IoU.
    best_row_label = metrics_df['val_iou_mean'].idxmax()
    best_val_results = metrics_df.loc[best_row_label, val_metrics]

    # Group averages; each key is filled from its own surface group.
    group_means = {}
    for group_name, surfaces in (
        ('mean_non_appl', non_applicable_surfaces),
        ('mean_appl', applicable_surfaces),
    ):
        for metric in ('iou', 'precision', 'recall'):
            group_means[f'{group_name}_{metric}'] = mean_surface_metric(
                best_val_results, metric, surfaces)

    all_results.append({**run_info, **group_means, **best_val_results.to_dict()})

all_results = pd.DataFrame(all_results)
os.makedirs('saving_dir', exist_ok=True)
all_results.to_csv('saving_dir/all_results.csv', index=False)

100%|██████████| 240/240 [00:24<00:00,  9.73it/s]


In [None]:
# Build one report table per (decoder, metric): rows are (metric column, encoder),
# columns are (bands, stride). Each table is saved as saving_dir/<decoder>_<metric>.csv.
all_results_df = pd.read_csv(os.path.join('saving_dir', 'all_results.csv'))

# Fixed lists pin which encoders/decoders are reported and in which order.
encoders_list = ['tu-cspdarknet53', 'efficientnet-b2', 'tu-maxvit_tiny_rw_224']
decoders_list = ['fcn1', 'unet++', 'unet', 'fpn']
saving_metrics_names = ['iou', 'precision', 'recall']

bands_names_list = ['rgb', '10m', '10-20m', 'full_sp']
strides_list = [1, 2]

# Row order of the report tables.
right_order_classes = [
    'buildings_territory',
    'transport',
    'water',
    'quasi_natural_ground',
    'quasi_natural_grow',
    'quasi_natural_wetland',
    'natural_ground',
    'natural_grow',
    'natural_wetland',
    'natural_wood',
    'UNLABELED',
    'mean_non_appl',
    'mean_appl',
    'mean',
]


def ordered_metric_columns(metric_name, class_order, available_columns):
    """Return the columns of one metric in report order.

    Column names are built exactly (``val_<metric>_<class>`` for classes and the
    overall mean, ``<group>_<metric>`` for the two group means) instead of being
    matched by substring, so 'natural_ground' can never pick up
    'quasi_natural_ground'. Names missing from ``available_columns`` are skipped.
    """
    available = set(available_columns)
    ordered = []
    for class_name in class_order:
        if class_name in ('mean_non_appl', 'mean_appl'):
            column = f'{class_name}_{metric_name}'
        else:
            column = f'val_{metric_name}_{class_name}'
        if column in available:
            ordered.append(column)
    return ordered


for decoder_name in decoders_list:
    decoder_df = all_results_df[all_results_df['decoder'] == decoder_name]
    for saving_metric_name in saving_metrics_names:
        metric_names = ordered_metric_columns(
            saving_metric_name, right_order_classes, all_results_df.columns)
        columns_multiindex = pd.MultiIndex.from_product([bands_names_list, strides_list])
        rows_multiindex = pd.MultiIndex.from_product([metric_names, encoders_list])

        new_results_df = pd.DataFrame(columns=columns_multiindex, index=rows_multiindex)
        for encoder_name in encoders_list:
            encoder_df = decoder_df[decoder_df['encoder'] == encoder_name]

            for band_name in bands_names_list:
                for stride in strides_list:
                    condition = (encoder_df['bands'] == band_name) & (encoder_df['stride'] == stride)
                    band_stride_df = encoder_df[condition]
                    if band_stride_df.empty:
                        # No such experiment in all_results.csv: leave the cells
                        # as NaN instead of failing with a bare IndexError.
                        warnings.warn(
                            f'No results for decoder={decoder_name}, encoder={encoder_name}, '
                            f'bands={band_name}, stride={stride}')
                        continue

                    col_index = (band_name, stride)
                    for metric_name in metric_names:
                        metric_val = band_stride_df[metric_name].iloc[0]
                        row_index = (metric_name, encoder_name)
                        new_results_df.loc[row_index, col_index] = np.round(metric_val, decimals=3).item()

        saving_name = f'{decoder_name}_{saving_metric_name}.csv'
        path_to_save = os.path.join('saving_dir', saving_name)

        new_results_df.to_csv(path_to_save)

# Show the last table that was built (last decoder, last metric).
new_results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,rgb,rgb,10m,10m,10-20m,10-20m,full_sp,full_sp
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,1,2,1,2,1,2
val_recall_buildings_territory,tu-cspdarknet53,0.845,0.845,0.842,0.853,0.829,0.849,0.853,0.84
val_recall_buildings_territory,efficientnet-b2,0.846,0.856,0.85,0.866,0.842,0.843,0.872,0.879
val_recall_buildings_territory,tu-maxvit_tiny_rw_224,0.865,0.875,0.877,0.901,0.853,0.832,0.861,0.872
val_recall_transport,tu-cspdarknet53,0.493,0.429,0.519,0.465,0.472,0.443,0.477,0.481
val_recall_transport,efficientnet-b2,0.397,0.397,0.527,0.393,0.475,0.363,0.478,0.41
val_recall_transport,tu-maxvit_tiny_rw_224,0.458,0.422,0.495,0.446,0.469,0.459,0.46,0.399
val_recall_water,tu-cspdarknet53,0.777,0.822,0.907,0.926,0.918,0.906,0.922,0.906
val_recall_water,efficientnet-b2,0.787,0.783,0.9,0.909,0.924,0.915,0.923,0.918
val_recall_water,tu-maxvit_tiny_rw_224,0.862,0.788,0.917,0.897,0.922,0.916,0.921,0.92
val_recall_quasi_natural_ground,tu-cspdarknet53,0.054,0.02,0.011,0.07,0.054,0.0,0.005,0.039


: 

In [85]:
# Scratch check: np.round on a plain number returns a NumPy scalar rather than
# a built-in float.
np.round(14.883, decimals=3)

np.float64(14.883)

In [71]:
# Scratch check of a single (metric, encoder) row of a report table.
# NOTE(review): `df` is not defined in any cell visible here; this relies on
# leftover kernel state. Confirm what it refers to or remove the cell.
df.loc[('val_iou_natural_ground', 'efficientnet-b2')]

rgb      1    NaN
         2    NaN
10m      1    NaN
         2    NaN
10-20m   1    NaN
         2    NaN
full_sp  1    NaN
         2    NaN
Name: (val_iou_natural_ground, efficientnet-b2), dtype: object

In [3]:
# Peek at the first globbed metrics path to see the directory layout being parsed.
paths_to_results_csv_list[0]

'Z:\\mikhail_u\\segmentation_results_DATCHIKI\\fcn1_tu-cspdarknet53_2025-10-10T18-04-00\\fcn1_tu-cspdarknet53__b_10-20m_st_1 2025-10-10T18-04-17\\version_0\\metrics.csv'

In [22]:

# Recompute per-class and mean recall from the saved confusion matrices and
# write them into each run's metrics.csv.
# NOTE: metrics.csv is overwritten in place at the end of every iteration.
for path_to_metric in tqdm(paths_to_results_csv_list):
    # The confusion-matrix CSVs are read from the same directory as metrics.csv.
    path_to_metrics_dir, _ = os.path.split(path_to_metric)
    path_to_train_confusion = os.path.join(path_to_metrics_dir, 'train_confusion_matrices.csv')
    path_to_val_confusion = os.path.join(path_to_metrics_dir, 'val_confusion_matrices.csv')
    val_confusion_df = pd.read_csv(path_to_val_confusion)
    train_confusion_df = pd.read_csv(path_to_train_confusion)
    metrics_df = pd.read_csv(path_to_metric)
    
    epochs_list = val_confusion_df['epoch'].unique()
    classes_list = val_confusion_df['classes'].unique()

    # Replace the recall results: reset every per-class recall column to NaN
    # before filling it from the confusion matrices.
    for class_name in classes_list:
        train_recall = f'train_recall_{class_name}'
        val_recall = f'val_recall_{class_name}'
        metrics_df[train_recall] = [np.nan for i in range(len(metrics_df))]
        metrics_df[val_recall] = [np.nan for i in range(len(metrics_df))]

    
    for epoch in epochs_list:
        train_epoch_confusion = train_confusion_df[train_confusion_df['epoch']==epoch]
        val_epoch_confusion = val_confusion_df[val_confusion_df['epoch']==epoch]
        if epoch==0:
            # Drop the first len(classes_list) validation rows of epoch 0.
            # NOTE(review): presumably an extra validation matrix logged before
            # training starts — TODO confirm.
            val_epoch_confusion = val_epoch_confusion.iloc[len(classes_list):]

        # Train and validation values of one epoch sit in different rows of
        # metrics_df; locate each by the first row with a non-NaN precision mean.
        epoch_metrics_df = metrics_df[metrics_df['epoch']==epoch]
        train_index = epoch_metrics_df[~epoch_metrics_df['train_precision_mean'].isna()].index[0].item()
        val_index = epoch_metrics_df[~epoch_metrics_df['val_precision_mean'].isna()].index[0].item()

        train_recall_list = []
        val_recall_list = []
        for class_name in classes_list:
            # recall = (cell of the class's own column in the class's row)
            #          / (sum of that row over all class columns).
            # NOTE(review): assumes rows are the true classes and columns the
            # predicted ones — TODO confirm.
            val_class_value = val_epoch_confusion[val_epoch_confusion['classes']==class_name][class_name]
            val_recall = val_class_value/val_epoch_confusion[val_epoch_confusion['classes']==class_name][classes_list].sum(axis=1)
            # .item() requires exactly one matching row per class and epoch.
            val_recall = val_recall.item()

            train_class_value = train_epoch_confusion[train_epoch_confusion['classes']==class_name][class_name]
            train_recall = train_class_value/train_epoch_confusion[train_epoch_confusion['classes']==class_name][classes_list].sum(axis=1)
            train_recall = train_recall.item()
            train_recall_list.append(train_recall)
            val_recall_list.append(val_recall)

            train_recall_name = f'train_recall_{class_name}'
            val_recall_name = f'val_recall_{class_name}'
            
            metrics_df.loc[val_index, val_recall_name] = val_recall
            metrics_df.loc[train_index, train_recall_name] = train_recall

        # The mean recall is the unweighted average over all classes.
        mean_train_recall = np.mean(train_recall_list).item()
        mean_val_recall = np.mean(val_recall_list).item()
        metrics_df.loc[train_index, 'train_recall_mean'] = mean_train_recall
        metrics_df.loc[val_index, 'val_recall_mean'] = mean_val_recall
    metrics_df.to_csv(path_to_metric, index=False)

            
        


# Shows the last recall column name that was written (leftover loop variable).
train_recall_name

100%|██████████| 240/240 [52:09<00:00, 13.04s/it]


'train_recall_water'

In [19]:
# Scratch check: which metrics_df rows hold the train and the validation values
# of one epoch. Uses `epoch_metrics_df` left in the kernel by the recall cell.
train_index = epoch_metrics_df[~epoch_metrics_df['train_precision_mean'].isna()].index[0].item()
val_index = epoch_metrics_df[~epoch_metrics_df['val_precision_mean'].isna()].index[0].item()

train_index, val_index

(599, 598)

In [77]:
# Scratch check: row labels of this epoch where the train IoU is missing.
epoch_metrics_df[epoch_metrics_df['train_iou_UNLABELED'].isna()].index

Index([598], dtype='int64')

In [67]:
# Manual cross-check of precision and recall for the 'UNLABELED' class at one epoch.
epoch = 1
# NOTE(review): this assignment is overwritten by the very next line, so only
# the validation confusion matrix is actually used below.
val_epoch_confusion = train_confusion_df[train_confusion_df['epoch']==epoch]
val_epoch_confusion = val_confusion_df[val_confusion_df['epoch']==epoch]

# Cell at row 'UNLABELED', column 'UNLABELED'.
class_value = val_epoch_confusion[val_epoch_confusion['classes']=='UNLABELED']['UNLABELED']
# recall: that cell divided by the sum of its row over all class columns.
recall = class_value/val_epoch_confusion[val_epoch_confusion['classes']=='UNLABELED'][classes_list].sum(axis=1)
# precision: that cell divided by the sum of the 'UNLABELED' column.
precision = class_value/val_epoch_confusion['UNLABELED'].sum(axis=0)
precision.item(), recall.item()

(0.9771934908151985, 0.8312823234242184)

In [70]:
# Compare with the precision stored in metrics_df for the same epoch.
metrics_df[metrics_df['epoch']==epoch]['val_precision_UNLABELED']

2    0.977193
3         NaN
Name: val_precision_UNLABELED, dtype: float64

In [None]:
# Experiment: try to overwrite a value through chained indexing.
# NOTE(review): pandas warns that this sets the value on a copy, so metrics_df
# is not reliably updated; a single `metrics_df.loc[row_label, column] = value`
# is required to actually write.
metrics_df[metrics_df['epoch']==epoch]['val_precision_UNLABELED'].iloc[0] = 'pidr'


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  metrics_df[metrics_df['epoch']==epoch]['val_precision_UNLABELED'].iloc[0] = 'pidr'
  metrics_df[metrics_df['epoch']==epoch]['val_precision_UNLABELED'].iloc[0] = 'pidr'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation

np.float64(0.9771934747695924)

In [None]:
# Draft: group-averaged validation metrics for the row currently held in
# `best_val_results` (left in the kernel by the results-collection cell).
metrics_names = ['val_iou', 'val_precision', 'val_recall']


def _columns_for(prefix, surfaces):
    """Names in best_val_results.index whose part after `prefix` is one of `surfaces`."""
    return [
        name for name in best_val_results.index
        if name.split(prefix)[-1] in surfaces
    ]


# Column names of each metric for the applicable surfaces ...
iou_appl_metrics = _columns_for('val_iou_', applicable_surfaces)
precision_appl_metrics = _columns_for('val_precision_', applicable_surfaces)
recall_appl_metrics = _columns_for('val_recall_', applicable_surfaces)

# ... and for the non-applicable ones.
iou_non_appl_metrics = _columns_for('val_iou_', non_applicable_surfaces)
precision_non_appl_metrics = _columns_for('val_precision_', non_applicable_surfaces)
recall_non_appl_metrics = _columns_for('val_recall_', non_applicable_surfaces)

# Unweighted averages over each group, converted to plain Python floats.
mean_appl_iou = best_val_results[iou_appl_metrics].mean().item()
mean_appl_precision = best_val_results[precision_appl_metrics].mean().item()
mean_appl_recall = best_val_results[recall_appl_metrics].mean().item()

mean_non_appl_iou = best_val_results[iou_non_appl_metrics].mean().item()
mean_non_appl_precision = best_val_results[precision_non_appl_metrics].mean().item()
mean_non_appl_recall = best_val_results[recall_non_appl_metrics].mean().item()

# Only the last expression of a cell is displayed, i.e. the applicable-group tuple.
mean_non_appl_iou, mean_non_appl_precision, mean_non_appl_recall
mean_appl_iou, mean_appl_precision, mean_appl_recall


(0.44228480011224747, 0.6340819597244263, 0.6340819597244263)

In [13]:
# Scratch: split a metric column name into prefix and class name; because the
# pattern is a capturing group, the matched prefix is kept in the result list.
re.split(r'(val_iou_|val_precision_|val_recall_)', 'val_iou_mean')

['', 'val_iou_', 'mean']

In [55]:
# Regex experiment: parse bands and stride from a name that has a
# space-separated suffix after the stride.
s = 'b_full_sp_st_1 pidr'
# NOTE(review): the second `(.*)` is greedy and `(?:\s|$)` can match at the end
# of the string, so the suffix ends up inside the stride group; a group such as
# `(\S+)` would stop at the first whitespace.
match = re.match(r"^b_(.*)_st_(.*)(?:\s|$)", s)
print(match)


if match:
    band = match.group(1)  # "substr1"
    stride = match.group(2)  # "substr2"

    print(band, stride)

<re.Match object; span=(0, 19), match='b_full_sp_st_1 pidr'>
full_sp 1 pidr


In [53]:
# NOTE(review): this raises IndexError when the pattern behind `match` has
# fewer than three capturing groups (the recorded output shows that error);
# a cell that always raises breaks Restart & Run All — consider removing it.
match.group(3)

IndexError: no such group

In [21]:
# Draft of the run-name parsing; uses `nn_name` left in the kernel by another cell.
# The part before '__' is the architecture, the part after holds bands and stride.
nn_arch, band_stride = nn_name.split('__')
#band_stride = band_stride
# First '_'-separated token is the decoder; the remaining tokens form the encoder.
nn_arch = nn_arch.split('_')
decoder = nn_arch[0]
encoder = '_'.join(nn_arch[1:])
# The trailing `\s` means this variant only matches when whitespace follows the stride.
match = re.match(r"^b_(.*)_st_(.*)\s", band_stride)


# NOTE(review): when there is no match, `band`/`stride` keep whatever values a
# previous cell left in the kernel.
if match:
    band = match.group(1)  # "substr1"
    stride = match.group(2)  # "substr2"

band, stride

('rgb', '2')

In [5]:
# Show the row(s) of the current metrics_df that reach the highest validation mean IoU.
metrics_df[metrics_df['val_iou_mean']==metrics_df['val_iou_mean'].max()]

Unnamed: 0,epoch,step,train_iou_UNLABELED,train_iou_buildings_territory,train_iou_mean,train_iou_natural_ground,train_iou_natural_grow,train_iou_natural_wetland,train_iou_natural_wood,train_iou_quasi_natural_ground,...,val_recall_mean,val_recall_natural_ground,val_recall_natural_grow,val_recall_natural_wetland,val_recall_natural_wood,val_recall_quasi_natural_ground,val_recall_quasi_natural_grow,val_recall_quasi_natural_wetland,val_recall_transport,val_recall_water
564,282,29997,,,,,,,,,...,0.67182,0.68503,0.347784,0.681957,0.821558,0.103899,0.629661,0.856785,0.520096,0.933146


# Черновики

In [None]:
def gather_experimental_info(paths_to_results_csv_list):
    """Collect the best validation mean IoU of every band-search run.

    Args:
        paths_to_results_csv_list: paths to metrics CSV files laid out as
            ``<run_root>/<version_dir>/<file>.csv``. Each ``<run_root>`` must
            contain a ``training_config.yaml`` with a
            ``multispecter_bands_indices`` entry, and each CSV must have a
            ``val_iou_mean`` column.

    Returns:
        pandas.Series indexed by the stringified tuple of band indices, holding
        the highest ``val_iou_mean`` of that run, sorted in descending order.
        Runs that share the same band indices overwrite each other (last wins).
    """
    all_results = {}
    for path_to_results in tqdm(paths_to_results_csv_list):
        # Go two levels up from the CSV. dirname() keeps the root of the path
        # intact, whereas splitting on os.sep and re-joining drops it
        # (os.path.join('z:', 'dir') is the drive-relative 'z:dir').
        path_to_root = os.path.dirname(os.path.dirname(path_to_results))
        path_to_training_config = os.path.join(path_to_root, 'training_config.yaml')
        with open(path_to_training_config) as fd:
            # NOTE(review): yaml.Loader can construct arbitrary Python objects;
            # switch to yaml.safe_load if the configs contain only plain data.
            training_config = yaml.load(fd, yaml.Loader)

        multispecter_bands_indices = tuple(training_config['multispecter_bands_indices'])

        results_df = pd.read_csv(path_to_results)
        # idxmax() returns an index label, which is what .loc expects
        # (argmax() returns a position).
        best_results = results_df.loc[results_df['val_iou_mean'].idxmax()]
        best_mean_iou = best_results['val_iou_mean']
        all_results[str(multispecter_bands_indices)] = best_mean_iou

    # A dict has no sort_values(); convert to a Series first.
    return pd.Series(all_results, dtype=float).sort_values(ascending=False)

In [None]:
# Rank the band combinations of the band-search experiment by the best
# validation mean IoU each run reached.
paths_to_results_csv_list = glob.glob(r'z:\mikhail_u\best_bands_search\experiment_2025-0915T23-27-52\*\version_0\*.csv')
all_results = {}
for path_to_results in tqdm(paths_to_results_csv_list):
    # <run_root>/version_0/<file>.csv -> <run_root>. dirname() keeps the root of
    # the path intact, whereas splitting on os.sep and re-joining drops it
    # (os.path.join('z:', 'dir') is the drive-relative 'z:dir').
    path_to_root = os.path.dirname(os.path.dirname(path_to_results))
    path_to_training_config = os.path.join(path_to_root, 'training_config.yaml')
    with open(path_to_training_config) as fd:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # switch to yaml.safe_load if the configs contain only plain data.
        training_config = yaml.load(fd, yaml.Loader)

    multispecter_bands_indices = training_config['multispecter_bands_indices']
    multispecter_bands_indices = tuple(multispecter_bands_indices)

    results_df = pd.read_csv(path_to_results)
    # idxmax() returns an index label, which is what .loc expects
    # (argmax() returns a position).
    best_results = results_df.loc[results_df['val_iou_mean'].idxmax()]
    best_mean_iou = best_results['val_iou_mean']
    # Runs with identical band indices overwrite each other (last wins).
    all_results[str(multispecter_bands_indices)] = best_mean_iou

all_results = pd.Series(all_results).sort_values(ascending=False)
# Ten best band combinations.
all_results.iloc[:10]

100%|██████████| 63/63 [00:12<00:00,  5.12it/s]


(1, 2, 3, 5, 7, 11, 12)          0.502728
(1, 2, 3, 7)                     0.502400
(1, 2, 3, 4, 7, 8, 12)           0.501387
(1, 2, 3, 5, 7, 12)              0.501343
(1, 2, 3, 4, 5, 6, 7, 11, 12)    0.500892
(1, 2, 3, 6, 7, 11, 12)          0.500331
(1, 2, 3, 5, 7, 8, 12)           0.500116
(1, 2, 3, 4, 7, 8)               0.499612
(1, 2, 3, 4, 6, 7, 11, 12)       0.499446
(1, 2, 3, 4, 6, 7, 12)           0.499181
dtype: float64

In [8]:
# Band indices currently held in the kernel (loop variable left over from the
# band-search cell).
tuple(multispecter_bands_indices)

(1, 2, 3, 7, 11, 12)

In [4]:
# Display all collected band-search results.
all_results

(1, 2, 3, 7)                     0.502400
(1, 2, 3, 4, 7)                  0.494251
(1, 2, 3, 5, 7)                  0.492749
(1, 2, 3, 6, 7)                  0.486393
(1, 2, 3, 7, 8)                  0.486313
                                   ...   
(1, 2, 3, 4, 5, 6, 7, 8, 12)     0.488103
(1, 2, 3, 4, 5, 6, 7, 11, 12)    0.500892
(1, 2, 3, 4, 5, 7, 8, 11, 12)    0.492552
(1, 2, 3, 4, 6, 7, 8, 11, 12)    0.481136
(1, 2, 3, 5, 6, 7, 8, 11, 12)    0.488969
Length: 63, dtype: float64