In [2]:
import yaml

def yaml_to_nested_dict(yaml_data):
    def recurse(data):
        if isinstance(data, dict):
            nested_dict = {}
            for key, value in data.items():
                if isinstance(value, dict):
                    nested_dict[key] = {'parameters': recurse(value)}
                else:
                    nested_dict[key] = {'value': value}
            return nested_dict
        elif isinstance(data, list):
            return [{'parameters': recurse(item)} if isinstance(item, dict) else {'value': item} for item in data]
        else:
            return data
    return recurse(yaml_data)





In [3]:
# Load the YAML data from your file or a string
yaml_config = """
dataset:
  name: carotid_mutinfo_val_mini
  dataset_root: CAROTID_MUTINFO/val_mini
  input_size: 256
  images_root: images
  list: lists/images.txt
  gt_dir: labels
  pred_dir: ''
  n_classes: 2
  n_clusters: null
  features_dir: null
  preprocessed_dir: preprocessed
wandb:
  setup:
    project: pipeline_eval
    entity: alexaatm
    mode: offline
  key: 163fac3be1f95e6eeb5964f1743469286a0421ae
  tag: classNum_exp2
  watch:
    log: all
    log_freq: 1
  mode: local
loader:
  batch_size: 1
  num_workers: 0
  mode: full
model:
  name: dino_vits8
  checkpoint: ''
spectral_clustering:
  which_matrix: laplacian
  which_color_matrix: knn
  which_features: k
  normalize: true
  threshold_at_zero: true
  lapnorm: true
  K: 15
  image_downsample_factor: null
  image_color_lambda: 0.0
  multiprocessing: 0
  image_ssd_beta: 1.0
  image_dino_gamma: 1.0
  max_knn_neigbors: 80
  image_var: 0.0
multi_region_segmentation:
  adaptive: false
  non_adaptive_num_segments: 15
  infer_bg_index: true
  kmeans_baseline: false
  num_eigenvectors: 1000000
  multiprocessing: 0
bbox:
  num_erode: 2
  num_dilate: 5
  skip_bg_index: true
  downsample_factor: 8
  num_clusters: 8
  seed: 1
  pca_dim: 0
precomputed:
  mode: from_scratch
  features: ''
  eig: ''
  multi_region_segmentation: ''
  bboxes: ''
  bbox_features: ''
  bbox_clusters: ''
  segmaps: ''
  crf_segmaps: ''
crf:
  num_classes: 8
  downsample_factor: 8
  multiprocessing: 0
  w1: 10
  alpha: 80
  beta: 13
  w2: 3
  gamma: 3
  it: 5.0
vis:
  eigen: false
  crf_segmaps: false
  dino_attn_maps: false
  multiregion_segmaps: false
  segmaps: false
  crf_multi_region: false
pipeline_steps:
  dino_features: true
  eigen: true
  segments: true
  bbox: true
  bbox_features: true
  clusters: true
  sem_segm: true
  crf_segm: true
  crf_multi_region: true
  eval: true
eval:
  vis_dir: ./eval/vis
  vis_rand_k: 10
  eval_per_image: true
  eval_per_dataset: false
  iou_thresh: 0.0
  void_label: 0
custom_path_to_save_data: /home/guests/oleksandra_tmenova/test/project/thesis-codebase/deep-spectral-segmentation/outputs/carotid_mutinfo_VAL_MINI/classNum_exp/clusters15_dino1.0_ssd1.0_var0.0_norm-imagenet_preprocess-true_classN8
only_vis: false
only_eval: false
preprocessed_data: true
norm: imagenet
"""

In [None]:
yaml_data = yaml.safe_load(yaml_config)

# Convert the YAML data to the desired nested dictionary format
result = yaml_to_nested_dict(yaml_data)

# Print the result
print(result)

{'dataset': {'parameters': {'name': {'value': 'carotid_mutinfo_val_mini'}, 'dataset_root': {'value': 'CAROTID_MUTINFO/val_mini'}, 'input_size': {'value': 256}, 'images_root': {'value': 'images'}, 'list': {'value': 'lists/images.txt'}, 'gt_dir': {'value': 'labels'}, 'pred_dir': {'value': ''}, 'n_classes': {'value': 2}, 'n_clusters': {'value': None}, 'features_dir': {'value': None}, 'preprocessed_dir': {'value': 'preprocessed'}}}, 'wandb': {'parameters': {'setup': {'parameters': {'project': {'value': 'pipeline_eval'}, 'entity': {'value': 'alexaatm'}, 'mode': {'value': 'offline'}}}, 'key': {'value': '163fac3be1f95e6eeb5964f1743469286a0421ae'}, 'tag': {'value': 'classNum_exp2'}, 'watch': {'parameters': {'log': {'value': 'all'}, 'log_freq': {'value': 1}}}, 'mode': {'value': 'local'}}}, 'loader': {'parameters': {'batch_size': {'value': 1}, 'num_workers': {'value': 0}, 'mode': {'value': 'full'}}}, 'model': {'parameters': {'name': {'value': 'dino_vits8'}, 'checkpoint': {'value': ''}}}, 'spectr

In [None]:
sweep_dict = {
        "spectral_clustering":{
            'parameters': {
                'K': {'max': 20, 'min': 8}
            }
        },
        "bbox":{
                'parameters': {
                    'num_clusters': {'max': 15, 'min': 5}
                }
        }
    }

In [6]:
# Update the result dictionary with values from sweep_dict
for key, value in sweep_dict.items():
    if key in result:
        result[key]['parameters'].update(value['parameters'])

In [None]:
for key, value in sweep_dict.items():
        if key in parameters_dict:
            for subkey, subvalue in value['parameters'].items():
                if subkey in parameters_dict[key]['parameters']:
                    parameters_dict[key]['parameters'][subkey] = subvalue

In [7]:
result

{'dataset': {'parameters': {'name': {'value': 'carotid_mutinfo_val_mini'},
   'dataset_root': {'value': 'CAROTID_MUTINFO/val_mini'},
   'input_size': {'value': 256},
   'images_root': {'value': 'images'},
   'list': {'value': 'lists/images.txt'},
   'gt_dir': {'value': 'labels'},
   'pred_dir': {'value': ''},
   'n_classes': {'value': 2},
   'n_clusters': {'value': None},
   'features_dir': {'value': None},
   'preprocessed_dir': {'value': 'preprocessed'}}},
 'wandb': {'parameters': {'setup': {'parameters': {'project': {'value': 'pipeline_eval'},
     'entity': {'value': 'alexaatm'},
     'mode': {'value': 'offline'}}},
   'key': {'value': '163fac3be1f95e6eeb5964f1743469286a0421ae'},
   'tag': {'value': 'classNum_exp2'},
   'watch': {'parameters': {'log': {'value': 'all'},
     'log_freq': {'value': 1}}},
   'mode': {'value': 'local'}}},
 'loader': {'parameters': {'batch_size': {'value': 1},
   'num_workers': {'value': 0},
   'mode': {'value': 'full'}}},
 'model': {'parameters': {'name