In [1]:
import pandas as pd
import json
import yaml
from pathlib import Path

In [2]:
# Load the tuning-results table from disk and preview its first rows
params_csv = Path('parameters.csv')
df = pd.read_csv(params_csv)
df.head()

Unnamed: 0,dataset,model,seed,best_params,train_cindex_mean,train_cindex_std,train_cindex_ci_lower,train_cindex_ci_upper,train_ibs_mean,train_ibs_std,...,test_cal_mean,test_cal_std,test_cal_ci_lower,test_cal_ci_upper,time_sec_total,n_bootstrap_iters,n_trials,space_file,status,error
0,whas,converse_single,42,"{""latent_dim"": 16, ""enc_layers"": 1, ""enc_hidde...",0.799285,0.020357,0.762617,0.832199,0.138184,0.012236,...,0.136987,0.02346,0.107862,0.187446,48384.864473,20,300,spaces/converse_single.yaml,ok,
1,whas,converse_siamese,42,"{""latent_dim"": 16, ""enc_layers"": 3, ""enc_hidde...",0.798192,0.01766,0.767539,0.828837,0.133489,0.010882,...,0.12885,0.028609,0.08762,0.181634,78040.569333,20,300,spaces/converse_siamese.yaml,ok,
2,tcga_brca,converse_single,42,"{""latent_dim"": 16, ""enc_layers"": 4, ""enc_hidde...",0.819372,0.042764,0.740838,0.883258,0.101447,0.010615,...,0.096264,0.022146,0.070418,0.139289,62850.772992,20,300,spaces/converse_single.yaml,ok,
3,tcga_brca,converse_siamese,42,"{""latent_dim"": 32, ""enc_layers"": 2, ""enc_hidde...",0.779845,0.023632,0.735412,0.810965,0.103384,0.003459,...,0.116073,0.023365,0.084772,0.169274,461967.332381,20,300,spaces/converse_siamese.yaml,ok,
4,veterans,converse_single,42,"{""latent_dim"": 32, ""enc_layers"": 4, ""enc_hidde...",0.744849,0.033655,0.677668,0.804062,0.183504,0.028029,...,0.258283,0.053173,0.178803,0.353976,3355.933494,20,300,spaces/converse_single.yaml,ok,


In [3]:
def convert_params_to_yaml_format(params_dict):
    """
    Build the YAML-ready structure for a dictionary of best parameters.

    Each value from ``params_dict`` is placed in a one-element ``choice``
    list, and the resulting mapping is nested under a top-level ``params``
    key. Keys keep the iteration order of ``params_dict``.
    """
    # One {'choice': [value]} entry per parameter
    single_choice = {
        name: {'choice': [setting]}
        for name, setting in params_dict.items()
    }
    return {'params': single_choice}

In [4]:
def generate_yaml_files(df, output_dir="."):
    """
    Generate one YAML file per row of the dataframe.

    For every row, the ``best_params`` JSON string is parsed, converted with
    ``convert_params_to_yaml_format`` and written to
    ``{output_dir}/{dataset}/{model}.yaml``
    (e.g. ``whas/converse_single.yaml``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``dataset``, ``model`` and ``best_params``.
    output_dir : str or Path, optional
        Root directory under which the per-dataset folders are created.
        Defaults to the current working directory, which matches the
        previous behaviour.

    Returns
    -------
    list of str
        Paths of the files that were written, in row order.

    Raises
    ------
    json.JSONDecodeError
        If a ``best_params`` string is present but is not valid JSON.
    """
    created_files = []
    root = Path(output_dir)

    for row_label, row in df.iterrows():
        dataset = row['dataset']
        model = row['model']
        best_params_str = row['best_params']

        # An empty CSV cell is read back as NaN (a float), which json.loads
        # rejects with a TypeError; report and skip such rows instead of
        # aborting the whole export.
        if not isinstance(best_params_str, str) or not best_params_str.strip():
            print(f"Skipped row {row_label} ({dataset}/{model}): no best_params")
            continue

        # Parse the best_params JSON
        best_params = json.loads(best_params_str)

        # Convert to YAML format
        yaml_content = convert_params_to_yaml_format(best_params)

        # Create dataset folder; parents=True so a nested output_dir also works
        folder = root / dataset
        folder.mkdir(parents=True, exist_ok=True)

        # Create file path with model name
        file_path = folder / f"{model}.yaml"

        # Write YAML file (explicit encoding keeps output platform-independent)
        with open(file_path, 'w', encoding='utf-8') as f:
            yaml.dump(yaml_content, f, default_flow_style=False, sort_keys=False)

        created_files.append(str(file_path))
        print(f"Created: {file_path}")

    return created_files

# Write a YAML file for every row of the table, then report the total
created_files = generate_yaml_files(df)
n_created = len(created_files)
print(f"\nTotal files created: {n_created}")

Created: whas/converse_single.yaml
Created: whas/converse_siamese.yaml
Created: tcga_brca/converse_single.yaml
Created: tcga_brca/converse_siamese.yaml
Created: veterans/converse_single.yaml
Created: veterans/converse_siamese.yaml
Created: pbc/converse_single.yaml
Created: pbc/converse_siamese.yaml
Created: gbsg/converse_single.yaml
Created: gbsg/converse_siamese.yaml
Created: breast_cancer/converse_single.yaml
Created: breast_cancer/converse_siamese.yaml
Created: aids/converse_siamese.yaml
Created: aids/converse_single.yaml
Created: metabric/converse_single.yaml
Created: metabric/converse_siamese.yaml

Total files created: 16
