In [None]:
import yaml
import orjson
import glob
import os
from types import SimpleNamespace

# Switch the working directory to the project checkout before the local
# `config` import below and before the relative 'other_models.yaml' path is
# opened later in the notebook; both are resolved after this call.
# NOTE(review): presumably the import only works because the kernel searches the
# current directory for modules — confirm. The absolute path is machine-specific.
os.chdir('/lustre/fast/fast/pmayilvahanan/llm_line/code/llm_line')

from config import format_model_info, ModelConfig

In [None]:
# Parse the model definitions stored in the YAML file (path is relative to the
# current working directory).
with open('other_models.yaml', 'r') as yaml_file:
    yaml_config = yaml.safe_load(yaml_file)

# Index every model entry by its 'name' field so a result file's model name can
# be looked up directly; a later entry with the same name replaces an earlier one.
model_configs = dict((entry['name'], entry) for entry in yaml_config)

In [None]:
# Rewrite every result file whose name contains `string_match`: merge the YAML
# configuration of its model into the stored metadata and save the result under
# the filename produced by format_model_info. Files whose model has no YAML
# entry are left untouched; a problem with one file is reported and the loop
# moves on to the next file.
dir_results = "/fast/pmayilvahanan/llm_line/results/"
string_match = "C4"

# Fields copied from the YAML entry into the JSON 'model_metadata' section.
metadata_keys = (
    'architecture',
    'dataset',
    'dataset_version',
    'dataset_size',
    'batch_size',
    'device',
)

for json_file in glob.glob(os.path.join(dir_results, '*'+string_match+'*.json')):
    try:
        # Read the existing result file.
        with open(json_file, 'rb') as f:
            data = orjson.loads(f.read())

        # The model name already stored in the file selects the YAML entry.
        model_name = data['model_metadata']['model_name']
        if model_name not in model_configs:
            continue

        model_config = model_configs[model_name]

        # Build the full update first so a missing YAML key raises before the
        # metadata is partially modified.
        data['model_metadata'].update(
            {key: model_config[key] for key in metadata_keys}
        )

        # format_model_info is called with attribute-style access to the entry.
        new_filename = format_model_info(SimpleNamespace(**model_config))+".json"
        new_filepath = os.path.join(dir_results, new_filename)

        # Serialize BEFORE opening the target: opening with 'wb' truncates the
        # file, and when the name is unchanged the target is the source itself,
        # so a serialization failure would otherwise wipe the only copy.
        payload = orjson.dumps(data, option=orjson.OPT_INDENT_2)

        if new_filepath != json_file and os.path.exists(new_filepath):
            print(f"Warning: overwriting existing file {new_filename}")

        with open(new_filepath, 'wb') as f:
            f.write(payload)

        # Remove old file if new filename is different
        if new_filepath != json_file:
            os.remove(json_file)

        print(f"Updated and renamed:\nOld: {os.path.basename(json_file)}\nNew: {new_filename}\n")

    except orjson.JSONDecodeError as e:
        print(f"Error processing {json_file}: {e}")
    except orjson.JSONEncodeError as e:
        # Nothing has been written or deleted at this point.
        print(f"Error serializing {json_file}: {e}")
    except KeyError as e:
        # Missing 'model_metadata'/'model_name' in the JSON, or a missing field
        # in the YAML entry; skip this file instead of aborting the whole batch.
        print(f"Skipping {json_file}: missing field {e}")