In [None]:
"""
Scalars: Simple values such as strings, numbers, and booleans.

Lists: Ordered collections of scalars, represented by a hyphen (-) followed by a space and the value.

Dictionaries: Unordered collections of key-value pairs, represented by colons (:) separating the keys and values.

Documents: A YAML file can contain one or more YAML documents separated by “ — -”. Each document is treated as a standalone unit.

Comments: Lines that start with “#” are considered comments and these are ignored by YAML parsers.

"""

In [13]:
import yaml
class DictToObject:
    def __init__(self, dictionary):
        # For each key-value pair in the dictionary, set attribute with key name to the value
        for key, value in dictionary.items():
            if isinstance(value, dict):  # If value is a dictionary, convert it recursively
                value = DictToObject(value)
            self.__dict__[key] = value

    def __getattr__(self, attr):
        # This method is called when an attribute lookup has not found the attribute in the usual places.
        # It allows us to return the value for the key from the dictionary.
        return self.__dict__.get(attr, None)
    
    def __repr__(self):
        # This method is called when the repr() function is used on the object.
        # It returns a string representation of the object.
        return str(self.__dict__)
    

# Combine all documents into a dictionary of dictionaries instead of separate YAML documents
combined_content = {
    "PATH": {
        "ROOT": '/home/dosisiddhesh/latex_model',
        "DATA": '/home/dosisiddhesh/SID_DATA_PROCESSED/DATA_2',
        "DATA_PKL": '/home/dosisiddhesh/SID_DATA_PROCESSED/DATA_PICKEL',
        "MODEL": "/home/dosisiddhesh/latex_model/model_exp",
        "TOKENIZER": "/home/dosisiddhesh/MISTRAL_EXP/model/hf_tokenizer_1.0%_30000_new",
        "LOG": "/home/dosisiddhesh/latex_model/log_exp"
    },

    "PARAMETERS": {
        "D_emb": 4096,
        "Vocab": 30000,
        "D_head": 128,
        "D_FF": 14336,
        "N_Layer": 5,
        "N_Head": 32,
        "KV_Head": 8,
        "Window": 4096  # 8192
    },

    # Assuming HYPERPARAMETERS and LOGGING are empty for now
    "HYPERPARAMETERS": {
        "seq_len" : 4096,
        "grad_acc_batch_size": 32,
        "batch_size": 1,


    },

    "TRAINING_PARAM":{
        "EPOCHS": 100,
        "LEARNING_RATE": 0.0001,
        "WARMUP_STEPS": 1000,
        "EPSILON": 1e-6,
        "CLIP_NORM": 1.0,
        "float16": False,
        "adafactor": False,
        "enb_grad_checkpoint": False,     
    },

    "DATA_PARAM": {
        "train_data": "train_{0}_{1}_datasets.pkl",
        "val_data": "val_{0}_{1}_datasets.pkl",
        "test_data": "test_{0}_{1}_datasets.pkl",
        "Sam"
    },

    "LOGGING": {}
}

# Write the combined content to a YAML file
with open('config.yaml', 'w') as file:
    yaml.safe_dump(combined_content, file, sort_keys=False)

'/mnt/data/config_single.yaml'  # Return the path of the new file


'/mnt/data/config_single.yaml'

In [14]:
# Read the YAML file
with open('config.yaml', 'r') as file:
    content = yaml.safe_load(file)



In [10]:

content = DictToObject(content)



In [11]:
content.PATH.ROOT 

'/home/username/Projects/Transformer'