In [1]:
import json
import wandb
import copy

In [2]:
class SweepDict(dict):
    """Dictionary wrapper around a nested sweep-parameter tree.

    ``replace_property`` navigates the tree assuming every intermediate level
    has the shape ``{group_name: {'parameters': {...}}}``.
    """

    def __init__(self, existing=None):
        """Populate the dict from ``existing`` and discard any ``'$schema'`` key.

        Args:
            existing: Optional mapping (or iterable of key/value pairs) used to
                fill the dict. ``None`` yields an empty dict.
        """
        if existing is None:
            super().__init__()
        else:
            super().__init__(existing)

        # Supply a default so that an empty dict, or input that carries no
        # '$schema' entry, does not raise KeyError.
        self.pop('$schema', None)

    def replace_property(self, full_path: str, values: dict):
        """Return a deep copy of this dict with one existing property replaced.

        Args:
            full_path: Dot-separated path to the property. Every segment except
                the last names a group whose ``'parameters'`` dict is descended
                into; the last segment is the key to overwrite.
            values: New value stored under the final key.

        Returns:
            A new object with the replacement applied; ``self`` is not modified.

        Raises:
            KeyError: If an intermediate group, or its ``'parameters'`` entry,
                is missing.
            Exception: If the final key does not already exist at that level.
        """
        *group_names, prop_name = full_path.split('.')

        # Work on a deep copy so calls can be chained without mutating the source.
        new_params = copy.deepcopy(self)
        act = new_params
        for prop in group_names:
            act = act[prop]['parameters']

        # Only existing keys may be replaced; this catches typos in the path.
        if prop_name not in act:
            raise Exception(f"Invalid Key {prop_name}, must be any of {list(act.keys())}")
        act[prop_name] = values
        return new_params
    
    

# Read the base sweep definition from disk and wrap it in a SweepDict.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding.
with open('sweep_base.json', 'r', encoding='utf-8') as f:
    base = SweepDict(json.load(f))

In [3]:
# Overrides layered on top of the base sweep definition, keyed by dotted
# property path and applied in the order listed.
overrides = {
    'dataset.augmentation': {'value': 'flip-perspective'},
    "dataset.dataset": {"values": ["COCO-karpathy", "COCO-karpathy-llama2", "COCO-karpathy-llama"]},
    'model.d_ffn': {'value': 1396},
    'model.num_layers': {'value': 5},
    'model.num_heads': {'value': 16},
    'model.dropout': {'value': 0.2},
    'train.early_stopping': {"values": [100, 300]},
    'train.optimizer.args.weight_decay': {"value": 0.00412},
    'train.optimizer.base_lr': {"value": 0.00004},
    'train.resulting_batch_size': {"value": 4096},
    'train.label_smoothing': {"value": 0.02},
}

# Each replace_property call hands back a new object, so rebinding `params`
# on every iteration reproduces a chained sequence of calls.
params = base
for property_path, setting in overrides.items():
    params = params.replace_property(property_path, setting)

In [4]:
# Show the assembled sweep parameters as this cell's output for inspection.
params

{'dataset': {'parameters': {'augmentation': {'value': 'flip-perspective'},
   'batch_size': {'value': 256},
   'dataset': {'values': ['COCO-karpathy',
     'COCO-karpathy-llama2',
     'COCO-karpathy-llama']},
   'eval_dataset': {'value': 'COCO-karpathy'},
   'eval_batch_size': {'value': 1024},
   'grouped': {'value': False},
   'num_workers': {'value': 24},
   'prefetch_factor': {'value': 4}}},
 'model': {'parameters': {'cross_attention': {'value': False},
   'd_ffn': {'value': 1396},
   'd_model': {'value': 768},
   'dropout': {'value': 0.2},
   'gpt_embedding': {'value': False},
   'image_encoder': {'value': 'CLIP/ViT-B-16'},
   'image_encoder_frozen': {'value': False},
   'max_seq_len': {'value': 30},
   'num_heads': {'value': 16},
   'num_layers': {'value': 5},
   'pos_encoding': {'value': 'learned'},
   'share_embedding': {'value': True},
   'text_pre_ln': {'value': True},
   'torch_attn': {'value': True},
   'vocab_size': {'value': 28999}}},
 'train': {'parameters': {'autocast':

In [7]:
# Grid sweep over the prepared parameter tree, optimizing for the highest
# "cider_mean" metric.
sweep_config = dict(
    name="final llm-comp",
    method="grid",
    metric=dict(name="cider_mean", goal="maximize"),
    parameters=params,
)

# Register the sweep under the given entity and project.
# NOTE(review): `id` shadows the Python builtin; the name is kept because
# other cells may read it.
id = wandb.sweep(sweep_config, entity="hpoeche-team", project="git-hp-search")

Create sweep with ID: k9fjycmk
Sweep URL: https://wandb.ai/hpoeche-team/git-hp-search/sweeps/k9fjycmk
