ND

In [1]:
import numpy as np
import copy
import os
import yaml
# Make every Dumper report "ignore aliases" for any object, so repeated references to the
# same list (e.g. the `[cov]*n_classes` entries built below) are written out in full
# rather than as YAML anchors/aliases.
yaml.Dumper.ignore_aliases = lambda *args : True
import utils

import importlib
# Re-import `utils` so edits made to utils.py are picked up without restarting the kernel.
importlib.reload(utils)

<module 'utils' from '/n/home12/cfpark00/ML/ToyCompDiff/utils.py'>

In [2]:
# Baseline experiment configuration. get_config() deep-copies this dict and overrides the
# data-related entries (means, covs, sample counts, figure axes, ...), so the values here
# mostly act as defaults for the model / training settings.
config_nd = {
    "experiment_directory": "/n/holylfs06/LABS/finkbeiner_lab/Users/cfpark00/datadir/ML/vectorCG/info_scale/info_scale_1",
    "dataset": "vec",
    "seed": None,
    "dim": 3,
    "n_classes": 8,
    # One mean per class: the 8 corners of the cube [-1, 1]^3.
    "means": [
        [-1.0, -1.0, -1],
        [-1.0, -1.0, 1.0],
        [-1.0, 1.0, -1.0],
        [-1.0, 1.0, 1.0],
        [1.0, -1.0, -1],
        [1.0, -1.0, 1],
        [1.0, 1.0, -1],
        [1.0, 1.0, 1],
    ],
    # The same isotropic covariance (0.03 * I) is shared by all 8 classes.
    "covs": [
        [
            [0.03, 0.0, 0.0],
            [0.0, 0.03, 0.0],
            [0.0, 0.0, 0.03],
        ],
    ] * 8,
    # Zero noise covariance for every class.
    "noise_covs": [
        [
            [0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0],
        ],
    ] * 8,
    # Per-class sample counts: classes 0, 1, 2 and 4 are used for training, the rest for testing.
    "n_samples_train": [4096, 4096, 4096, 0, 4096, 0, 0, 0],
    "n_samples_train_gen": [4096, 4096, 4096, 0, 4096, 0, 0, 0],
    "n_samples_test": [0, 0, 0, 4096, 0, 4096, 4096, 4096],
    "n_samples_test_gen": [0, 0, 0, 4096, 0, 4096, 4096, 4096],
    "model_params": {
        "model_type": "VDiff",
        "network_type": "MLP",
        "hidden_dims": [1024],
        "init_scale": 1.0,
        "optimizer_type": "AdamW",
        "optimizer_params": {"lr": 0.001, "weight_decay": 0.01},
        "data_noise": 1e-3,
        "beta_settings": {
            "type": "logsnr",
            "noise_schedule": "learned_linear",
            "gamma_min": -5,
            "gamma_max": 10,
        },
    },
    "batch_size": 128,
    "train_ratio": 0.85,
    "num_steps": 20000,
    "save_steps": 15,

    # figure parameters
    "fig_x": [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]],
    "fig_y": [[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
}

In [3]:
def _per_dim(value,dim):
    """Return `value` as a 1-D array with one entry per dimension.

    A list is converted as given; anything else is repeated `dim` times.
    """
    if isinstance(value,list):
        return np.array(value)
    return np.full(dim,value)


def get_config(dim_params):
    """Build a full experiment config from a compact description of a class grid.

    Classes sit on a regular grid with `perdim[d]` points along axis `d`. Their
    means are `index*m` (plus an optional translation) divided by
    `scale=(perdim-1)*m`, so an untranslated grid spans [0, 1] on every axis.
    Covariances are diagonal with entries `s**2` (data) and `s_n**2` (noise),
    rescaled by the same `scale`.

    Parameters
    ----------
    dim_params : dict
        Required keys:
          - "dim": number of grid dimensions.
          - "dim_nuisance": number of extra dimensions (only used here to size
            the figure axes and stored in the config).
          - "perdim", "m", "s", "s_n": a scalar (repeated for every dimension)
            or a list with one entry per dimension. Every `perdim` entry must
            be at least 2, otherwise `scale` is zero and the normalisation
            divides by zero.
          - "n_per_train", "n_per_gen": samples per class; both must be
            multiples of "batch_size".
          - "batch_size".
        Optional keys:
          - "translate": offset added to the un-normalised means.
          - "train_inds": which classes are training classes, either a boolean
            mask of length n_classes or a collection of class indices (list or
            array). Defaults to the classes with at most one non-zero grid index.

    Returns
    -------
    dict
        A deep copy of the module-level `config_nd` with the grid, data, sample
        count, figure and batch-size entries overwritten.

    Raises
    ------
    AssertionError
        If "n_per_train" or "n_per_gen" is not a multiple of "batch_size".
    KeyError
        If a required key is missing.
    """
    dim=dim_params["dim"]
    dim_nuisance=dim_params["dim_nuisance"]
    n_per_train=dim_params["n_per_train"]
    n_per_gen=dim_params["n_per_gen"]
    perdim=_per_dim(dim_params["perdim"],dim)
    m=_per_dim(dim_params["m"],dim)
    scale=(perdim-1)*m
    scalesq=scale[None,:]*scale[:,None]
    s=_per_dim(dim_params["s"],dim)
    # The original tested `isinstance(s,list)` here; `s_n` now gets its own check.
    s_n=_per_dim(dim_params["s_n"],dim)
    batch_size=dim_params["batch_size"]
    assert n_per_train%batch_size==0
    assert n_per_gen%batch_size==0

    ######
    n_classes=int(np.prod(perdim))
    # inds[i0,...,i_{dim-1}] holds the grid index tuple of that class.
    inds=np.stack(np.meshgrid(*[np.arange(d) for d in perdim],indexing="ij"),axis=-1)
    means=inds.astype(np.float32)*m
    if "translate" in dim_params:
        means+=np.array(dim_params["translate"])
    means/=scale
    means=means.reshape(-1,dim).tolist()
    cov=(np.eye(dim)*s[:,None]*s[None,:])/scalesq
    cov=cov.tolist()
    # Every class shares the same covariance list object.
    covs=[cov]*n_classes
    noise_cov=(np.eye(dim)*s_n[:,None]*s_n[None,:])/scalesq
    noise_cov=noise_cov.tolist()
    noise_covs=[noise_cov]*n_classes
    ##
    if "train_inds" not in dim_params:
        # Default: train on classes with zero or one non-zero grid index.
        n_nz=(inds!=0).sum(-1)
        train_inds=np.logical_or(n_nz==0,n_nz==1).flatten()
    else:
        # asarray lets callers pass a plain list as well as an ndarray.
        train_inds=np.asarray(dim_params["train_inds"])
        if train_inds.dtype!=bool:
            # Integer class indices -> boolean mask over all classes.
            class_ids=train_inds
            train_inds=np.zeros(n_classes,dtype=bool)
            train_inds[class_ids]=True
    n_samples_train=np.zeros(n_classes,dtype=np.int32)
    n_samples_train[train_inds]=n_per_train
    n_samples_train=n_samples_train.tolist()
    n_samples_test=np.zeros(n_classes,dtype=np.int32)
    n_samples_test[~train_inds]=n_per_gen#intentional
    n_samples_test=n_samples_test.tolist()

    n_samples_train_gen=np.zeros(n_classes,dtype=np.int32)
    n_samples_train_gen[train_inds]=n_per_gen
    n_samples_train_gen=n_samples_train_gen.tolist()
    n_samples_test_gen=np.zeros(n_classes,dtype=np.int32)
    n_samples_test_gen[~train_inds]=n_per_gen
    n_samples_test_gen=n_samples_test_gen.tolist()
    #
    # One figure per extra axis: x is always axis 0, y is axis j_ax+1.
    fig_x=np.zeros((dim-1,dim+dim_nuisance))
    fig_y=np.zeros((dim-1,dim+dim_nuisance))
    for j_ax in range(dim-1):
        fig_x[j_ax,0]=1
        fig_y[j_ax,j_ax+1]=1
    fig_x=fig_x.tolist()
    fig_y=fig_y.tolist()

    # Deep copy so nested dicts (e.g. model_params) can be edited per experiment
    # without touching the shared base config.
    config_=copy.deepcopy(config_nd)

    #global
    config_["perdim"]=perdim.tolist()
    config_["dim"]=dim
    config_["dim_nuisance"]=dim_nuisance

    #data
    config_["inds_md"]=inds.reshape(-1,dim).tolist()
    config_["means"]=means
    config_["covs"]=covs
    config_["noise_covs"]=noise_covs
    config_["n_samples_train"]=n_samples_train
    config_["n_samples_train_gen"]=n_samples_train_gen
    config_["n_samples_test"]=n_samples_test
    config_["n_samples_test_gen"]=n_samples_test_gen
    config_["fig_x"]=fig_x
    config_["fig_y"]=fig_y

    #model
    config_["n_classes"]=n_classes
    config_["batch_size"]=batch_size

    return config_


In [4]:
# Default grid description passed to get_config(); the sweep cells copy it and override
# individual entries. Entries tagged "FIX" carry that marker from the original notebook.
dim_params = {
    "dim": 2,
    "dim_nuisance": 1,  # FIX
    "perdim": 2,  # FIX
    "m": 1.0,  # FIX
    "s": 0.05,
    "s_n": 0.0,  # FIX
    "n_per_train": 1024,
    "n_per_gen": 128,
    "batch_size": 32,
}

In [5]:
# Baseline sweep: write one YAML config per (dim, seed) under yamls/2x2/base.
for dim in [2,3]:
    for seed in range(3):
        experiment_name=f"dim={dim}_seed={seed}"
        fol="2x2/base"

        ###########settings
        dim_params_=copy.deepcopy(dim_params)
        dim_params_["dim"]=dim

        config_=get_config(dim_params_)
        config_["seed"]=seed
        config_['num_steps']=600_000
        config_['save_steps']=60
        config_["nl_nuisance"]=True
        ##############

        # NOTE(review): check_only=True presumably validates/normalises the config
        # without creating the experiment -- confirm in utils.process_config.
        config_=utils.process_config(config_,check_only=True)
        experiment_directory=os.path.join("data",fol,experiment_name)
        config_["experiment_directory"]=experiment_directory
        yaml_fol=os.path.join("yamls",fol)
        os.makedirs(yaml_fol,exist_ok=True)
        yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
        # Use a context manager so the file is flushed and closed right away
        # instead of leaking the handle.
        with open(yaml_path,"w") as yaml_file:
            yaml.dump(config_,yaml_file)

In [8]:
# Sweep over the class standard deviation s: one YAML per (s, dim, seed) under yamls/2x2/s=<s>.
for s in [0.001,0.003,0.01,0.03,0.1]:
    for dim in [2,3]:
        for seed in range(3):
            experiment_name=f"dim={dim}_seed={seed}"
            fol=f"2x2/s={s}"

            ###########settings
            dim_params_=copy.deepcopy(dim_params)
            dim_params_["dim"]=dim
            dim_params_["s"]=s

            config_=get_config(dim_params_)
            config_["seed"]=seed
            config_['num_steps']=600_000
            config_['save_steps']=60
            config_["nl_nuisance"]=True
            ##############

            # NOTE(review): check_only=True presumably validates/normalises the config
            # without creating the experiment -- confirm in utils.process_config.
            config_=utils.process_config(config_,check_only=True)
            experiment_directory=os.path.join("data",fol,experiment_name)
            config_["experiment_directory"]=experiment_directory
            yaml_fol=os.path.join("yamls",fol)
            os.makedirs(yaml_fol,exist_ok=True)
            yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
            # Use a context manager so the file is flushed and closed right away
            # instead of leaking the handle.
            with open(yaml_path,"w") as yaml_file:
                yaml.dump(config_,yaml_file)

In [10]:
# Joint sweep over weight decay and s: one YAML per (wd, s, dim, seed)
# under yamls/2x2/wd=<wd>_s=<s>.
for wd in [0.0025,0.005,0.01,0.02,0.04]:
    for s in [0.01,0.03,0.05,0.07,0.09]:
        for dim in [2,3]:
            for seed in range(3):
                experiment_name=f"dim={dim}_seed={seed}"
                fol=f"2x2/wd={wd}_s={s}"

                ###########settings
                dim_params_=copy.deepcopy(dim_params)
                dim_params_["dim"]=dim
                dim_params_["s"]=s

                config_=get_config(dim_params_)
                config_["seed"]=seed
                config_['num_steps']=600_000
                config_['save_steps']=60
                config_["nl_nuisance"]=True
                # Safe to edit in place: get_config returns a deep copy of the base config.
                config_["model_params"]["optimizer_params"]["weight_decay"]=wd
                ##############

                # NOTE(review): check_only=True presumably validates/normalises the config
                # without creating the experiment -- confirm in utils.process_config.
                config_=utils.process_config(config_,check_only=True)
                experiment_directory=os.path.join("data",fol,experiment_name)
                config_["experiment_directory"]=experiment_directory
                yaml_fol=os.path.join("yamls",fol)
                os.makedirs(yaml_fol,exist_ok=True)
                yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
                # Use a context manager so the file is flushed and closed right away
                # instead of leaking the handle.
                with open(yaml_path,"w") as yaml_file:
                    yaml.dump(config_,yaml_file)

In [11]:
# 2-D runs with a different s per axis (0.07, 0.03), swept over weight decay.
for wd in [0.005,0.01,0.02]:
    for seed in range(3):
        experiment_name=f"seed={seed}"
        fol=f"2x2/s1={0.07}_s2={0.03}_wd={wd}"

        ###########settings
        dim_params_=copy.deepcopy(dim_params)
        dim_params_["dim"]=2
        dim_params_["s"]=[0.07,0.03]

        config_=get_config(dim_params_)
        config_["seed"]=seed
        config_['num_steps']=600_000
        config_['save_steps']=60
        config_["nl_nuisance"]=True
        # Safe to edit in place: get_config returns a deep copy of the base config.
        config_["model_params"]["optimizer_params"]["weight_decay"]=wd
        ##############

        # NOTE(review): check_only=True presumably validates/normalises the config
        # without creating the experiment -- confirm in utils.process_config.
        config_=utils.process_config(config_,check_only=True)
        experiment_directory=os.path.join("data",fol,experiment_name)
        config_["experiment_directory"]=experiment_directory
        yaml_fol=os.path.join("yamls",fol)
        os.makedirs(yaml_fol,exist_ok=True)
        yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
        # Use a context manager so the file is flushed and closed right away
        # instead of leaking the handle.
        with open(yaml_path,"w") as yaml_file:
            yaml.dump(config_,yaml_file)

In [12]:
# 3-D runs with a different s per axis (0.07, 0.03, 0.07), swept over weight decay.
for wd in [0.005,0.01,0.02]:
    for seed in range(3):
        experiment_name=f"seed={seed}"
        fol=f"2x2/s1={0.07}_s2={0.03}_s3={0.07}_wd={wd}"

        ###########settings
        dim_params_=copy.deepcopy(dim_params)
        dim_params_["dim"]=3
        dim_params_["s"]=[0.07,0.03,0.07]

        config_=get_config(dim_params_)
        config_["seed"]=seed
        config_['num_steps']=600_000
        config_['save_steps']=60
        config_["nl_nuisance"]=True
        # Safe to edit in place: get_config returns a deep copy of the base config.
        config_["model_params"]["optimizer_params"]["weight_decay"]=wd
        ##############

        # NOTE(review): check_only=True presumably validates/normalises the config
        # without creating the experiment -- confirm in utils.process_config.
        config_=utils.process_config(config_,check_only=True)
        experiment_directory=os.path.join("data",fol,experiment_name)
        config_["experiment_directory"]=experiment_directory
        yaml_fol=os.path.join("yamls",fol)
        os.makedirs(yaml_fol,exist_ok=True)
        yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
        # Use a context manager so the file is flushed and closed right away
        # instead of leaking the handle.
        with open(yaml_path,"w") as yaml_file:
            yaml.dump(config_,yaml_file)

In [5]:
# 3-D runs with a stronger per-axis contrast in s (0.1, 0.01, 0.1) at a single weight decay.
for wd in [0.01]:
    for seed in range(3):
        experiment_name=f"seed={seed}"
        fol=f"2x2/s1={0.1}_s2={0.01}_s3={0.1}_wd={wd}"

        ###########settings
        dim_params_=copy.deepcopy(dim_params)
        dim_params_["dim"]=3
        dim_params_["s"]=[0.1,0.01,0.1]

        config_=get_config(dim_params_)
        config_["seed"]=seed
        config_['num_steps']=600_000
        config_['save_steps']=60
        config_["nl_nuisance"]=True
        # Safe to edit in place: get_config returns a deep copy of the base config.
        config_["model_params"]["optimizer_params"]["weight_decay"]=wd
        ##############

        # NOTE(review): check_only=True presumably validates/normalises the config
        # without creating the experiment -- confirm in utils.process_config.
        config_=utils.process_config(config_,check_only=True)
        experiment_directory=os.path.join("data",fol,experiment_name)
        config_["experiment_directory"]=experiment_directory
        yaml_fol=os.path.join("yamls",fol)
        os.makedirs(yaml_fol,exist_ok=True)
        yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
        # Use a context manager so the file is flushed and closed right away
        # instead of leaking the handle.
        with open(yaml_path,"w") as yaml_file:
            yaml.dump(config_,yaml_file)

In [12]:
# Runs with 10 nuisance dimensions: one YAML per (dim, seed) under yamls/2x2/10dn.
for dim in [2,3]:
    for seed in range(3):
        experiment_name=f"dim={dim}_seed={seed}"
        fol="2x2/10dn"

        ###########settings
        dim_params_=copy.deepcopy(dim_params)
        dim_params_["dim"]=dim
        dim_params_["s"]=0.05
        dim_params_["dim_nuisance"]=10
        dim_params_["n_per_gen"]=1024#for 2**nuisance 

        config_=get_config(dim_params_)
        config_["seed"]=seed
        config_['num_steps']=600_000
        config_['save_steps']=60
        config_["nl_nuisance"]=True
        # Safe to edit in place: get_config returns a deep copy of the base config.
        config_["model_params"]["optimizer_params"]["weight_decay"]=0.01
        ##############

        # NOTE(review): check_only=True presumably validates/normalises the config
        # without creating the experiment -- confirm in utils.process_config.
        config_=utils.process_config(config_,check_only=True)
        experiment_directory=os.path.join("data",fol,experiment_name)
        config_["experiment_directory"]=experiment_directory
        yaml_fol=os.path.join("yamls",fol)
        os.makedirs(yaml_fol,exist_ok=True)
        yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
        # Use a context manager so the file is flushed and closed right away
        # instead of leaking the handle.
        with open(yaml_path,"w") as yaml_file:
            yaml.dump(config_,yaml_file)

In [11]:
# Inspect the per-class generation counts of the most recently built config.
# This depends on `config_` left behind by whichever sweep cell ran last, so the
# displayed value changes with execution order.
config_["n_samples_train_gen"]

[128, 128, 128, 0, 128, 0, 0, 0]

In [13]:
# Sweep over the number of hidden layers (each 1024 wide) with 10 nuisance dimensions:
# one YAML per (nh, dim, seed) under yamls/2x2/nh=<nh>.
for nh in [2,4]:
    for dim in [2,3]:
        for seed in range(3):
            experiment_name=f"dim={dim}_seed={seed}"
            fol=f"2x2/nh={nh}"

            ###########settings
            dim_params_=copy.deepcopy(dim_params)
            dim_params_["dim"]=dim
            dim_params_["s"]=0.05
            dim_params_["dim_nuisance"]=10
            dim_params_["n_per_gen"]=1024#for 2**nuisance 

            config_=get_config(dim_params_)
            config_["seed"]=seed
            config_['num_steps']=600_000
            config_['save_steps']=60
            config_["nl_nuisance"]=True
            # Safe to edit in place: get_config returns a deep copy of the base config.
            config_["model_params"]["optimizer_params"]["weight_decay"]=0.01
            config_["model_params"]["hidden_dims"]=[1024]*nh
            ##############

            # NOTE(review): check_only=True presumably validates/normalises the config
            # without creating the experiment -- confirm in utils.process_config.
            config_=utils.process_config(config_,check_only=True)
            experiment_directory=os.path.join("data",fol,experiment_name)
            config_["experiment_directory"]=experiment_directory
            yaml_fol=os.path.join("yamls",fol)
            os.makedirs(yaml_fol,exist_ok=True)
            yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
            # Use a context manager so the file is flushed and closed right away
            # instead of leaking the handle.
            with open(yaml_path,"w") as yaml_file:
                yaml.dump(config_,yaml_file)

In [20]:
import glob
import os
# Split the wd/s sweep configs into n_proc list files (yamls_<i>.txt), one absolute path per line.
# glob returns matches in arbitrary order; sort so the chunk contents are the same on every run.
yaml_files=sorted(glob.glob("./yamls/2x2/wd*_s=*/*.yaml"))

n_proc=10
# Round-robin assignment: chunk i gets files i, i+n_proc, i+2*n_proc, ...
chunked=[yaml_files[i::n_proc] for i in range(n_proc)]
for i,chunk in enumerate(chunked):
    with open(f"yamls_{i}.txt","w") as f:
        for path in chunk:
            path=os.path.abspath(path)
            f.write(path+"\n")

In [9]:
# Sweep over s with the "ymean" option enabled: one YAML per (s, dim, seed)
# under yamls/2x2/ymean_s=<s>.
for s in [0.03,0.1,0.3]:
    for dim in [2,3]:
        for seed in range(3):
            experiment_name=f"dim={dim}_seed={seed}"
            fol=f"2x2/ymean_s={s}"

            ###########settings
            dim_params_=copy.deepcopy(dim_params)
            dim_params_["dim"]=dim
            dim_params_["s"]=s

            config_=get_config(dim_params_)
            config_["seed"]=seed
            config_['num_steps']=600_000
            config_['save_steps']=60
            config_["nl_nuisance"]=True
            config_["ymean"]=True
            ##############

            # NOTE(review): check_only=True presumably validates/normalises the config
            # without creating the experiment -- confirm in utils.process_config.
            config_=utils.process_config(config_,check_only=True)
            experiment_directory=os.path.join("data",fol,experiment_name)
            config_["experiment_directory"]=experiment_directory
            yaml_fol=os.path.join("yamls",fol)
            os.makedirs(yaml_fol,exist_ok=True)
            yaml_path=os.path.join(yaml_fol,experiment_name+".yaml")
            # Use a context manager so the file is flushed and closed right away
            # instead of leaking the handle.
            with open(yaml_path,"w") as yaml_file:
                yaml.dump(config_,yaml_file)