In [8]:
import json
import copy

# LSTM

In [14]:
# Hyperparameter grid for the LSTM configs: each key lists the values to sweep.
param_list = dict(
    emb_dim=[64, 128],
    n_hidden=[16, 32, 64, 128],
    n_RNN_layers=[1, 3, 5],
    bidirectional=[True],
    model=["lstm"],
)

In [15]:
# Expand param_list into the full grid: one dict per combination of options.
total = []
for position, (name, options) in enumerate(param_list.items()):
    if position == 0:
        # The first key seeds the grid with one single-entry dict per option.
        expanded = [{name: option} for option in options]
    else:
        # Each later key multiplies the grid: copy every combination built so
        # far once per option and tag each copy with that option.
        expanded = []
        for option in options:
            branch = copy.deepcopy(total)
            for combo in branch:
                combo[name] = option
            expanded.extend(branch)
    total = expanded

In [16]:
# Write each LSTM parameter combination to its own numbered JSON file.
for index, params in enumerate(total):
    with open(f"./model_params/lstm{index}.json", "w") as out_file:
        json.dump(params, out_file)

# CNN

In [8]:
# Hyperparameter grid for the 1D-CNN configs: each key lists the values to sweep.
# NOTE(review): num_layer is crossed with channel lists of length 2 and 3, so
# some combinations pair num_layer=2 with three channel sizes (and vice versa)
# - confirm that is intended.
param_list = dict(
    model=["cnn1d"],
    emb_dim=[64, 128],
    num_layer=[2, 3],
    channels=[
        [32, 64],
        [32, 64, 128],
        [64, 128],
        [64, 128, 256],
        [128, 256],
        [128, 256, 512],
    ],
)

In [9]:
# Expand param_list into the full grid: one dict per combination of options.
total = []
for position, (name, options) in enumerate(param_list.items()):
    if position == 0:
        # The first key seeds the grid with one single-entry dict per option.
        expanded = [{name: option} for option in options]
    else:
        # Each later key multiplies the grid: copy every combination built so
        # far once per option and tag each copy with that option.
        expanded = []
        for option in options:
            branch = copy.deepcopy(total)
            for combo in branch:
                combo[name] = option
            expanded.extend(branch)
    total = expanded

In [10]:
# Write each CNN parameter combination to its own numbered JSON file.
for index, params in enumerate(total):
    with open(f"./model_params/cnn{index}.json", "w") as out_file:
        json.dump(params, out_file)

# Encoder

In [6]:
# Hyperparameter grid for the encoder configs: each key lists the values to sweep.
param_list = dict(
    emb_dim=[64, 128],
    d_ff=[128, 256, 512],
    model=["encoder"],
    h=[4, 8],
    n_encoder=[2, 4],
)

In [7]:
# Expand param_list into the full grid: one dict per combination of options.
total = []
for position, (name, options) in enumerate(param_list.items()):
    if position == 0:
        # The first key seeds the grid with one single-entry dict per option.
        expanded = [{name: option} for option in options]
    else:
        # Each later key multiplies the grid: copy every combination built so
        # far once per option and tag each copy with that option.
        expanded = []
        for option in options:
            branch = copy.deepcopy(total)
            for combo in branch:
                combo[name] = option
            expanded.extend(branch)
    total = expanded

# Tie the feed-forward width to the embedding size (d_ff = 4 * emb_dim).
# NOTE(review): this overwrites the d_ff value taken from param_list, so the
# three d_ff options produce identical configs (24 dicts, 8 distinct) - confirm
# the duplication is intended.
for params in total:
    params.update(d_ff=params["emb_dim"] * 4)

In [8]:
# Write each encoder parameter combination to its own numbered JSON file.
for index, params in enumerate(total):
    with open(f"./model_params/encoder{index}.json", "w") as out_file:
        json.dump(params, out_file)

#### Encoder run

python train.py --model_info ./save/uniprot/model/encoder6_1.json --batch_size 128 --dropout_rate 0.1 --gpu 0 --lr 1e-4 --epoch 400 --scheduler False --dataset ACP2_main --val_fold 0 --load_weight True

In [9]:
# Number of encoder configs (encoder0 .. encoder{total_model_num - 1}) used to
# build the model_info paths in the parameter grids below.
total_model_num = 4

In [43]:
# Grid for the "uniprot" runs: one entry per encoder config file.
param_list1 = dict(
    model_info=[
        f"./model/model_params/encoder{idx}.json" for idx in range(total_model_num)
    ],
    batch_size=[400],
    dropout_rate=[0.0],
    lr=[0.01],
    epoch=[300],
    scheduler=[False],
    dataset=["uniprot"],
    val_fold=[1],
    # n_out_feat=[20],  (disabled)
    temp=[0.5],
)

# Grid for the runs that read ./save/uniprot/model/encoder{i}_{j}.json
# (j in 1..2), swept over six datasets and ten validation folds.
# NOTE(review): load_weight=False is crossed with every pretrained_epoch value;
# if pretrained_epoch is ignored when no weights are loaded, those runs are
# duplicates - confirm against train.py.
param_list2 = dict(
    batch_size=[64],
    dropout_rate=[0.1],
    lr=[1e-4],
    scheduler=[False],
    load_weight=[True, False],
    pretrained_epoch=["best", 50, 100, 150, 200, 250, 300],
    model_info=[
        f"./save/uniprot/model/encoder{idx}_{variant}.json"
        for idx in range(total_model_num)
        for variant in range(1, 3)
    ],
    val_fold=[*range(10)],
    epoch=[300],
    dataset=[
        "ACP2_main",
        "ACP_Mixed_80",
        "ACP2_alter",
        "LEE_Indep",
        "ACP500_ACP164",
        "ACP500_ACP2710",
    ],
)

In [44]:
def param_generator(param_list):
    """Expand a dict of option lists into every combination of options.

    Args:
        param_list: Mapping of parameter name -> iterable of candidate values.

    Returns:
        A list of dicts, one per combination, each holding one value for every
        key (keys in the mapping's order). The first key varies fastest and
        the last key slowest. An empty mapping, or any key with no candidate
        values, yields an empty list.
    """
    combos = []
    for position, (name, options) in enumerate(param_list.items()):
        if position == 0:
            # The first key seeds the grid with one single-entry dict per option.
            expanded = [{name: option} for option in options]
        else:
            # Each later key multiplies the grid: copy every combination built
            # so far once per option and tag each copy with that option.
            expanded = []
            for option in options:
                branch = copy.deepcopy(combos)
                for combo in branch:
                    combo[name] = option
                expanded.extend(branch)
        combos = expanded

    return combos

In [45]:
# Expand both grids into flat lists of per-run parameter dicts.
cmd_total1 = param_generator(param_list1)
cmd_total2 = param_generator(param_list2)

In [46]:
def cmd_gen(cmd_total1, cmd_total2, script_dir="../script", n_scripts=5, n_gpus=4):
    """Append ``python train.py`` command lines to round-robin script files.

    Entry ``i`` of ``cmd_total1`` is written to ``train_{i % n_scripts}.txt``.
    It is followed by every entry of ``cmd_total2`` whose ``model_info``
    contains the first entry's file stem plus ``"_"`` (for example
    ``encoder3.json`` matches ``encoder3_1.json``). Each matching entry gets
    an ``--AA_tok_len`` option: 1 when its ``model_info`` contains
    ``"_1.json"``, otherwise 2.

    Args:
        cmd_total1: List of parameter dicts; each must have a "model_info" key.
        cmd_total2: List of parameter dicts; each must have a "model_info" key.
            The dicts are not modified.
        script_dir: Directory holding the script files; it must already exist.
        n_scripts: Number of script files the commands are spread over.
        n_gpus: Number of GPU ids; script ``j`` uses GPU ``j % n_gpus``, so
            with the defaults script 4 shares GPU 0.

    Note:
        Files are opened in append mode, so calling this twice duplicates the
        commands already written.
    """
    for i, first_params in enumerate(cmd_total1):
        # ".../encoder3.json" -> "encoder3_", the prefix of the derived configs.
        curr_model = first_params["model_info"].split("/")[-1].replace(".json", "_")
        first_args = " ".join(f"--{k} {v}" for k, v in first_params.items())
        j = i % n_scripts
        gpu = j % n_gpus
        with open(f"{script_dir}/train_{j}.txt", "a") as f:
            f.write(f"python train.py {first_args} --gpu {gpu}\n")
            for second_params in cmd_total2:
                model_info = second_params["model_info"]
                if curr_model not in model_info:
                    continue
                tok_len = 1 if "_1.json" in model_info else 2
                # Build a new dict rather than mutating the caller's entry.
                run_params = {**second_params, "AA_tok_len": tok_len}
                second_args = " ".join(f"--{k} {v}" for k, v in run_params.items())
                f.write(f"python train.py {second_args} --gpu {gpu}\n")

In [47]:
# NOTE: cmd_gen opens ../script/train_{j}.txt in append mode, so re-running
# this cell adds the same commands to the files again.
cmd_gen(cmd_total1, cmd_total2)

### Single-run command generation

In [4]:
# Grid for the runs with contrastive=False: 24 configs for each of the encoder,
# cnn and lstm families, swept over datasets, folds and AA_tok_len.
# The contrastive-only options (alpha, tgt_model) and n_out_feat are left out.
param_list_cont_false = dict(
    model_info=[
        f"./model/model_params/{family}{idx}.json"
        for family in ("encoder", "cnn", "lstm")
        for idx in range(24)
    ],
    batch_size=[128],
    contrastive=[False],
    dropout_rate=[0.1],
    lr=[0.001],
    epoch=[300],
    scheduler=[False],
    dataset=[
        "ACP2_main",
        "ACP_Mixed_80",
        "ACP2_alter",
        "LEE_Indep",
        "ACP500_ACP164",
        "ACP500_ACP2710",
    ],
    val_fold=[*range(10)],
    AA_tok_len=[1, 2],
    temp=[0.5],
)

In [10]:
# Grid for the runs with contrastive=True: 24 configs for each of the encoder,
# cnn and lstm families, swept over datasets, folds, alpha and tgt_model.
# n_out_feat is left out.
param_list1 = dict(
    model_info=[
        f"./model/model_params/{family}{idx}.json"
        for family in ("encoder", "cnn", "lstm")
        for idx in range(24)
    ],
    batch_size=[128],
    contrastive=[True],
    dropout_rate=[0.1],
    lr=[0.001],
    epoch=[300],
    scheduler=[False],
    dataset=[
        "ACP2_main",
        "ACP_Mixed_80",
        "ACP2_alter",
        "LEE_Indep",
        "ACP500_ACP164",
        "ACP500_ACP2710",
    ],
    val_fold=[*range(10)],
    alpha=[0.1, 0.3, 0.5, 0.7, 0.9],
    tgt_model=["first", "second", "both"],
    temp=[0.5],
)

In [5]:
def param_generator(param_list):
    """Expand a dict of option lists into every combination of options.

    Args:
        param_list: Mapping of parameter name -> iterable of candidate values.

    Returns:
        A list of dicts, one per combination, each holding one value for every
        key (keys in the mapping's order). The first key varies fastest and
        the last key slowest. An empty mapping, or any key with no candidate
        values, yields an empty list.
    """
    combos = []
    for position, (name, options) in enumerate(param_list.items()):
        if position == 0:
            # The first key seeds the grid with one single-entry dict per option.
            expanded = [{name: option} for option in options]
        else:
            # Each later key multiplies the grid: copy every combination built
            # so far once per option and tag each copy with that option.
            expanded = []
            for option in options:
                branch = copy.deepcopy(combos)
                for combo in branch:
                    combo[name] = option
                expanded.extend(branch)
        combos = expanded

    return combos

In [6]:
def cmd_gen(cmd_total1, script_dir="../script", n_scripts=5, n_gpus=4):
    """Append one ``python train.py`` command per parameter dict to script files.

    Entry ``i`` is rendered as ``--key value`` pairs and appended to
    ``train_tok_cont_{i % n_scripts}.txt`` inside ``script_dir``.

    Args:
        cmd_total1: List of parameter dicts, one per training run.
        script_dir: Directory holding the script files; it must already exist.
        n_scripts: Number of script files the commands are spread over.
        n_gpus: Number of GPU ids; script ``j`` uses GPU ``j % n_gpus``, so
            with the defaults script 4 shares GPU 0.

    Note:
        Files are opened in append mode, so calling this twice duplicates the
        commands already written. This definition replaces the two-argument
        ``cmd_gen`` defined earlier in the notebook.
    """
    for i, params in enumerate(cmd_total1):
        args = " ".join(f"--{k} {v}" for k, v in params.items())
        j = i % n_scripts
        gpu = j % n_gpus
        with open(f"{script_dir}/train_tok_cont_{j}.txt", "a") as f:
            f.write(f"python train.py {args} --gpu {gpu}\n")

In [9]:
# Expand the contrastive=False grid and append its commands to
# ../script/train_tok_cont_{j}.txt (append mode: re-running duplicates lines).
cmd_gen(param_generator(param_list_cont_false))

In [11]:
# Expand the contrastive=True grid; its commands are appended to the same
# ../script/train_tok_cont_{j}.txt files written by the previous cell.
cmd_gen(param_generator(param_list1))

In [12]:
import json

In [13]:
# Scratch dict with integer keys, used to check how JSON round-trips them.
a = {1: {2:2}}

In [14]:

# Write the scratch dict `a` to disk. The original dumped `b`, which is only
# defined by the later load cell, so this cell raised NameError on a fresh kernel.
with open('./a.json', 'w') as f:
    json.dump(a, f, indent=4)

In [15]:
# Read the JSON file back and bind the parsed object to `b`.
with open('./a.json','r') as in_file:
    b = json.loads(in_file.read())

In [17]:
# Add an entry with integer keys to the dict that was loaded from JSON.
b[3]={4:5}

In [18]:
# Display b: keys that came back from JSON are strings, the key added
# afterwards is still an int.
b

{'1': {'2': 2}, 3: {4: 5}}

python train.py --model_info ./save/uniprot/model/encoder6_1.json --batch_size 128 --dropout_rate 0.1 --gpu 0 --lr 1e-4 --epoch 400 --scheduler False --dataset ACP2_main --val_fold 0 --load_weight True