In [8]:
import itertools
import pandas as pd

from utils import vae_default_args, hmm_default_args, rnn_default_args
from utils import get_all_dataset_names

In [110]:
# Dataset names and their sequence lengths. The two lists are parallel:
# seq_length_lst[k] is taken to be the sequence length of dataset_names_lst[k].
# NOTE(review): this relies on get_all_dataset_names() returning the names in
# exactly the order the lengths are listed here -- confirm against utils.
dataset_names_lst = get_all_dataset_names()
seq_length_lst = [100, 100, 100, 20, 20, 20, 50, 50, 50, 238, 51, 51, 51]
# zip() silently truncates to the shorter input, so a count mismatch would
# otherwise only surface later as a KeyError or as silently ignored entries.
# Check before the output script is opened (and truncated) below.
if len(dataset_names_lst) != len(seq_length_lst):
    raise ValueError(
        "seq_length_lst has {0} entries but get_all_dataset_names() returned {1} names".format(
            len(seq_length_lst), len(dataset_names_lst)))
# Accumulates every generated command string across the model-specific cells.
commands_lst = []
# Shell script that receives one generated command per line; it stays open
# across the following cells and is closed explicitly afterwards.
logger = open("./scripts/grid_search_datasets.sh", 'w')
# Lookup from dataset name to its sequence length.
dataset_length_map = dict(zip(dataset_names_lst, seq_length_lst))
print(dataset_length_map)

{'synthetic_unimodal_data_length_50_uniform': 50, 'synthetic_unimodal_data_length_20_skewed_gaussian': 20, 'synthetic_unimodal_data_length_50_gaussian': 50, 'synthetic_unimodal_data_length_20_gaussian': 20, 'synthetic_unimodal_data_length_50_skewed_gaussian': 50, 'synthetic_multimodal_data_modes_2_length_51_uniform': 51, 'synthetic_unimodal_data_length_20_uniform': 20, 'synthetic_multimodal_data_modes_5_length_51_uniform': 51, 'synthetic_unimodal_data_length_100_gaussian': 100, 'gfp': 238, 'synthetic_unimodal_data_length_100_uniform': 100, 'synthetic_multimodal_data_modes_3_length_51_uniform': 51, 'synthetic_unimodal_data_length_100_skewed_gaussian': 100}


In [111]:
# vae commands
# Sweep every (dataset, hidden size, batch size, learning rate) combination
# and emit one run_model.py invocation per combination.
model = 'vae'
learning_rate_lst = [0.003, 0.001, 0.0003]
batch_size_lst = [10, 20]
hidden_size_lst = [100, 200]
# Positional fields: 0/2 model, 1/10 dataset, 3 run name, 4 input size,
# 5 hidden size, 6 sequence length, 7 cuda device index, 8 learning rate,
# 9 batch size.
vae_template = 'python3 run_model.py --model_type {0} --base_log logs/{1}/{2}/ --name {3} --input {4} --hidden_size {5} --latent_dim 20 --seq_length {6} --pseudo_count 1 --n_jobs 5 --device cuda:{7} --learning_rate {8} --epochs 100 --batch_size {9} --layers 1 --dataset {10} --num_data 1000 --early_stopping True --patience 10'
vae_grid = itertools.product(dataset_names_lst, hidden_size_lst, batch_size_lst, learning_rate_lst)
for job_idx, (dataset, hidden_size, batch_size, learning_rate) in enumerate(vae_grid):
    name = "{0}_hidden_size_{1}_batch_size_{2}_lr_{3}".format(model, hidden_size, batch_size, learning_rate)
    seq_length = dataset_length_map[dataset]
    # 'gfp' gets 21 inputs per sequence position; every other dataset gets 20.
    if dataset == 'gfp':
        input_size = seq_length * 21
    else:
        input_size = seq_length * 20
    # Round-robin the jobs over device indices 0..3.
    device_idx = job_idx % 4
    command = vae_template.format(model, dataset, model, name, input_size, hidden_size, seq_length, device_idx, learning_rate, batch_size, dataset)
    # One command per line in the generated shell script.
    logger.write(command + "\n")
    commands_lst.append(command)
print(commands_lst)

    

['python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size_10_lr_0.003 --input 2000 --hidden_size 100 --latent_dim 20 --seq_length 100 --pseudo_count 1 --n_jobs 5 --device cuda:0 --learning_rate 0.003 --epochs 100 --batch_size 10 --layers 1 --dataset synthetic_unimodal_data_length_100_gaussian --num_data 1000 --early_stopping True --patience 10', 'python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size_10_lr_0.001 --input 2000 --hidden_size 100 --latent_dim 20 --seq_length 100 --pseudo_count 1 --n_jobs 5 --device cuda:1 --learning_rate 0.001 --epochs 100 --batch_size 10 --layers 1 --dataset synthetic_unimodal_data_length_100_gaussian --num_data 1000 --early_stopping True --patience 10', 'python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batc

In [112]:
# rnn commands
# Sweep every (dataset, hidden size, learning rate) combination; the batch
# size is fixed at 10 inside the command template.
model = 'rnn'
learning_rate_lst = [0.003, 0.001, 0.0003, 0.0001]
hidden_size_lst = [100, 200]
# Positional fields: 0/2 model, 1/9 dataset, 3 run name, 4 input size,
# 5 hidden size, 6 sequence length, 7 cuda device index, 8 learning rate.
rnn_template = 'python3 run_model.py --model_type {0} --base_log logs/{1}/{2}/ --name {3} --input {4} --hidden_size {5} --latent_dim 20 --seq_length {6} --pseudo_count 1 --n_jobs 5 --device cuda:{7} --learning_rate {8} --epochs 100 --batch_size 10 --layers 1 --dataset {9} --num_data 1000 --early_stopping True --patience 10'
rnn_grid = itertools.product(dataset_names_lst, hidden_size_lst, learning_rate_lst)
for job_idx, (dataset, hidden_size, learning_rate) in enumerate(rnn_grid):
    name = "{0}_hidden_size_{1}_lr_{2}".format(model, hidden_size, learning_rate)
    seq_length = dataset_length_map[dataset]
    # 'gfp' gets 21 inputs per sequence position; every other dataset gets 20.
    if dataset == 'gfp':
        input_size = seq_length * 21
    else:
        input_size = seq_length * 20
    # Round-robin the jobs over device indices 0..3.
    device_idx = job_idx % 4
    command = rnn_template.format(model, dataset, model, name, input_size, hidden_size, seq_length, device_idx, learning_rate, dataset)
    # One command per line in the generated shell script.
    logger.write(command + "\n")
    commands_lst.append(command)
print(commands_lst)

    

['python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size_10_lr_0.003 --input 2000 --hidden_size 100 --latent_dim 20 --seq_length 100 --pseudo_count 1 --n_jobs 5 --device cuda:0 --learning_rate 0.003 --epochs 100 --batch_size 10 --layers 1 --dataset synthetic_unimodal_data_length_100_gaussian --num_data 1000 --early_stopping True --patience 10', 'python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size_10_lr_0.001 --input 2000 --hidden_size 100 --latent_dim 20 --seq_length 100 --pseudo_count 1 --n_jobs 5 --device cuda:1 --learning_rate 0.001 --epochs 100 --batch_size 10 --layers 1 --dataset synthetic_unimodal_data_length_100_gaussian --num_data 1000 --early_stopping True --patience 10', 'python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batc

In [113]:
# hmm commands
# Sweep every (dataset, hidden size) combination. The HMM commands always use
# "--device cpu" and "--learning_rate -1", so no device index is needed and
# the loop iterates the product directly instead of enumerating it.
hidden_size_lst = [20, 30, 40, 50]
model = 'hmm'
for dataset, hidden_size in itertools.product(dataset_names_lst, hidden_size_lst):
    name = "{0}_hidden_size_{1}".format(model, hidden_size)
    seq_length = dataset_length_map[dataset]
    # 'gfp' gets 21 inputs per sequence position; every other dataset gets 20.
    input_size = seq_length * 21 if dataset == 'gfp' else seq_length * 20
    command = 'python3 run_model.py --model_type {0} --base_log logs/{1}/{2}/ --name {3} --input {4} --hidden_size {5} --latent_dim 20 --seq_length {6} --pseudo_count 1 --n_jobs 5 --device cpu --learning_rate -1 --epochs 100 --batch_size 10 --layers 1 --dataset {7} --num_data 1000 --early_stopping True --patience 10'.format(model, dataset, model, name, input_size, hidden_size, seq_length, dataset)
    # One command per line in the generated shell script.
    print(command, file=logger)
    commands_lst.append(command)
print(commands_lst)

    

['python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size_10_lr_0.003 --input 2000 --hidden_size 100 --latent_dim 20 --seq_length 100 --pseudo_count 1 --n_jobs 5 --device cuda:0 --learning_rate 0.003 --epochs 100 --batch_size 10 --layers 1 --dataset synthetic_unimodal_data_length_100_gaussian --num_data 1000 --early_stopping True --patience 10', 'python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size_10_lr_0.001 --input 2000 --hidden_size 100 --latent_dim 20 --seq_length 100 --pseudo_count 1 --n_jobs 5 --device cuda:1 --learning_rate 0.001 --epochs 100 --batch_size 10 --layers 1 --dataset synthetic_unimodal_data_length_100_gaussian --num_data 1000 --early_stopping True --patience 10', 'python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batc

In [114]:
# Close the script file so every buffered command is flushed to disk before
# the file is read back for verification.
logger.close()

In [115]:
# Read the generated script back, echo each command, and verify the count.
commands_cnt = 0
# The context manager guarantees the read handle is closed when the loop ends
# or if printing raises.
with open("./scripts/grid_search_datasets.sh", 'r') as f:
    for line in f:
        # Each line still carries its trailing newline, so print() leaves a
        # blank line between consecutive commands.
        print(line)
        commands_cnt += 1
# 312 = 13 datasets * (12 vae + 8 rnn + 4 hmm) hyper-parameter combinations.
assert commands_cnt == 312, "expected 312 commands, found {0}".format(commands_cnt)

python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size_10_lr_0.003 --input 2000 --hidden_size 100 --latent_dim 20 --seq_length 100 --pseudo_count 1 --n_jobs 5 --device cuda:0 --learning_rate 0.003 --epochs 100 --batch_size 10 --layers 1 --dataset synthetic_unimodal_data_length_100_gaussian --num_data 1000 --early_stopping True --patience 10

python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size_10_lr_0.001 --input 2000 --hidden_size 100 --latent_dim 20 --seq_length 100 --pseudo_count 1 --n_jobs 5 --device cuda:1 --learning_rate 0.001 --epochs 100 --batch_size 10 --layers 1 --dataset synthetic_unimodal_data_length_100_gaussian --num_data 1000 --early_stopping True --patience 10

python3 run_model.py --model_type vae --base_log logs/synthetic_unimodal_data_length_100_gaussian/vae/ --name vae_hidden_size_100_batch_size