# Make shell script for neural network.

In [1]:
import pandas as pd
import numpy as np
import os, glob

In [2]:
# ---------------------------------------------------------------
# Perturbation types to generate commands for.
# ---------------------------------------------------------------
pert_list = ['trt_sh.cgs', 'trt_oe']
# Alternative: pert_list = ['trt_oe']


# ---------------------------------------------------------------
# Cell selection method.
# Other values referenced in this file: "Each", "All", "AllCellParallel".
# Original author's note: "All" uses only the averaged data,
# "Each" uses each cell type.
# ---------------------------------------------------------------
cell_name = "AllCell"


# ---------------------------------------------------------------
# Protein (target) and disease VAE types, as
# (target_vae_type, disease_vae_type) pairs.
# ---------------------------------------------------------------
pair_vae_type_list = [
    ('VQ_VAE', 'VQ_VAE'),
    # Currently disabled alternatives:
    # ('VAE', 'VAE'),
    # ('Original', 'Original'),
    # ('VAE', 'VQ_VAE'),
    # ('SQ_VAE', 'SQ_VAE'),
    # ('GaussianSQVAE_gaussian_1', 'GaussianSQVAE_gaussian_1'),
]

# ---------------------------------------------------------------
# Missing rate (read by Get_TargetFeatureType for the VQ_VAE case).
# ---------------------------------------------------------------
missing_rate = 0.0


# ===============================================================
# Target feature type.
# ===============================================================
def Get_TargetFeatureType(target_vae_type, cell_name, pert_type):
    """Return the target feature-type identifier for a VAE type and cell selection.

    Args:
        target_vae_type: One of 'Original', 'VAE', 'VQ_VAE', 'SQ_VAE' or
            'GaussianSQVAE_gaussian_1'.
        cell_name: Cell selection method. Only consulted for 'VQ_VAE'
            ('All', 'AllCellParallel', 'AllCell') and
            'GaussianSQVAE_gaussian_1' ('All', 'AllCellParallel').
        pert_type: Perturbation type. Currently unused; kept so existing
            callers keep working.

    Returns:
        The feature-type string for the requested combination.

    Raises:
        ValueError: If no feature type is defined for the combination.
            (Previously this surfaced as an UnboundLocalError at ``return``.)

    Note:
        For 'VQ_VAE' with a ``cell_name`` other than 'All', the module-level
        ``missing_rate`` is read and must be exactly 0.0 or 0.1.
    """
    target_feature_type = None

    if target_vae_type == 'Original':
        # Excludes genes whose variance is 0.
        # (Alternative: 'Original' uses all genes.)
        target_feature_type = 'VarianceTop1'
    elif target_vae_type == 'VAE':
        target_feature_type = 'Epo2000_Lr0.0001_Hid512_256_128_Lat64_Bat64_Dro0.1_ActTanh'
    elif target_vae_type == 'VQ_VAE':
        if cell_name == 'All':
            target_feature_type = 'Epo500_Lr0.0002_Hid512_256_128_Lat64_Bat64_Comc0.25_Lr0.0002_ActSELU_ScaleMaxAbs'
        elif cell_name == 'AllCellParallel' and missing_rate == 0.1:
            target_feature_type = 'Epo500_Lr0.0002_Hid512_1024_512_Lat256_Bat64_Comc0.25_Dro0.1_ActLeakyReLU0.5_Missing0.1_ScaleMaxAbs'
        elif cell_name == 'AllCellParallel' and missing_rate == 0.0:
            target_feature_type = 'Epo500_Lr0.0002_Hid512_1024_512_Lat256_Bat64_Comc0.25_Dro0.1_ActLeakyReLU0.5_Missing0.0_ScaleMaxAbs'
        elif cell_name == 'AllCell' and missing_rate == 0.1:
            target_feature_type = 'Epo500_Lr0.0002_Hid512_256_128_Lat64_Bat64_Comc0.25_Dro0.1_ActLeakyReLU0.5_Missing0.1_ScaleMaxAbs'
        elif cell_name == 'AllCell' and missing_rate == 0.0:
            target_feature_type = 'Epo500_Lr0.0002_Hid512_256_128_Lat64_Bat64_Comc0.25_Dro0.1_ActLeakyReLU0.5_Missing0.0_ScaleMaxAbs'
    elif target_vae_type == 'SQ_VAE':
        target_feature_type = 'Epo500_Lr0.002_Hid1000_512_256_Lat128_Bat64_Varq5_Temp1e-05_ScaleOrig'
    elif target_vae_type == 'GaussianSQVAE_gaussian_1':
        if cell_name == 'All':
            target_feature_type = 'Epo500_Hid512_256_128_Lat64_Bat64_Varq30.0_ScaleMaxAbs'
        elif cell_name == 'AllCellParallel':
            target_feature_type = 'Epo500_Hid512_1024_512_Lat256_Bat64_Varq30.0_ActSELU_Missing0.1_ScaleMaxAbs'

    if target_feature_type is None:
        # Fail with an actionable message instead of an UnboundLocalError.
        raise ValueError(
            "No target feature type defined for "
            f"target_vae_type={target_vae_type!r}, cell_name={cell_name!r} "
            "(for 'VQ_VAE' the module-level missing_rate must be 0.0 or 0.1)."
        )
    return target_feature_type

    
# ===============================================================
# Disease feature type.
# ===============================================================
def Get_DiseaseFeatureType(disease_vae_type):
    """Return the disease feature-type identifier for a disease VAE type.

    Args:
        disease_vae_type: One of 'Original', 'VAE', 'VQ_VAE', 'SQ_VAE' or
            'GaussianSQVAE_gaussian_1'.

    Returns:
        The feature-type string associated with ``disease_vae_type``.

    Raises:
        ValueError: If ``disease_vae_type`` is not one of the supported
            values. (Previously this surfaced as an UnboundLocalError.)
    """
    feature_type_by_vae = {
        # Excludes genes whose variance is 0.
        # (Alternative: 'Original' uses all genes.)
        'Original': 'VarianceTop1',
        'VAE': 'Epo2000_Lr0.0001_Hid1024_512_256_Lat128_Bat64_Dro0.2_ScaleMaxAbs_ActReLU',
        'VQ_VAE': 'Epo2000_Lr0.002_Hid1000_512_256_Lat128_Bat64_Comc0.25_ScaleStd',
        'SQ_VAE': 'Epo2000_Lr0.002_Hid1000_512_256_Lat128_Bat64_Varq5_Temp1e-05_ScaleMaxAbs',
        'GaussianSQVAE_gaussian_1': 'Epo1000_Hid1000_512_256_Lat128_Bat64_Varq40_Dro0.1_ActTanh_ScaleMaxAbs',
    }

    try:
        return feature_type_by_vae[disease_vae_type]
    except KeyError:
        raise ValueError(
            f"No disease feature type defined for disease_vae_type={disease_vae_type!r}; "
            f"expected one of {sorted(feature_type_by_vae)}."
        ) from None
    
    
# ---------------------------------------------------------------
# Settings forwarded verbatim to the generated command lines.
# ---------------------------------------------------------------
early_stopping = "30"    # early stopping
disease_select = "79"    # disease selection
target_select = "Known"  # target selection; alternatives: 'Otani', 'All'


# Hidden layer sizes.
def Get_HiddenLayerSize(target_vae_type, disease_vae_type):
    """Return the candidate hidden-layer size triples for a VAE type pair.

    Args:
        target_vae_type: VAE type used for the target features.
        disease_vae_type: VAE type used for the disease features.

    Returns:
        A new list of ``[h1, h2, h3]`` lists, one per candidate architecture.
        ('Original', 'Original') gets one extra, wider candidate; every other
        supported pair shares the same four candidates.

    Raises:
        ValueError: If the pair is not supported. (Previously this surfaced
            as an UnboundLocalError at ``return``.)
    """
    pair = (target_vae_type, disease_vae_type)

    if pair == ('Original', 'Original'):
        # Original author's note: input size 978 + 14,408 = 15,386.
        return [
            [2048, 1024, 512],
            [1024, 512, 256],
            [512, 256, 128],
            [256, 128, 64],
            [128, 64, 32],
        ]

    # Pairs built on learned (latent) features all sweep the same grid.
    latent_pairs = (
        ('VAE', 'VAE'),
        ('VAE', 'VQ_VAE'),
        ('VQ_VAE', 'VQ_VAE'),
        ('SQ_VAE', 'SQ_VAE'),
        ('GaussianSQVAE_gaussian_1', 'GaussianSQVAE_gaussian_1'),
    )
    if pair in latent_pairs:
        # Built fresh on every call so callers may mutate the result safely.
        return [
            [1024, 512, 256],
            [512, 256, 128],
            [256, 128, 64],
            [128, 64, 32],
        ]

    raise ValueError(
        "No hidden layer sizes defined for "
        f"target_vae_type={target_vae_type!r}, disease_vae_type={disease_vae_type!r}."
    )


In [3]:
# Write the commands: one `python3 ./01_Pi_NN_concat.py ...` line per
# combination of perturbation type, fold, VAE pair, hidden-layer sizes and
# dropout value.
o_f = "./03_CrossValidation.sh"

# Open the script once; the context manager closes it even if a lookup below
# raises part-way through.
with open(o_f, "w") as out:
    # A shebang needs the "#!" prefix; "#/bin/sh" is only a plain comment.
    out.write("#!/bin/sh\n")
    out.write("\n")

    for pert_type in pert_list:
        for k in range(1, 6):  # passed as --fold_number
            for target_vae_type, disease_vae_type in pair_vae_type_list:
                hidde_layer_size_list = Get_HiddenLayerSize(target_vae_type, disease_vae_type)

                # Feature types do not depend on the inner loops, so look them
                # up once per (pert_type, VAE pair).
                target_feature_type = Get_TargetFeatureType(target_vae_type, cell_name, pert_type)
                disease_feature_type = Get_DiseaseFeatureType(disease_vae_type)

                for hidden_layer_size in hidde_layer_size_list:
                    # Space-separated sizes for --gene_hidden_sizes.
                    hidden_layer_size_str = ' '.join(str(s) for s in hidden_layer_size)

                    for dropout_step in range(1, 6):
                        gene_dropout = dropout_step / 10  # 0.1, 0.2, ..., 0.5

                        com1 = (
                            f"python3 ./01_Pi_NN_concat.py --fold_number={k} --pert_type={pert_type} "
                            f"--gene_dropout={gene_dropout} --gene_hidden_sizes {hidden_layer_size_str} "
                            f"--target_vae_type={target_vae_type} --disease_vae_type={disease_vae_type} "
                            f"--target_select={target_select} --disease_select={disease_select} "
                            f"--target_feature_type={target_feature_type} --disease_feature_type={disease_feature_type} "
                            f"--cell_name={cell_name} --early_stopping={early_stopping} --gene_epochs=2000\n"
                        )
                        out.write(com1)