# GPU

In [2]:
import os

# Slurm resource directives per target. The entry selected by `cluster` is
# substituted for the {cluster} placeholder of the job-script template.
# The trailing padding on the two "-G" lines is part of the stored strings.
cluster_map = {
    "A100": "\n".join([
        "#SBATCH -p kisski",
        "#SBATCH -G A100:1                    ",
        "#SBATCH --mem=20G",
    ]),
    "H100": "\n".join([
        "#SBATCH -p kisski-h100",
        "#SBATCH -G H100:1                    ",
        "#SBATCH --mem=20G",
    ]),
    "CPU": "\n".join([
        "#SBATCH --partition=jupyter:cpu",
        "#SBATCH --cpus-per-task=16",
        "#SBATCH --mem=40G",
    ]),
}

# Slurm batch-script template, rendered with str.format() once per job.
# Placeholders: {dataset} and {imputer} (job name and run_exps.py arguments),
# {cluster} (partition/resource #SBATCH lines), {L_node_dim} and {K_node_dim}
# (values for --imputer_node_dims and --interference_node_dims).
# The three run_exps.py calls differ only in --missing_p (0.0, 0.1, 0.3).
# A failing run does not stop the later ones, but the exit code of the most
# recent failure is kept in $status and returned by the script; without that
# the trailing echo commands make every job exit 0, so a crashed run would
# not be reported through --mail-type=FAIL.
# NOTE(review): the name `format` shadows the built-in format(); it is kept
# because the generation loop calls format.format(...).
format = """#!/bin/bash
#SBATCH --job-name=BaLu_GNN_{dataset}_{imputer}
{cluster}
#SBATCH --time=12:00:00
#SBATCH --output=run_%x_%j.out
#SBATCH --error=run_%x_%j.err
#SBATCH --mail-type=FAIL # Email on job failure only
#SBATCH --mail-user=hao.huang@tib.eu # <-- Replace with your real email
echo "===== JOB STARTED ====="
echo "Hostname: $(hostname)"
echo "Date: $(date)"
echo "User: $USER"
# Load environment
module load miniforge3
module load gcc/13.2.0
module load cuda/11.8
# module load gcc/13.2.0
# module load cuda/12.6.2
# Set up conda
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate /mnt/vast-kisski/projects/kisski-tib-activecl/cenv
# Diagnostics
echo "Which python: $(which python)"
python -c "import torch; print('Torch:', torch.__version__, '| CUDA:', torch.cuda.is_available())"
# Navigate to project directory
cd /mnt/vast-kisski/projects/kisski-tib-activecl/BaLu_Plus/ || exit 1
echo "--- Running: run_exps.py ---"
# Keep going after a failed run, but record its exit code so the job itself fails
status=0
python3 -u run_exps.py --model_name BaLu_Plus --imputer {imputer} --dataset {dataset} --missing_p 0.0  --gconv GCN --rconv GCN --imputer_node_dims {L_node_dim} --interference_node_dims {K_node_dim} || status=$?
python3 -u run_exps.py --model_name BaLu_Plus --imputer {imputer} --dataset {dataset} --missing_p 0.1  --gconv GCN --rconv GCN --imputer_node_dims {L_node_dim} --interference_node_dims {K_node_dim} || status=$?
python3 -u run_exps.py --model_name BaLu_Plus --imputer {imputer} --dataset {dataset} --missing_p 0.3  --gconv GCN --rconv GCN --imputer_node_dims {L_node_dim} --interference_node_dims {K_node_dim} || status=$?

echo "===== JOB COMPLETED ====="
echo "Exit status: $status"
echo "Date: $(date)"
exit $status
"""

# Dataset identifiers before the missingness suffix is attached.
datasets_org = [
    'Syn_M=None_SimRel=1_Rel=4',
    'Youtube_M=20_SimRel=1_Rel=4',
    'BlogCatalog1_M=20_SimRel=0_Rel=1',
    'Flickr1_M=20_SimRel=0_Rel=1',
]      # network relationships not based on similarity
# Every dataset name is used with "_MCAR" appended.
datasets = [f"{name}_MCAR" for name in datasets_org]

# Candidate values for the individual settings.
imputers = ['BaLu_GRAPE', 'BaLu_IGMC', 'GRAPE', 'IGMC']
# Each pair holds the space-separated values passed to --imputer_node_dims and
# --interference_node_dims, in that order.
Layers = [
    ("64 64 64", "64 64"),
    ("64 64", "64 64"),
    ("64 64", "64"),
    ("64 64 64", "64"),
]
GNNs = ['GCN', 'GAT', 'GraphSAGE', 'RGCN']
rel_dropouts = [0.0, 0.1, 0.2, 0.3]
betas = [0.0, 0.0001, 0.001, 0.01, 0.1]
gammas = [0.0, 0.0001, 0.001, 0.01, 0.1]
etas = [0.0, 0.0001, 0.001, 0.01, 0.1]

# Baseline configuration: first imputer, dataset and dropout value, and the
# second entry (0.0001) of betas, gammas and etas.
default_paras = dict(
    imputer=imputers[0],
    dataset=datasets[0],
    rel_dropout=rel_dropouts[0],
    beta=betas[1],
    gamma=gammas[1],
    eta=etas[1],
)

missing_ps = [0.0, 0.1, 0.3]
# Emit the project-directory `cd` line ahead of the sbatch commands printed by the loop.
print("cd /mnt/vast-kisski/projects/kisski-tib-activecl/BaLu_Plus/")


# ------------------------------------------------------------------------------
# Run selection: `cluster` picks the cluster_map entry used for every script,
# and also names the directory the scripts are written to.
# ------------------------------------------------------------------------------
cluster = 'A100'        # other cluster_map keys: 'H100', 'CPU'
slurm_dir = '{}_step1_L_K_Layer'.format(cluster)

# Create the output directory; an already existing directory is not an error.
os.makedirs(slurm_dir, exist_ok=True)

def file_name(paras: dict):
    """Return the concatenation of ``_<key>=<value>`` for every item of ``paras``.

    Items are emitted in the dict's iteration order; an empty dict gives "".
    """
    return "".join(f"_{key}={value}" for key, value in paras.items())

# Write one Slurm script per (dataset, imputer, layer configuration) and print
# the sbatch command that submits it.
for dataset in datasets:
    for imputer in imputers[:2]:    # only for 'BaLu_GRAPE', 'BaLu_IGMC'
        for layers in Layers:
            L_node_dim, K_node_dim = layers[0], layers[1]

            # Fill every placeholder of the job-script template for this combination.
            slurm_content = format.format(
                cluster=cluster_map[cluster],
                dataset=dataset,
                imputer=imputer,
                L_node_dim=L_node_dim,
                K_node_dim=K_node_dim,
            )

            # The file name uses dashes where the layer strings contain spaces.
            L_node_dim_1 = "-".join(L_node_dim.split(" "))
            K_node_dim_1 = "-".join(K_node_dim.split(" "))
            filename = os.path.join(
                slurm_dir,
                f"{dataset}_imputer={imputer}_L={L_node_dim_1}_K={K_node_dim_1}.slurm",
            )

            with open(filename, "w") as f:
                f.write(slurm_content)
            sbatch_command = f"sbatch {filename}"
            print(sbatch_command)


cd /mnt/vast-kisski/projects/kisski-tib-activecl/BaLu_Plus/
sbatch A100_step1_L_K_Layer/Syn_M=None_SimRel=1_Rel=4_MCAR_imputer=BaLu_GRAPE_L=64-64-64_K=64-64.slurm
sbatch A100_step1_L_K_Layer/Syn_M=None_SimRel=1_Rel=4_MCAR_imputer=BaLu_GRAPE_L=64-64_K=64-64.slurm
sbatch A100_step1_L_K_Layer/Syn_M=None_SimRel=1_Rel=4_MCAR_imputer=BaLu_GRAPE_L=64-64_K=64.slurm
sbatch A100_step1_L_K_Layer/Syn_M=None_SimRel=1_Rel=4_MCAR_imputer=BaLu_GRAPE_L=64-64-64_K=64.slurm
sbatch A100_step1_L_K_Layer/Syn_M=None_SimRel=1_Rel=4_MCAR_imputer=BaLu_IGMC_L=64-64-64_K=64-64.slurm
sbatch A100_step1_L_K_Layer/Syn_M=None_SimRel=1_Rel=4_MCAR_imputer=BaLu_IGMC_L=64-64_K=64-64.slurm
sbatch A100_step1_L_K_Layer/Syn_M=None_SimRel=1_Rel=4_MCAR_imputer=BaLu_IGMC_L=64-64_K=64.slurm
sbatch A100_step1_L_K_Layer/Syn_M=None_SimRel=1_Rel=4_MCAR_imputer=BaLu_IGMC_L=64-64-64_K=64.slurm
sbatch A100_step1_L_K_Layer/Youtube_M=20_SimRel=1_Rel=4_MCAR_imputer=BaLu_GRAPE_L=64-64-64_K=64-64.slurm
sbatch A100_step1_L_K_Layer/Youtube_M=2

# CPU

In [6]:
import os

# Slurm resource directives per target. The entry selected by `cluster` is
# substituted for the {cluster} placeholder of the job-script template.
# The trailing padding on the two "-G" lines is part of the stored strings.
cluster_map = {
    "A100": "\n".join([
        "#SBATCH -p kisski",
        "#SBATCH -G A100:1                    ",
        "#SBATCH --mem=20G",
    ]),
    "H100": "\n".join([
        "#SBATCH -p kisski-h100",
        "#SBATCH -G H100:1                    ",
        "#SBATCH --mem=20G",
    ]),
    "CPU": "\n".join([
        "#SBATCH --partition=jupyter:cpu",
        "#SBATCH --cpus-per-task=16",
        "#SBATCH --mem=40G",
    ]),
}

# Slurm batch-script template, rendered with str.format() once per job.
# Placeholders: {dataset} and {imputer} (job name and run_exps.py arguments),
# {cluster} (partition/resource #SBATCH lines), {L_node_dim} and {K_node_dim}
# (values for --imputer_node_dims and --interference_node_dims).
# The three run_exps.py calls differ only in --missing_p (0.0, 0.1, 0.3).
# A failing run does not stop the later ones, but the exit code of the most
# recent failure is kept in $status and returned by the script; without that
# the trailing echo commands make every job exit 0, so a crashed run would
# not be reported through --mail-type=FAIL.
# NOTE(review): the name `format` shadows the built-in format(); it is kept
# because the generation loop calls format.format(...).
format = """#!/bin/bash
#SBATCH --job-name=BaLu_GNN_{dataset}_{imputer}
{cluster}
#SBATCH --time=24:00:00
#SBATCH --output=run_%x_%j.out
#SBATCH --error=run_%x_%j.err
#SBATCH --mail-type=FAIL # Email on job failure only
#SBATCH --mail-user=hao.huang@tib.eu # <-- Replace with your real email
echo "===== JOB STARTED ====="
echo "Hostname: $(hostname)"
echo "Date: $(date)"
echo "User: $USER"
# Load environment
module load miniforge3
module load gcc/13.2.0
module load cuda/11.8
# module load gcc/13.2.0
# module load cuda/12.6.2
# Set up conda
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate /mnt/vast-kisski/projects/kisski-tib-activecl/cenv
# Diagnostics
echo "Which python: $(which python)"
python -c "import torch; print('Torch:', torch.__version__, '| CUDA:', torch.cuda.is_available())"
# Navigate to project directory
cd /mnt/vast-kisski/projects/kisski-tib-activecl/BaLu_Plus/ || exit 1
echo "--- Running: run_exps.py ---"
# Keep going after a failed run, but record its exit code so the job itself fails
status=0
python3 -u run_exps.py --model_name BaLu_Plus --imputer {imputer} --dataset {dataset} --missing_p 0.0  --gconv GCN --rconv GCN --imputer_node_dims {L_node_dim} --interference_node_dims {K_node_dim} || status=$?
python3 -u run_exps.py --model_name BaLu_Plus --imputer {imputer} --dataset {dataset} --missing_p 0.1  --gconv GCN --rconv GCN --imputer_node_dims {L_node_dim} --interference_node_dims {K_node_dim} || status=$?
python3 -u run_exps.py --model_name BaLu_Plus --imputer {imputer} --dataset {dataset} --missing_p 0.3  --gconv GCN --rconv GCN --imputer_node_dims {L_node_dim} --interference_node_dims {K_node_dim} || status=$?

echo "===== JOB COMPLETED ====="
echo "Exit status: $status"
echo "Date: $(date)"
exit $status
"""

# Dataset identifiers before the missingness suffix is attached.
datasets_org = [
    'Syn_M=None_SimRel=1_Rel=4',
    'Youtube_M=20_SimRel=1_Rel=4',
    'BlogCatalog1_M=20_SimRel=0_Rel=1',
    'Flickr1_M=20_SimRel=0_Rel=1',
]      # network relationships not based on similarity
# Every dataset name is used with "_MCAR" appended.
datasets = [f"{name}_MCAR" for name in datasets_org]

# Candidate values for the individual settings.
imputers = ['BaLu_GRAPE', 'BaLu_IGMC', 'GRAPE', 'IGMC']
# Each pair holds the space-separated values passed to --imputer_node_dims and
# --interference_node_dims, in that order.
Layers = [
    ("64 64 64", "64 64"),
    ("64 64", "64 64"),
    ("64 64", "64"),
    ("64 64 64", "64"),
]
GNNs = ['GCN', 'GAT', 'GraphSAGE', 'RGCN']
rel_dropouts = [0.0, 0.1, 0.2, 0.3]
betas = [0.0, 0.0001, 0.001, 0.01, 0.1]
gammas = [0.0, 0.0001, 0.001, 0.01, 0.1]
etas = [0.0, 0.0001, 0.001, 0.01, 0.1]

# Baseline configuration: first imputer, dataset and dropout value, and the
# second entry (0.0001) of betas, gammas and etas.
default_paras = dict(
    imputer=imputers[0],
    dataset=datasets[0],
    rel_dropout=rel_dropouts[0],
    beta=betas[1],
    gamma=gammas[1],
    eta=etas[1],
)

missing_ps = [0.0, 0.1, 0.3]
# Emit the project-directory `cd` line ahead of the sbatch commands printed by the loop.
print("cd /mnt/vast-kisski/projects/kisski-tib-activecl/BaLu_Plus/")


# ------------------------------------------------------------------------------
# Run selection: `cluster` picks the cluster_map entry used for every script,
# and also names the directory the scripts are written to.
# ------------------------------------------------------------------------------
cluster = 'CPU'        # other cluster_map keys: 'A100', 'H100'
slurm_dir = '{}_step1_L_K_Layer'.format(cluster)

# Create the output directory; an already existing directory is not an error.
os.makedirs(slurm_dir, exist_ok=True)

def file_name(paras: dict):
    """Return the concatenation of ``_<key>=<value>`` for every item of ``paras``.

    Items are emitted in the dict's iteration order; an empty dict gives "".
    """
    return "".join(f"_{key}={value}" for key, value in paras.items())

# Write one Slurm script per (dataset, imputer, layer configuration) and print
# the sbatch command that submits it. Datasets are visited last-to-first.
for dataset in reversed(datasets):
    for imputer in imputers[:2]:    # only for 'BaLu_GRAPE', 'BaLu_IGMC'
        for layers in Layers:
            L_node_dim, K_node_dim = layers[0], layers[1]

            # Fill every placeholder of the job-script template for this combination.
            slurm_content = format.format(
                cluster=cluster_map[cluster],
                dataset=dataset,
                imputer=imputer,
                L_node_dim=L_node_dim,
                K_node_dim=K_node_dim,
            )

            # The file name uses dashes where the layer strings contain spaces.
            L_node_dim_1 = "-".join(L_node_dim.split(" "))
            K_node_dim_1 = "-".join(K_node_dim.split(" "))
            filename = os.path.join(
                slurm_dir,
                f"{dataset}_imputer={imputer}_L={L_node_dim_1}_K={K_node_dim_1}.slurm",
            )

            with open(filename, "w") as f:
                f.write(slurm_content)
            sbatch_command = f"sbatch {filename}"
            print(sbatch_command)


cd /mnt/vast-kisski/projects/kisski-tib-activecl/BaLu_Plus/
sbatch CPU_step1_L_K_Layer/Flickr1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_GRAPE_L=64-64-64_K=64-64.slurm
sbatch CPU_step1_L_K_Layer/Flickr1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_GRAPE_L=64-64_K=64-64.slurm
sbatch CPU_step1_L_K_Layer/Flickr1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_GRAPE_L=64-64_K=64.slurm
sbatch CPU_step1_L_K_Layer/Flickr1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_GRAPE_L=64-64-64_K=64.slurm
sbatch CPU_step1_L_K_Layer/Flickr1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_IGMC_L=64-64-64_K=64-64.slurm
sbatch CPU_step1_L_K_Layer/Flickr1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_IGMC_L=64-64_K=64-64.slurm
sbatch CPU_step1_L_K_Layer/Flickr1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_IGMC_L=64-64_K=64.slurm
sbatch CPU_step1_L_K_Layer/Flickr1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_IGMC_L=64-64-64_K=64.slurm
sbatch CPU_step1_L_K_Layer/BlogCatalog1_M=20_SimRel=0_Rel=1_MCAR_imputer=BaLu_GRAPE_L=64-64-64_K=64-64.slurm
sbatch CPU_step1_L_K_Layer/