# On A100 GPU

In [None]:
import os

# Template for the per-job Slurm batch script.
#
# Placeholders, filled in with str.format():
#   {dataset}       dataset name passed to run_data_prepare.py via --dataset
#   {miss_pattern}  value passed via --miss_pattern
#   {miss_p}        value passed via --missing_p
#
# All three placeholders appear in the job name and in the stdout/stderr file
# names, so jobs that differ only in {miss_p} can be told apart in the queue
# and in the log files (same naming scheme as the generated .slurm files).
#
# NOTE: the name `format` shadows the builtin of the same name. It is kept
# because the code that renders this template refers to it by this name.
format = """#!/bin/bash
#SBATCH --job-name=convert_{dataset}_{miss_pattern}_{miss_p}
#SBATCH --partition=kisski
#SBATCH --gres=gpu:A100:1
#SBATCH --cpus-per-task=16
#SBATCH --mem=80G
#SBATCH --time=2-00:00:00
#SBATCH --output=convert_{dataset}_{miss_pattern}_{miss_p}_%j.out
#SBATCH --error=convert_{dataset}_{miss_pattern}_{miss_p}_%j.err
#SBATCH --mail-type=FAIL # Email on failure only
#SBATCH --mail-user=hao.huang@tib.eu # <-- Replace with your real email
echo "===== JOB STARTED ====="
echo "Hostname: $(hostname)"
echo "Date: $(date)"
echo "User: $USER"
# Load environment
module load miniforge3
# module load cuda/11.8
module load gcc/13.2.0
module load cuda/12.6.2

# Set up conda
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate /mnt/vast-kisski/projects/kisski-tib-activecl/cenv
# Diagnostics
echo "Which python: $(which python)"
python -c "import torch; print('Torch:', torch.__version__, '| CUDA:', torch.cuda.is_available())"
# Navigate to project directory (abort if it is missing, so the job is reported as failed)
cd ~/Data-BaLu || exit 1
# Run scripts
echo "--- Running: run_data_prepare.py"

# python -u run_data_prepare.py --dataset {dataset} --missing_p 0.1 --imputer 'ori_grape' --n_attr 20 --relation_sim 1 --n_rel 4
# python -u run_data_prepare.py --dataset {dataset} --missing_p 0.3 --imputer 'ori_grape' --n_attr 20 --relation_sim 1 --n_rel 4

# python -u run_data_prepare.py --dataset {dataset} --miss_pattern {miss_pattern} --missing_p 0.0 --n_attr 20 --relation_sim 0 --n_rel 1
# python -u run_data_prepare.py --dataset {dataset} --miss_pattern {miss_pattern} --missing_p 0.1 --n_attr 20 --relation_sim 0 --n_rel 1
python -u run_data_prepare.py --dataset {dataset} --miss_pattern {miss_pattern} --missing_p {miss_p} --n_attr 20 --relation_sim 0 --n_rel 1


echo "===== JOB COMPLETED ====="
echo "Date: $(date)"
"""

# Experiment grid: one Slurm job file is generated for every
# (dataset, missing pattern, missing rate) combination.
datasets = ['Syn', 'BlogCatalog1', 'Flickr1', 'Youtube']
miss_patterns = ['MCAR']  # 'MAR' is currently disabled
miss_ps = [0.0, 0.1, 0.3]

# Directory that receives the generated job scripts (created on demand).
slurm_dir = 'slurm_gpu'
os.makedirs(slurm_dir, exist_ok=True)

# Flatten the grid up front; the order is dataset > pattern > rate, i.e. the
# same order a triple-nested loop over the three lists would produce.
grid = [
    (ds, pattern, rate)
    for ds in datasets
    for pattern in miss_patterns
    for rate in miss_ps
]

# Render one executable .slurm file per grid point and remember the
# matching `sbatch` command line.
sbatch_commands = []
for dataset, miss_pattern, miss_p in grid:
    filename = os.path.join(
        slurm_dir, f"convert_{dataset}_{miss_pattern}_{miss_p}.slurm"
    )
    job_content = format.format(
        dataset=dataset, miss_pattern=miss_pattern, miss_p=miss_p
    )
    with open(filename, 'w') as f:
        f.write(job_content)
    os.chmod(filename, 0o755)
    sbatch_commands.append(f"sbatch {filename}")

# Echo the submission commands so they can be pasted into a shell.
print("\n".join(["cd Data-BaLu", *sbatch_commands]))

# Also bundle every submission into a single executable helper script.
batch_file = os.path.join(slurm_dir, "submit_all_jobs.sh")
with open(batch_file, 'w') as f:
    f.write("#!/bin/bash\n\n# Submit all conversion jobs\n\n")
    f.writelines(f"{command}\n" for command in sbatch_commands)
os.chmod(batch_file, 0o755)

cd Data-BaLu
sbatch slurm_gpu/convert_Syn_MCAR_0.0.slurm
sbatch slurm_gpu/convert_Syn_MCAR_0.1.slurm
sbatch slurm_gpu/convert_Syn_MCAR_0.3.slurm
sbatch slurm_gpu/convert_BlogCatalog1_MCAR_0.0.slurm
sbatch slurm_gpu/convert_BlogCatalog1_MCAR_0.1.slurm
sbatch slurm_gpu/convert_BlogCatalog1_MCAR_0.3.slurm
sbatch slurm_gpu/convert_Flickr1_MCAR_0.0.slurm
sbatch slurm_gpu/convert_Flickr1_MCAR_0.1.slurm
sbatch slurm_gpu/convert_Flickr1_MCAR_0.3.slurm
sbatch slurm_gpu/convert_Youtube_MCAR_0.0.slurm
sbatch slurm_gpu/convert_Youtube_MCAR_0.1.slurm
sbatch slurm_gpu/convert_Youtube_MCAR_0.3.slurm
