In [1]:
import itertools

In [2]:
import torch_spotlight.utils

In [3]:
def write_job_file(fname, header, job_list):
    """Write a PBS array-job script that dispatches on $PBS_ARRAY_INDEX.

    The file consists of `header` followed by a bash `case` statement with
    one branch per entry of `job_list`. Branches are numbered from 1, so
    entry `job_list[i]` runs when the array index equals `i + 1`.

    fname: path of the script to create (overwritten if it exists)
    header: text written verbatim at the top of the file (shebang, #PBS
        directives, module loads, ...); it should end with a newline
    job_list: iterable of shell snippets, one per array index
    """
    # newline='\n' forces Unix line endings regardless of the host platform.
    with open(fname, 'w', newline='\n') as f:
        f.write(header)
        f.write('case $PBS_ARRAY_INDEX in\n')
        # Iterate the argument, not a notebook-level global, so the function
        # writes exactly the jobs it was given.
        for job_num, job_str in enumerate(job_list, start=1):
            f.write(f'{job_num})\n{job_str}\n;;\n')
        f.write('esac\n')

# Run many spotlights for each dataset and spotlight size

In [4]:
def generate_iterative_jobs(datasets, spotlight_sizes, num_spotlights):
    """
    Build the shell text for iterative spotlight runs.

    One job string is produced per (dataset, spotlight size) pair. Each job
    string contains `num_spotlights` consecutive `run_spotlight.py`
    invocations; every invocation after the first receives the output files
    of all earlier invocations in the same job via `--past_weights`.

    datasets: list of (
        dataset_name,
        examples in dataset,
        spherical (True/False),
        dimensions to use (or None for all),
        flip_objective (True/False),
        barrier scale (or None for default)
    )
    spotlight_sizes: spotlight sizes as fractions of the dataset; each is
        converted to a point count with int(fraction * examples)
    num_spotlights: number of successive spotlights to run in each job

    Returns a list of job strings, ordered by dataset and then by size.

    Note: the output filename encodes the dataset, fraction, dimensions,
    spherical/flip settings and the spotlight number, but not the barrier
    scale.
    """
    device = 'cuda'
    inference_path = '/scratch/st-kevinlb-1/will/jupyter/spotlight/inference_results'
    spotlight_path = '/scratch/st-kevinlb-1/will/jupyter/spotlight/spotlight_results'

    all_jobs = []
    for dataset_name, dataset_size, spherical, num_dimensions, flip_objective, barrier_scale in datasets:
        for spotlight_fraction in spotlight_sizes:
            spotlight_num_points = int(spotlight_fraction * dataset_size)
            finished_outputs = []  # output files of the spotlights already queued in this job
            commands = []
            for spotlight_num in range(num_spotlights):
                # Optional flags, each already terminated by a shell line continuation.
                flags = []
                if spherical:
                    flags.append('--spherical \\\n')
                if num_dimensions is not None:
                    flags.append('--top_components %d \\\n' % num_dimensions)
                if flip_objective:
                    flags.append('--flip_objective \\\n')
                if spotlight_num > 0:
                    flags.append('--past_weights ' + ' '.join(finished_outputs) + ' \\\n')
                if barrier_scale is not None:
                    flags.append('--barrier_scale %f \\\n' % barrier_scale)

                # Assemble the output filename piece by piece.
                output_path = spotlight_path + '/' + dataset_name
                output_path += '_' + str(spotlight_fraction)
                if num_dimensions is not None:
                    output_path += '_%dD' % num_dimensions
                if spherical:
                    output_path += '_spherical'
                if flip_objective:
                    output_path += '_flip'
                output_path += '_' + str(spotlight_num + 1) + '.pkl'

                fixed_lines = [
                    'python /scratch/st-kevinlb-1/will/jupyter/spotlight/torch_spotlight/run_spotlight.py',
                    str(spotlight_num_points),
                    f'{inference_path}/{dataset_name}.pkl',
                    output_path,
                    '--learning_rate 1e-2',
                    '--lr_patience 10',
                    '--print_every 20',
                    f'--device {device}',
                    '--num_steps 5000',
                ]
                # Every fixed line ends with " \" + newline; the optional flags
                # follow, then one bare newline terminates the command.
                commands.append(' \\\n'.join(fixed_lines) + ' \\\n' + ''.join(flags) + '\n')
                finished_outputs.append(output_path)
            all_jobs.append(''.join(commands))
    return all_jobs

In [5]:
# Waterbirds training split (4795 examples): spherical spotlights over all
# dimensions, objective not flipped, barrier scale 1. Each of the 8 spotlight
# fractions becomes one array sub-job that runs 5 successive spotlights.
jobs = generate_iterative_jobs(
    datasets = [
        ('waterbirds_train_resnet', 4795, True, None, False, 1),
    ],
    spotlight_sizes = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2], 
    num_spotlights = 5
)

# Log files are named after this dataset split so that the train/val/test
# array jobs do not overwrite each other's stdout/stderr.
# The -J range must match the number of case branches written below.
header = f"""#!/bin/bash
#PBS -l walltime=6:00:00,select=1:ncpus=1:ngpus=1:mem=32gb
#PBS -N spotlight-waterbirds
#PBS -A st-kevinlb-1-gpu
#PBS -o /scratch/st-kevinlb-1/will/jupyter/spotlight/job_outputs/output-waterbirds-train-^array_index^.txt
#PBS -e /scratch/st-kevinlb-1/will/jupyter/spotlight/job_outputs/error-waterbirds-train-^array_index^.txt
#PBS -J 1-{len(jobs)}

module load python3
"""

print('jobs: %d' % len(jobs))
write_job_file('scripts/run_spotlights_waterbirds_train.pbs', header, jobs)

jobs: 8


In [6]:
# Waterbirds validation split (1199 examples): spherical spotlights over all
# dimensions, objective not flipped, barrier scale 1. Each of the 8 spotlight
# fractions becomes one array sub-job that runs 5 successive spotlights.
jobs = generate_iterative_jobs(
    datasets = [
        ('waterbirds_val_resnet', 1199, True, None, False, 1),
    ],
    spotlight_sizes = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2], 
    num_spotlights = 5
)

# Log files are named after this dataset split so that the train/val/test
# array jobs do not overwrite each other's stdout/stderr.
# The -J range must match the number of case branches written below.
header = f"""#!/bin/bash
#PBS -l walltime=6:00:00,select=1:ncpus=1:ngpus=1:mem=32gb
#PBS -N spotlight-waterbirds
#PBS -A st-kevinlb-1-gpu
#PBS -o /scratch/st-kevinlb-1/will/jupyter/spotlight/job_outputs/output-waterbirds-val-^array_index^.txt
#PBS -e /scratch/st-kevinlb-1/will/jupyter/spotlight/job_outputs/error-waterbirds-val-^array_index^.txt
#PBS -J 1-{len(jobs)}

module load python3
"""

print('jobs: %d' % len(jobs))
write_job_file('scripts/run_spotlights_waterbirds_val.pbs', header, jobs)

jobs: 8


In [7]:
# Waterbirds test split (5794 examples): spherical spotlights over all
# dimensions, objective not flipped, barrier scale 1. Each of the 8 spotlight
# fractions becomes one array sub-job that runs 5 successive spotlights.
jobs = generate_iterative_jobs(
    datasets = [
        ('waterbirds_test_resnet', 5794, True, None, False, 1),
    ],
    spotlight_sizes = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2], 
    num_spotlights = 5
)

# Log files are named after this dataset split so that the train/val/test
# array jobs do not overwrite each other's stdout/stderr.
# The -J range must match the number of case branches written below.
header = f"""#!/bin/bash
#PBS -l walltime=6:00:00,select=1:ncpus=1:ngpus=1:mem=32gb
#PBS -N spotlight-waterbirds
#PBS -A st-kevinlb-1-gpu
#PBS -o /scratch/st-kevinlb-1/will/jupyter/spotlight/job_outputs/output-waterbirds-test-^array_index^.txt
#PBS -e /scratch/st-kevinlb-1/will/jupyter/spotlight/job_outputs/error-waterbirds-test-^array_index^.txt
#PBS -J 1-{len(jobs)}

module load python3
"""

print('jobs: %d' % len(jobs))
write_job_file('scripts/run_spotlights_waterbirds_test.pbs', header, jobs)

jobs: 8
