# Togo Active Learning Experiment Runner

This notebook manages the execution of sampling experiments on the Togo soil fertility dataset.

Author: Livia Betti  
Date: July 2025

### Before running this notebook, complete the following tasks:
1. Generate the relevant groups for Togo. I have generated group assignments based on regions; if other representative groups would be useful, we can generate those as well.
2. Assign (distance-based) costs for the convenience sampling setting.

## Imports

In [1]:
import subprocess
from pathlib import Path

# Root of the sampling code; the training script and config are resolved relative to it.
base_dir = Path("/home/libe2152/optimizedsampling/3_sampling")  # replace with path to folder
script = base_dir.joinpath("tools", "train.py")  # adjust if needed
cfg = base_dir.joinpath("configs", "togo", "RIDGE.yaml")  # adjust if needed

## Core config values

In [None]:
# Static sweep parameters: the runner iterates over these seeds, methods and budgets.
seeds = [
    1, 42, 123, 456, 789,
    1234, 5678, 9101, 1213, 1415,
]
methods = [
    "random",
    "greedycost",
    "poprisk",
    "similarity",
]
budgets = [100, 500, 1000, 5000, 10000]

# Values shared by every run
dataset = "togo"
init_name = "empty_initial_set"
id_path = ""  # left empty here
group_type = "regions"
group_assignment_path = "/home/libe2152/optimizedsampling/0_data/groups/togo/region_assignment.pkl"
util_lambda = 0.5


## Cost-related arguments

In [None]:
def _stream_shell_command(cmd):
    """Run ``cmd`` through the system shell, echo its output, and return the exit code.

    stderr is merged into stdout so the output appears in the order it was
    produced. ``shell=True`` is used because the command is a single multi-line
    string that relies on backslash line continuations.
    """
    with subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    ) as proc:
        # Echo line by line so progress shows up while the command is running.
        for line in proc.stdout:
            print(line, end="")
        return proc.wait()


def run_setting1(seed):
    """Launch the training script for every cost setting, method and budget for one seed.

    For each cost setting ("uniform" and "convenience_based") this iterates
    over the notebook-level ``methods`` and ``budgets`` lists, builds the
    command line for ``script`` from the notebook-level configuration values
    and executes it through the shell.

    Args:
        seed: Seed forwarded to the script via ``--seed`` and embedded in the
            experiment name.

    Returns:
        list[str]: Names of the experiments whose command exited with a
        non-zero status (empty when every run succeeded). A failed run is
        reported and the sweep continues with the next one.
    """
    # These arguments are left empty by both cost settings; they are still
    # placed on the command line so the invocation stays unchanged.
    unit_assignment_path = ""
    unit_type = ""
    points_per_unit = ""
    region_assignment_path = ""
    in_region_unit_cost = ""
    out_of_region_unit_cost = ""

    failed_runs = []

    for cost_fn in ["uniform", "convenience_based"]:
        print(f"Cost function: {cost_fn}, seed: {seed}")

        # The cost configuration depends only on the cost setting, so it is
        # resolved once here rather than once per (method, budget) pair.
        cost_name = cost_fn
        cost_func = "uniform"
        if cost_fn == "convenience_based":
            cost_func = "pointwise_by_array"
            # NOTE(review): this path is computed but never placed on the
            # command line below, so the script is not told where the cost
            # array lives. It also resolves inside base_dir, whereas the other
            # hard-coded data paths in this notebook point at a 0_data folder
            # beside 3_sampling. Confirm the flag the training script expects
            # and the real file location before relying on these runs.
            cost_array_path = base_dir / "0_data/costs/togo/convenience_costs/distance_based_costs_top1_urban.pkl"

        for method in methods:
            for budget in budgets:
                # Only poprisk experiment names carry the group type.
                group_tag = f"_{group_type}" if method in ["poprisk"] else ""
                exp_name = f"{dataset}_{init_name}_cost_{cost_name}_method_{method}{group_tag}_budget_{budget}_seed_{seed}"

                # NOTE(review): several values below are empty strings, so after
                # shell word-splitting those flags are followed directly by the
                # next flag. Confirm the script's argument parser accepts that.
                cmd = f"""
python {script} \\
  --cfg {cfg} \\
  --exp-name {exp_name} \\
  --sampling_fn {method} \\
  --budget {budget} \\
  --initial_set_str {init_name} \\
  --id_path {id_path} \\
  --seed {seed} \\
  --cost_func {cost_func} \\
  --cost_name {cost_name} \\
  --unit_assignment_path {unit_assignment_path} \\
  --unit_type {unit_type} \\
  --points_per_unit {points_per_unit} \\
  --region_assignment_path {region_assignment_path} \\
  --in_region_unit_cost {in_region_unit_cost} \\
  --out_of_region_unit_cost {out_of_region_unit_cost} \\
  --group_assignment_path {group_assignment_path} \\
  --group_type {group_type} \\
  --util_lambda {util_lambda}
"""
                print(f"Running: {exp_name}")
                exit_code = _stream_shell_command(cmd)
                if exit_code != 0:
                    # Surface the failure but keep the sweep going.
                    print(f"WARNING: {exp_name} exited with status {exit_code}")
                    failed_runs.append(exp_name)

    return failed_runs


## Method-related arguments

### args for similarity/diversity method

In [38]:
# Cosine-similarity matrix file for the similarity method.
# NOTE(review): no visible cell reads this variable or passes it to the
# training command — confirm it is wired up where it is needed.
similarity_matrix_path = (
    "/home/libe2152/optimizedsampling/0_data/cosine_similarity/togo/"
    "cosine_similarity_train_test.npz"
)

## Run

In [None]:
# Full sweep: one pass over all cost settings, methods and budgets per seed.
for seed in seeds:
    run_setting1(seed=seed)