# Generating Togo initial sample

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

## Clustered Initial Sample

In [2]:
from cluster_sampling import ClusterSampler

In [None]:
# ---------------------------------------------------------------------------
# Configuration and data loading for the clustered initial sample.
# ---------------------------------------------------------------------------

# Mapping handed to ClusterSampler through its ADMIN_IDS argument.
ADMIN_IDS = {'canton': 'canton'}

# Input / output locations.
data_path = "/home/libe2152/optimizedsampling/0_data/admin_gdfs/togo/gdf_adm3.geojson"
out_path = '/home/libe2152/optimizedsampling/0_data/initial_samples/togo/cluster_sampling'
country_shape_file = '/home/libe2152/togo/data/shapefiles/openAfrica/Shapefiles/tgo_admbnda_adm0_inseed_itos_20210107.shp'

# Column used to stratify, column used to cluster, and how many strata to fix.
strata_col = 'region'
cluster_col = 'canton'
n_strata = 1

# Read the admin GeoDataFrame and the train-split IDs (IDs compared as strings).
gdf = gpd.read_file(data_path)
train_ids = pd.read_csv('/share/togo/splits/train_ids.csv').astype(str)
train_id_set = set(train_ids.iloc[:, 0])

# Keep only rows whose id appears in the first column of the train-split file,
# then expose the admin columns under the names used for sampling.
is_train_row = gdf['id'].astype(str).isin(train_id_set)
gdf = gdf[is_train_row].assign(
    canton=lambda frame: frame['combined_adm_id'],
    prefecture=lambda frame: frame['admin_2'],
    region=lambda frame: frame['admin_1'],
)

all_strata = gdf[strata_col].astype(str).unique()
print(all_strata)

# Seed the global NumPy RNG so the choice of fixed strata is repeatable
# (78910 was left in the original as a commented-out alternative seed).
np.random.seed(121212)
fixed_strata = np.random.choice(all_strata, size=n_strata, replace=False)

# Sweep over points-per-cluster, total sample size, and seed. For every
# combination: draw a sample restricted to `fixed_strata`, check it against the
# train split, then save the IDs and the plot.
#
# Failures are logged with the run parameters and collected in `failed_runs`
# rather than dropping into an interactive shell, so the cell can run
# unattended (e.g. Restart Kernel -> Run All).
failed_runs = []

for points_per_cluster in [10, 25]:
    sampler = ClusterSampler(gdf, id_col='id', strata_col=strata_col, cluster_col=cluster_col, ADMIN_IDS=ADMIN_IDS)
    for total_sample_size in range(500, 1100, 100):

        for seed in [1, 42, 123, 456, 789, 1234, 5678, 9101, 1213, 1415]:
            try:
                sampler.sample(total_sample_size, points_per_cluster, seed, fixed_strata=fixed_strata)

                # Validate BEFORE writing anything: every sampled id must belong
                # to the train split, otherwise nothing is saved for this run.
                sampled_ids_set = set(map(str, sampler.sampled_ids))
                assert sampled_ids_set.issubset(train_id_set), (
                    "Error: Some sampled IDs are not in train_ids!"
                )

                sampler.save_sampled_ids(out_path)
                sampler.plot(country_shape_file=country_shape_file)
            except Exception as e:
                # Report which configuration failed and keep going with the sweep.
                print(
                    f"[Failed] points_per_cluster={points_per_cluster}, "
                    f"total_sample_size={total_sample_size}, seed={seed}: "
                    f"{type(e).__name__}: {e}"
                )
                failed_runs.append((points_per_cluster, total_sample_size, seed, repr(e)))
            finally:
                # Always clear the sampler's state before the next configuration.
                sampler.reset_sample()

# Surface the failures at the end so they are not lost in the per-run log output.
if failed_runs:
    print(f"{len(failed_runs)} sampling run(s) failed:")
    for failed_ppc, failed_size, failed_seed, failed_error in failed_runs:
        print(f"  ppc={failed_ppc}, size={failed_size}, seed={failed_seed}: {failed_error}")

[Init] Initializing ClusterSampler...
[Stratify] Stratifying points by column: region
[Stratify] Unique strata found: ['savanes' 'kara' 'centrale' 'plateaux' 'maritime']
[Init] Found 5 strata.
[Sample] Starting sampling process...
[Determine Sample Sizes] Total desired sample size: 500
[Determine Sample Sizes] Selected strata: ['savanes']
[Determine Sample Sizes] Final sample size per stratum:
{'savanes': 500}
[Sample] Sample sizes per stratum: {'savanes': 500}
[Sample] Processing stratum: savanes
[Sample] Sampling complete. Total points sampled: 499
Clusters: canton
Saved 499 sampled IDs and metadata to /home/libe2152/optimizedsampling/0_data/initial_samples/togo/cluster_sampling/fixedstrata_savanes/sample_region_canton_10ppc_500_size_seed_1.pkl
Saved plot to /home/libe2152/optimizedsampling/0_data/initial_samples/togo/cluster_sampling/fixedstrata_savanes/plots/sample_region_canton_10ppc_499_size_seed_1.png
[Reset Sample] Clearing previous sample state...
[Sample] Starting sampling pr

KeyboardInterrupt: 

<Figure size 1200x1000 with 0 Axes>