In [None]:
import geopandas as gpd

# Cluster Sampling

In [None]:
from cluster_sampling import ClusterSampler

In [None]:

# Cluster sampling driver: for every combination of points-per-cluster, total
# sample size and seed, run ClusterSampler, save the sampled IDs and plot them.

# Admin-ID column name -> admin-level name; passed through to ClusterSampler.
ADMIN_IDS = {
    'pc11_s_id': 'state',
    'pc11_d_id': 'district',
    'pc11_sd_id': 'subdistrict'
}

data_path = "/share/india_secc/MOSAIKS/shrugs_with_all_admins.geojson"
gdf = gpd.read_file(data_path)

country_shape_file = '/home/libe2152/optimizedsampling/0_data/boundaries/world/ne_10m_admin_0_countries.shp'
country_name = 'India'

strata_col = 'pc11_s_id'
cluster_col = 'pc11_d_id'

out_path = '/home/libe2152/optimizedsampling/0_data/initial_samples/india_secc/cluster_sampling'

# Seeds used to repeat every (points_per_cluster, total_sample_size) configuration.
SEEDS = [1, 42, 123, 456, 789, 1234, 5678, 9101, 1213, 1415]

sampler = ClusterSampler(gdf, id_col='condensed_shrug_id', strata_col=strata_col, cluster_col=cluster_col, ADMIN_IDS=ADMIN_IDS)

# Failed runs are collected here instead of dropping into an interactive shell,
# so the cell can complete unattended (Restart Kernel -> Run All).
failed_runs = []

for points_per_cluster in [2, 5, 10, 25]:
    # Point the sampler back at the original cluster column before re-merging.
    # NOTE(review): presumably merge_small_clusters changes sampler.cluster_col;
    # strata_col is not reset the same way -- confirm that is intended.
    sampler.cluster_col = cluster_col
    sampler.merge_small_strata(points_per_cluster)
    sampler.merge_small_clusters(points_per_cluster)
    for total_sample_size in range(100, 5000, 100):
        for seed in SEEDS:
            try:
                sampler.sample(total_sample_size, points_per_cluster, seed=seed)
                sampler.save_sampled_ids(out_path)
                sampler.plot(country_shape_file=country_shape_file, country_name=country_name)
            except Exception as e:
                # Best-effort: keep going, but record which run failed and why.
                failed_runs.append((points_per_cluster, total_sample_size, seed, repr(e)))
                print(
                    f"Cluster sampling failed (points_per_cluster={points_per_cluster}, "
                    f"total_sample_size={total_sample_size}, seed={seed}): {e!r}"
                )
            finally:
                # Always clear the sample so the next run starts clean,
                # whether or not this one succeeded.
                sampler.reset_sample()

if failed_runs:
    print(f"{len(failed_runs)} cluster sampling run(s) failed; see `failed_runs` for details.")

# Convenience Sampling

In [None]:
from convenience_sampling import ConvenienceSampler

In [None]:
# Convenience sampling driver: run ConvenienceSampler once per sample size with
# the 'deterministic' method, then once per (sample size, seed) pair with the
# 'probabilistic' method, saving and plotting every sample.

# --- Inputs ---
gdf_path = '/share/india_secc/MOSAIKS/shrugs_with_all_admins.geojson'
id_col = 'condensed_shrug_id'
pop_col = 'pc11_pca_tot_p_combined'
n_urban = 50
country_name = 'India'
country_shape_file = '/home/libe2152/optimizedsampling/0_data/boundaries/world/ne_10m_admin_0_countries.shp'
# Despite the "_dir" name, this value is the path of a single .pkl file.
distances_dir = f'/home/libe2152/optimizedsampling/0_data/distances/india_secc/distance_to_top50_urban.pkl'

# --- Outputs ---
out_path = f'/home/libe2152/optimizedsampling/0_data/initial_samples/india_secc/convenience_sampling'

print("Reading GeoDataFrame...")
gdf = gpd.read_file(gdf_path)

# Constructor arguments shared by every run; the same frame is passed as both
# the candidate points and the urban layer.
convenience_sampler_kwargs = dict(
    id_col=id_col,
    gdf_points=gdf,
    gdf_urban=gdf,
    n_urban=n_urban,
    pop_col=pop_col,
    distances_dir=distances_dir,
)

convenience_sample_sizes = range(100, 5000, 100)
convenience_seeds = [1, 42, 123, 456, 789, 1234, 5678, 9101, 1213, 1415]

# Ordered run plan: all deterministic runs first (seed 1, needed to break ties),
# then every probabilistic (size, seed) combination.
convenience_runs = [('deterministic', size, 1) for size in convenience_sample_sizes]
convenience_runs += [
    ('probabilistic', size, run_seed)
    for size in convenience_sample_sizes
    for run_seed in convenience_seeds
]

for method, desired_sample_size, seed in convenience_runs:
    print("Initializing ConvenienceSampler...")
    # A fresh sampler is built for every run, exactly one per sample drawn.
    sampler = ConvenienceSampler(**convenience_sampler_kwargs)

    sampler.sample(n_samples=desired_sample_size, method=method, seed=seed)
    sampler.save_sampled_ids(out_path)
    sampler.plot(country_shape_file=country_shape_file, country_name=country_name)

# Random Sampling

In [None]:
from random_sampling import RandomSampler

In [None]:
# Random sampling driver: run RandomSampler for every (sample size, seed) pair,
# saving the sampled IDs and plotting each sample before resetting the sampler.

data_path = "/share/india_secc/MOSAIKS/shrugs_with_all_admins.geojson"
out_path = f'/home/libe2152/optimizedsampling/0_data/initial_samples/india_secc/random_sampling'

country_name = 'India'
country_shape_file = '/home/libe2152/optimizedsampling/0_data/boundaries/world/ne_10m_admin_0_countries.shp'

gdf = gpd.read_file(data_path)
sampler = RandomSampler(gdf, id_col="condensed_shrug_id")

# Keyword arguments reused for every plot call.
random_plot_kwargs = dict(country_shape_file=country_shape_file, country_name=country_name)

random_seeds = [1, 42, 123, 456, 789, 1234, 5678, 9101, 1213, 1415]

# Sizes vary in the outer position and seeds in the inner one, so all seeds for
# a given size run back to back.
random_runs = [
    (size, run_seed)
    for size in range(100, 5000, 100)
    for run_seed in random_seeds
]

for total_sample_size, seed in random_runs:
    sampler.sample(total_sample_size=total_sample_size, seed=seed)
    sampler.save_sampled_ids(out_path)
    sampler.plot(**random_plot_kwargs)
    # Clear the sample so the single sampler instance can be reused.
    sampler.reset_sample()