![title](../images/header.png)

Add attributes from polygons and assets
-------
This notebook lets you add additional columns to your cleaned CEO data, including administrative areas for sub-national estimates and updated strata from assets in GEE. 
###### For more information contact aurelie.shapiro@fao.org or remi.dannunzio@fao.org

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt

import geopandas
import rasterio
import matplotlib.pyplot as plt
from shapely.geometry import Point

import ee
import geemap
import pandas as pd

In [2]:
# Initialize Earth Engine
ee.Initialize()

##### read points from scripts 1-7

In [3]:
#points = pd.read_csv('/home/sepal-user/module_results/esbae/DRC/DRC_all_points_whisp.csv', delimiter=',', low_memory=False)
#points = pd.read_csv('/home/sepal-user/module_results/esbae/DRC/DRC_all_ceo_2015_2022_2023_allData_whisp_prov_biome.csv', delimiter=',', low_memory=False)
#points = pd.read_csv('/home/sepal-user/module_results/esbae/GAB/GAB_all_ceo_2015_2022_2023_clean.csv', delimiter=',', low_memory=False)
#points = pd.read_csv('/home/sepal-user/module_results/esbae/COG/COG_all_ceo_2015_2016_2022_2023_clean.csv',delimiter=',', low_memory=False)
#points = pd.read_csv('/home/sepal-user/module_results/esbae/EQG/EQG_all_ceo_2015_2022_2023_prov.csv',delimiter=',', low_memory=False)
#points = pd.read_csv('/home/sepal-user/module_results/esbae/EQG/EQG_all_ceo_2015_2022_2023_prov.csv',delimiter=',', low_memory=False)
#points = pd.read_csv('/home/sepal-user/module_results/esbae/CAR/CAR_all_ceo_2015_2022_2023_pref.csv',delimiter=',', low_memory=False)
points = pd.read_csv('/home/sepal-user/module_results/esbae/CMR/CMR_all_ceo_2015_2022_2023_clean.csv',delimiter=',', low_memory=False)
#read GEE points
#points = ee.FeatureCollection('users/faocongo/sbae/sbae_hex16_cmr')

In [4]:
len(points)

9854

In [5]:
all_columns = points.columns.tolist()

# Display the list of column names
print(all_columns)

['UID', 'ID', 'plotid', 'point_id', 'sampleid', 'lon', 'lat', 'sample_geom', 'Ref_FNF_2015', 'Ref_Regeneration', 'Ref_Change_1522', 'Ref_Change_Type_1522', 'ArtFor', 'ArtMine', 'InfraR', 'Urb', 'IndFor', 'Other', 'Other_Desc', 'IndMine', 'IndAg', 'ArtAg', 'Ref_Year_1522', 'Ref_NFtype_2015', 'Ref_Ftype_2015', 'collection', 'interpreted', 'Ref_LCover_2015', 'Ref_Change_Year_1522', 'Ref_Change_LCover_1522', 'Def2016', 'Def2017', 'Def2018', 'Def2019', 'Def2020', 'Def2021', 'Def2022', 'Deg2016', 'Deg2017', 'Deg2018', 'Deg2019', 'Deg2020', 'Deg2021', 'Deg2022', 'Defall', 'Degall', 'Stable', 'NF', 'DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove', 'Swamp', 'Gallery', 'Plantation', 'Woodland', 'Shrubland', 'Grassland', 'Aquatic', 'Bare', 'Cultivated', 'Builtup', 'Water', 'DensFor_Def', 'DensDryFor_Def', 'SecFor_Def', 'DryOpenFor_Def', 'Mangrove_Def', 'Swamp_Def', 'Gallery_Def', 'Plantation_Def', 'Woodland_Def', 'DensFor_Deg', 'DensDryFor_Deg', 'SecFor_Deg', 'DryOpenFor_Deg', 'Mangrov

In [6]:
#drop any columns if needed
#esbae = esbae.drop('index_right', axis=1)

In [6]:
#enter Lat and Lon columns
LATcol = 'lat'
LONcol = 'lon'

In [7]:
gdf = geopandas.GeoDataFrame(points, geometry=geopandas.points_from_xy(points[LONcol], points[LATcol]), crs="EPSG:4326")

#### get strata from model file

In [None]:
model = pd.read_csv('/home/sepal-user/module_results/esbae/COG/COG_esbae_2015_2022_model.csv',delimiter=',', low_memory=False)

In [None]:
model['stratum'].value_counts(dropna=False)

In [None]:
model_columns = model.columns.tolist()

# Display the list of column names
print(model_columns)

In [None]:
stratum_gdf = gdf.merge(
    model[['point_id', 'stratum']],  # Keep only 'point_id' and 'stratum' columns from model_df
    on='point_id',  # Merge on point_id
    how='left'  # Keep all rows from points_gdf, and only matching rows from model_df
)

In [None]:
stratum_gdf.head()

In [None]:
#drop any columns if needed
columns_to_drop = ['UID.1']
stratum_gdf = stratum_gdf.drop(columns_to_drop, axis=1)

In [None]:
strat_columns = stratum_gdf.columns.tolist()
# Display the list of column names
print(strat_columns)

In [None]:
# Rename columns
column_names = {
   'stratum': 'eSBAE_strat_1522'
#    'NAME_2': 'Territoire',
#    'Unnamed: 0':'Index'
}

# Use the rename() method to rename columns
stratum_gdf.rename(columns=column_names, inplace=True)

In [None]:
# save output
out_file =  '/home/sepal-user/module_results/esbae/COG/COG_all_ceo_2015_2016_2022_2023_clean_strat.csv'
stratum_gdf.to_csv(out_file,index=False)

#### add attributes from local shapefile to associate points with sub-national juridictions

In [8]:
# this shapefile should be uploaded to your SEPAL workspace
#poly = "/home/sepal-user/data/admin/RDC_Province_26.shp"
#poly = "/home/sepal-user/data/aoi/Ecoregions2017.shp"
poly = "/home/sepal-user/data/admin/cmr_admbnda_adm1_inc_20180104.shp"
poly_shp = gpd.read_file(poly)
poly_shp.head()

Unnamed: 0,Shape_Leng,Shape_Area,ADM1_EN,ADM1_FR,ADM1_PCODE,ADM1_REF,ADM1ALT1EN,ADM1ALT2EN,ADM1ALT1FR,ADM1ALT2FR,ADM0_EN,ADM0_FR,ADM0_PCODE,date,validOn,validTo,geometry
0,15.151424,5.23554,Adamawa,Adamaoua,CM001,,,,,,Cameroon,Cameroun (le),CM,2018-12-17,2019-01-04,0000/00/00,"POLYGON ((12.28874 8.1817, 12.28379 8.16514, 1..."
1,16.170961,5.601567,Centre,Centre,CM002,,,,,,Cameroon,Cameroun (le),CM,2018-12-17,2019-01-04,0000/00/00,"POLYGON ((11.99753 6.26224, 11.99793 6.25936, ..."
2,18.826415,8.95652,East,Est,CM003,,,,,,Cameroon,Cameroun (le),CM,2018-12-17,2019-01-04,0000/00/00,"POLYGON ((14.39484 6.06262, 14.39542 6.06071, ..."
3,13.27286,2.826867,Far-North,Extrême-Nord,CM004,,,,,,Cameroon,Cameroun (le),CM,2018-12-17,2019-01-04,0000/00/00,"POLYGON ((14.53742 12.94356, 14.54344 12.93673..."
4,10.673468,1.646349,Littoral,Littoral,CM005,,,,,,Cameroon,Cameroun (le),CM,2018-12-17,2019-01-04,0000/00/00,"POLYGON ((9.84789 5.33234, 9.84934 5.32882, 9...."


In [9]:
# from the table above, identify the column name you want to associate
admin_name = 'ADM1_FR'
#admin_name = 'shapeName'
#admin_name = 'admin1Name'
#admin_name = 'NOM'
#admin_name = 'Duparteme'
#new_name = 'Prefecture'
new_name = 'Province'

In [10]:
poly_crs = poly_shp.crs
print("Current CRS:", poly_crs)

Current CRS: EPSG:4326


In [11]:
poly_shp = poly_shp.set_crs(epsg=3395)

ValueError: The GeoSeries already has a CRS which is not equal to the passed CRS. Specify 'allow_override=True' to allow replacing the existing CRS without doing any transformation. If you actually want to transform the geometries, use 'GeoSeries.to_crs' instead.

In [None]:
poly_shp = poly_shp.to_crs(gdf.crs)

In [12]:
# Reproject both GeoDataFrames to the same projected CRS (e.g., EPSG:3395 for World Mercator)
gdf = gdf.to_crs("EPSG:3395")
poly_shp = poly_shp.to_crs("EPSG:3395")

# Step 1: Perform the spatial join for intersecting points
joined_data_intersecting = gpd.sjoin(gdf, poly_shp[[admin_name, 'geometry']], how="left", predicate="within", lsuffix='left', rsuffix='right')

# Drop the 'index_right' column if it exists, to avoid conflicts
if 'index_right' in joined_data_intersecting.columns:
    joined_data_intersecting = joined_data_intersecting.drop(columns='index_right')

# Step 2: Identify points without an intersection (NaN values in the Admin_Name column)
no_intersection_points = joined_data_intersecting[joined_data_intersecting[admin_name].isna()]

# Step 3: Perform the nearest spatial join for points without an intersection
nearest_join = gpd.sjoin_nearest(no_intersection_points.drop(columns=admin_name), poly_shp[[admin_name, 'geometry']], how="left", distance_col="distance_to_polygon")

# Drop the 'index_right' column if it exists in the nearest join result
if 'index_right' in nearest_join.columns:
    nearest_join = nearest_join.drop(columns='index_right')

# Step 4: Combine intersecting and nearest joined data into one GeoDataFrame
# Retain only the specified `admin_name` column from `poly_shp`
joined_data_combined = pd.concat([
    joined_data_intersecting.dropna(subset=[admin_name]),
    nearest_join
])

# Select only the columns from `gdf` plus `admin_name`
columns_to_keep = list(gdf.columns) + [admin_name]
joined_data = joined_data_combined[columns_to_keep]

# Display the resulting GeoDataFrame
joined_data.head()

Unnamed: 0,UID,ID,plotid,point_id,sampleid,lon,lat,sample_geom,Ref_FNF_2015,Ref_Regeneration,...,Ref_LCover_2022,Ref_Change_Year_2023,Ref_Change_LCover_2023,Def2023,Deg2023,interpreted_qc,Ref_Change_Type_1523,Ref_Change_Year_1523,geometry,ADM1_FR
0,0,0,3195,3195.0,3195,11.966747,7.395009,POINT(11.966747 7.395009),1,,...,,,,,,,Stable,Stable,POINT (1332132.182 820007.964),Adamaoua
1,1,1,382369,382369.0,382369,13.462191,4.616583,POINT(13.462191 4.616583),1,,...,,,,,,,Deg,Deg2022,POINT (1498604.247 511035.977),Est
2,2,2,381976,381976.0,381976,13.326331,4.472166,POINT(13.326331 4.472166),1,,...,,,,,,,Stable,Stable,POINT (1483480.381 495016.134),Est
3,3,3,381269,381269.0,381269,13.454098,4.943217,POINT(13.454098 4.943217),1,,...,,,,,,,Stable,Stable,POINT (1497703.338 547281.079),Est
4,4,4,379902,379902.0,379902,13.021454,4.519678,POINT(13.021454 4.519678),1,,...,,,,,,,Stable,Stable,POINT (1449541.629 500286.172),Est


In [13]:
nearest_join.head()

Unnamed: 0,UID,ID,plotid,point_id,sampleid,lon,lat,sample_geom,Ref_FNF_2015,Ref_Regeneration,...,Ref_Change_Year_2023,Ref_Change_LCover_2023,Def2023,Deg2023,interpreted_qc,Ref_Change_Type_1523,Ref_Change_Year_1523,geometry,ADM1_FR,distance_to_polygon
1574,1574,1574,1046869,390284.0,1046869,14.440224,6.089317,POINT(14.440224 6.089317),1,,...,,,,,,Stable,Stable,POINT (1607478.383 674609.944),Adamaoua,510.636786
4267,4267,4267,936265,552244.0,936265,15.450794,7.891717,POINT(15.450794 7.891717),1,,...,,,,,,Stable,Stable,POINT (1719974.52 875430.195),Nord,132.783634
5135,5135,5135,899146,109849.0,899146,9.493997,6.448739,POINT(9.493997 6.448739),0,non,...,,,,,,NF,NF,POINT (1056866.912 714595.118),Sud-Ouest,102.886436
6780,6780,47,375,,375,12.817746,8.954631,POINT(12.81774638 8.954631331),1,,...,Stable,Stable_11,0.0,0.0,1.0,Stable,Stable,POINT (1426865 994261.66),Nord,225.056111
6816,6816,83,259,,259,14.433187,9.136361,POINT(14.43318675 9.136360513),1,non,...,NF,NF_12,0.0,0.0,0.0,NF,NF,POINT (1606695 1014612.656),Nord,615.77224


In [14]:
joined_data[admin_name].value_counts(dropna=False)

ADM1_FR
Adamaoua        2151
Nord            1837
Centre          1627
Est             1615
Nord-Ouest       547
Sud              537
Ouest            510
Littoral         423
Sud-Ouest        413
Extrême-Nord     194
Name: count, dtype: int64

In [15]:
province_pts = pd.pivot_table(joined_data,values='plotid',index=[admin_name],columns=['collection'],aggfunc="count",margins=True,
                             margins_name='Total',dropna=False)
province_pts

collection,QC_2023,coll_1522_1,coll_1522_2,coll_2023_1,coll_2023_2,coll_2023_3,coll_2023_4,coll_2023_5,coll_2023_6,Total
ADM1_FR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Adamaoua,40,73,1559,86,68,21,106,40,158,2151
Centre,42,73,1088,50,67,21,118,50,118,1627
Est,46,135,983,63,92,15,170,26,85,1615
Extrême-Nord,12,1,48,33,22,10,23,12,33,194
Littoral,15,29,264,17,24,9,21,11,33,423
Nord,49,71,1065,87,67,66,102,86,244,1837
Nord-Ouest,13,23,355,35,22,12,35,16,36,547
Ouest,18,17,310,32,20,14,21,23,55,510
Sud,15,47,305,19,46,2,73,6,24,537
Sud-Ouest,10,29,258,20,22,3,40,4,27,413


In [None]:
# Remove rows where the 'Province' column has NaN values
joined_data = joined_data.dropna(subset=[admin_name])

In [16]:
len(joined_data)

9854

In [17]:
poly_shp = poly_shp.to_crs("EPSG:3395")

In [18]:
#calculate area of poly
poly_shp['area_sqm'] = poly_shp.geometry.area

In [19]:
poly_shp['area_ha'] = poly_shp['area_sqm'] / 10000

In [20]:
pd.set_option('display.float_format', lambda x: '%.0f' % x)

# Display the GeoDataFrame with the new area columns
print(poly_shp[[admin_name, 'area_ha']])

        ADM1_FR  area_ha
0      Adamaoua  6491649
1        Centre  6918872
2           Est 11051277
3  Extrême-Nord  3546998
4      Littoral  2032243
5          Nord  6763003
6    Nord-Ouest  1763348
7           Sud  4728342
8     Sud-Ouest  2497090
9         Ouest  1408322


In [21]:
# Rename columns
column_mapping = {
   admin_name: new_name
#    'NAME_2': 'Territoire',
#    'Unnamed: 0':'Index'
}

# Use the rename() method to rename columns
joined_data.rename(columns=column_mapping, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  joined_data.rename(columns=column_mapping, inplace=True)


In [22]:
list_columns = joined_data.columns.tolist()

# Display the list of column names
print(list_columns)

['UID', 'ID', 'plotid', 'point_id', 'sampleid', 'lon', 'lat', 'sample_geom', 'Ref_FNF_2015', 'Ref_Regeneration', 'Ref_Change_1522', 'Ref_Change_Type_1522', 'ArtFor', 'ArtMine', 'InfraR', 'Urb', 'IndFor', 'Other', 'Other_Desc', 'IndMine', 'IndAg', 'ArtAg', 'Ref_Year_1522', 'Ref_NFtype_2015', 'Ref_Ftype_2015', 'collection', 'interpreted', 'Ref_LCover_2015', 'Ref_Change_Year_1522', 'Ref_Change_LCover_1522', 'Def2016', 'Def2017', 'Def2018', 'Def2019', 'Def2020', 'Def2021', 'Def2022', 'Deg2016', 'Deg2017', 'Deg2018', 'Deg2019', 'Deg2020', 'Deg2021', 'Deg2022', 'Defall', 'Degall', 'Stable', 'NF', 'DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove', 'Swamp', 'Gallery', 'Plantation', 'Woodland', 'Shrubland', 'Grassland', 'Aquatic', 'Bare', 'Cultivated', 'Builtup', 'Water', 'DensFor_Def', 'DensDryFor_Def', 'SecFor_Def', 'DryOpenFor_Def', 'Mangrove_Def', 'Swamp_Def', 'Gallery_Def', 'Plantation_Def', 'Woodland_Def', 'DensFor_Deg', 'DensDryFor_Deg', 'SecFor_Deg', 'DryOpenFor_Deg', 'Mangrov

In [23]:
joined_data = joined_data.to_crs("EPSG:4326")

In [24]:
# save output
out_file_shp_att =  '/home/sepal-user/module_results/esbae/CMR/CMR_all_ceo_2015_2016_2022_2023_prov.csv'
joined_data.to_csv(out_file_shp_att,index=False)

#### assign strata from GEE asset

In [25]:
gdf = joined_data

In [26]:
UID = 'UID'

In [27]:
# Assuming df is your DataFrame - check for unique values
if gdf[UID].nunique() == len(gdf):
    print("UID has entirely unique values.")
else:
    print("UID does not have entirely unique values.")

if gdf['ID'].nunique() == len(gdf):
    print("ID has entirely unique values.")
else:
    print("ID does not have entirely unique values.")

UID has entirely unique values.
ID does not have entirely unique values.


In [28]:
# Remove duplicates keeping the last occurrence (the most recent date)
#gdf.drop_duplicates(subset='sample_geom', keep='last', inplace=True)

In [29]:
# select 2022-2023 strata asset you want to sample 
#asset2223 = ee.Image('users/faocongo/sbae/EQG_FNF_kmeans_strat_5_2022_2023')
#asset2223 = ee.Image('users/faocongo/sbae/CAF_FNF_kmeans_strat_5_2022_2023')
#asset2223 = ee.Image('users/faocongo/sbae/GAB_FNF_kmeans_strat_5_2022_2023')
asset2223 = ee.Image('users/faocongo/sbae/CMR_FNF_kmeans_strat_5_2022_2023')
#asset2223 = ee.Image('users/faocongo/sbae/COD_allData_stratification_2023')
#asset2223 = ee.Image('users/faocongo/sbae/COD_FNF_kmeans_strat_5_2022_2023')
#asset2223 = ee.Image('users/faocongo/sbae/COD_allData_stratification_2023')
#asset2223 = ee.Image('projects/ee-ameliearquero/assets/COG_FNF_kmeans_strat_3_2022_2023')
#asset2223 = ee.Image('users/faocongo/sbae/CMR_allData_stratification_2023')

In [30]:
# what to rename sampled band to. it is always called first
newName2223 = 'TNT_stratum_2023'

In [31]:
# select 2015-2022 strata asset you want to sample 
#asset1522 = ee.Image('users/faocongo/sbae/EQG_FNF_kmeans_strat_5_2015_2022')
#asset1522 = ee.Image('users/faocongo/sbae/CAF_FNF_kmeans_strat_5_2015_2022')
#asset1522 = ee.Image('users/faocongo/sbae/GAB_FNF_kmeans_strat_5_2015_2022')
#asset1522 = ee.Image('users/faocongo/sbae/COD_allData_stratification_1522')
#asset1522 = ee.Image('users/faocongo/sbae/COD_allData_stratification_1522')
asset1522 = ee.Image('users/faocongo/sbae/CMR_FNF_kmeans_strat_5_2015_2022')
#asset1522 = ee.Image('users/faocongo/sbae/COD_FNF_kmeans_strat_5_2015_2022')

In [32]:
newName1522 = 'TNT_stratum_1522'

In [33]:
#resolution of asset
scale = 70

In [34]:
#name of band
band = 'strata'

In [None]:
#sample 2223 stratum
gdf = gdf.reset_index(drop=True)
#with just a few columns
#this can take a little while with many points
dfs = []
for i in range(0, len(points), 5000):
    tmp_df = gdf.loc[i:i+4999]
    tmp_fc = geemap.gdf_to_ee(tmp_df[[UID, 'geometry']])
    tmp_smp = asset2223.select(band).reduceRegions(**{
        'collection': tmp_fc,
        'scale': scale,
        'reducer': ee.Reducer.first()   
    })

    tmp_res = geemap.ee_to_gdf(tmp_smp)
    dfs.append(tmp_res)
    
sampled_df = pd.concat(dfs)

In [None]:
sampled_df

In [None]:
sampled_df = sampled_df.rename(columns={'first':newName2223})
sampled_df.head()

In [None]:
#sample 1522 stratum
gdf = sampled_df.reset_index(drop=True)
#with just a few columns
#this can take a little while with many points
dfs = []
for i in range(0, len(points), 5000):
    tmp_df = gdf.loc[i:i+4999]
    tmp_fc = geemap.gdf_to_ee(tmp_df[[UID, 'geometry',newName2223]])
    tmp_smp = asset1522.select(band).reduceRegions(**{
        'collection': tmp_fc,
        'scale': scale,
        'reducer': ee.Reducer.first()   
    })

    tmp_res = geemap.ee_to_gdf(tmp_smp)
    dfs.append(tmp_res)
    
resampled_df = pd.concat(dfs)

In [None]:
resampled_df = resampled_df.rename(columns={'first':newName1522})
resampled_df.head()

In [None]:
# recover columns
# Step 1: Merge the two DataFrames on the 'ID' column (assuming 'ID' is the common key)
merged_df = pd.merge(resampled_df, points, on=UID, how='left', suffixes=('', '_dup'))
#merged_df = pd.merge(sampled_df, joined_data, on='UID', how='left', suffixes=('', '_dup'))
#merged_df = pd.merge(sampled_df, gdf, on='UID', how='left', suffixes=('', '_dup'))
# Step 2: Remove duplicate columns
# Here, we assume that any duplicate column from df2 will have the '_dup' suffix added by the merge
# We will keep the columns from df1 and drop the duplicate ones from df2.
merged_df = merged_df.loc[:, ~merged_df.columns.str.endswith('_dup')]
merged_df.head()

In [None]:
merged_df_cols = merged_df.columns.tolist()
# Display the list of column names
print(merged_df_cols)

In [None]:
#drop any columns if needed
#columns_to_drop = ['sample_geom']
#merged_df = merged_df.drop(columns_to_drop, axis=1)

In [None]:
merged_df[newName1522].value_counts(dropna=False)

In [None]:
merged_df[newName2223].value_counts(dropna=False)

In [None]:
# Using loc to replace only rows where 'stratum' is 0
merged_df.loc[merged_df[newName1522] == 0, newName1522] = 1

In [None]:
# Using loc to replace only rows where 'stratum' is 0
merged_df.loc[merged_df[newName2223] == 0, newName2223] = 1

In [None]:
merged_df[newName1522] = merged_df[newName1522].fillna(1)
merged_df[newName2223] = merged_df[newName2223].fillna(1)

In [None]:
print(merged_df.dtypes)

In [None]:
pd.pivot_table(
    merged_df,
    values=UID,
    index=[newName1522],
    columns=['collection'],
    aggfunc="count",
    margins=True,              # Adds totals
    margins_name='Total',      # Name of the margins/total column
    dropna=False               # Include NaN values in the table
)

In [None]:
pd.pivot_table(
    merged_df,
    values=UID,
    index=[newName2223],
    columns=['collection'],
    aggfunc="count",
    margins=True,              # Adds totals
    margins_name='Total',      # Name of the margins/total column
    dropna=False               # Include NaN values in the table
)

In [None]:
# Remove rows where the stratum has NaN values - outside the country
#merged_df = merged_df.dropna(subset=['TNT_stratum_2223'])

In [None]:
merged_df['sampling'].value_counts(dropna=False)

In [None]:
merged_df['collection'].value_counts(dropna=False)

In [None]:
# Check if the lengths of merged_df and ceo are the same
if len(merged_df) == len(points):
    print("The lengths of merged_df and ceo are the same.")
else:
    print("The lengths of merged_df and ceo are different.")

# Optionally, you can print the lengths for clarity
print("Length of merged_df:", len(merged_df))
print("Length of ceo:", len(points))

#### export

In [None]:
out_file =  '/home/sepal-user/module_results/esbae/CMR/CMR_all_ceo_2015_2022_2023_prov_TNT.csv'
#sampled_df.to_csv(out_file,index=False)
merged_df.to_csv(out_file,index=False)