![title](../images/header.png)

Merging CEO data phase II (2023)
-------
This notebook merges validated data from different CEO CAFI projects for 2023
###### For more information contact aurelie.shapiro@fao.org or remi.dannunzio@fao.org

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Lift pandas' display limits: show every column and every row, and never
# truncate the text inside a cell (None disables the corresponding limit).
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

## Enter parameters

In [2]:
# ISO code of the country being processed.
# Allowed values: COD COG CMR GAB EQG CAF
iso = "EQG"

### 1. Read all CEO sample files
Download your CEO sample files from collect.earth and upload them to SEPAL (for example with FileZilla).
Add one `pd.read_csv` line per CEO project.

In [3]:
# Templates for the other countries: uncomment the lines of the country being processed
# (and comment out the active ones below).
#DRC
#ceo1 = pd.read_csv('/home/sepal-user/module_results/esbae/DRC/ceo-CAFI-DDD-RDC-eSBAE-validation-1-sample-data.csv')
#ceo2 = pd.read_csv('/home/sepal-user/module_results/esbae/DRC/ceo-CAFI-DDD-RDC-eSBAE-validation-2-sample-data.csv')
#ceo3 = pd.read_csv('/home/sepal-user/module_results/esbae/DRC/ceo-CAFI-DDD-RDC-eSBAE-validation-3-sample-data.csv')
#GAB
#ceo1 = pd.read_csv('/home/sepal-user/module_results/esbae/GAB/ceo-CAFI-DDD-Gabon-eSBAE-validation-1-sample-data.csv')
#ceo2 = pd.read_csv('/home/sepal-user/module_results/esbae/GAB/ceo-CAFI-DDD-Gabon-eSBAE-validation-2-sample-data.csv')
#ceo3 = pd.read_csv('/home/sepal-user/module_results/esbae/GAB/ceo-CAFI-DDD-Gabon-eSBAE-validation-3-sample-data.csv')
#ceo4 = pd.read_csv('/home/sepal-user/module_results/esbae/GAB/ceo-CAFI-DDD-Gabon-eSBAE-validation-4-sample-data.csv')
#ceo5 = pd.read_csv('/home/sepal-user/module_results/esbae/GAB/ceo-CAFI-DDD-Gabon-eSBAE-validation-5-sample-data.csv')
#COG
#ceo1 = pd.read_csv('/home/sepal-user/module_results/esbae/COG/COG_all_ceo_1522_1622_clean.csv')
#CMR
#ceo1 = pd.read_csv('/home/sepal-user/module_results/esbae/CMR/ceo-CAFI-DDD-Cameroun-eSBAE-2023-collection-1-sample-data.csv')
#ceo2 = pd.read_csv('/home/sepal-user/module_results/esbae/CMR/ceo-CAFI-DDD-Cameroun-eSBAE-validation-2-sample-data.csv')
#CAR
#ceo1 = pd.read_csv('/home/sepal-user/module_results/esbae/CAR/ceo-CAFI-DDD-RCA-eSBAE-validation-1-sample-data.csv')
#ceo2 = pd.read_csv('/home/sepal-user/module_results/esbae/CAR/ceo-CAFI-DDD-RCA-eSBAE-validation-2-sample-data.csv')
#EQG (earlier validation projects)
#ceo1 = pd.read_csv('/home/sepal-user/module_results/esbae/EQG/ceo-CAFI-DDD-Equatorial-Guinea-eSBAE-validation-1-sample-data.csv')
#ceo2 = pd.read_csv('/home/sepal-user/module_results/esbae/EQG/ceo-CAFI-DDD-Equatorial-Guinea-eSBAE-validation-2-sample-data.csv')

# Active selection: EQG, 2023 collections 1 and 2
eqg_collection_1_csv = '/home/sepal-user/module_results/esbae/EQG/ceo-CAFI-DDD-EQG-eSBAE-2023-collection-1-sample-data.csv'
eqg_collection_2_csv = '/home/sepal-user/module_results/esbae/EQG/ceo-CAFI-DDD-EQG-eSBAE-2023-collection-2-sample-data.csv'
ceo1 = pd.read_csv(eqg_collection_1_csv)
ceo2 = pd.read_csv(eqg_collection_2_csv)

make a list of the ceo files for processing

In [4]:
# List every CEO DataFrame that was read, one entry per project, e.g.
#   5 files -> [ceo1, ceo2, ceo3, ceo4, ceo5]
#   3 files -> [ceo1, ceo2, ceo3]
#   1 file  -> [ceo1]
ceo_files = [
    ceo1,
    ceo2,
]

In [5]:
# Path of the CSV that will receive the merged, cleaned CEO validation data
ceo_data_out = (
    '/home/sepal-user/module_results/esbae/EQG/EQG_all_ceo_2023_clean.csv'
)

### end of parameters

In [6]:
# Show the column names of the first CEO file so they can be checked against
# the names used in the cleaning step
column_names = list(ceo1.columns)
print(column_names)

['plotid', 'sampleid', 'sample_internal_id', 'lon', 'lat', 'email', 'flagged', 'collection_time', 'analysis_duration', 'imagery_title', 'imagery_attributions', 'sample_geom', 'pl_index', 'pl_chgprob', 'pl_maxforprob', 'pl_strata', 'forêt ou non-forêt en 2022', "Y'a t'il de la régéneration", 'Changement en 2023', 'Type de Changement 2023', 'Exploitation forestière artisanale (<5ha)', 'Activité minière artisanale', 'Infrastructure routière', 'Infrastructure d’urbanisation ou habitations', 'Exploitation forestière industrielle (>5ha)', 'Autre moteur', 'Descriptif autre moteur', 'Activité minière industrielle', 'Agriculture industrielle', 'Agriculture paysanne', 'Année de Changement', 'Type de non-forêt en 2022', 'Type de forêt en 2022', 'Commentaires']


##### add a column named "interpreted" which = 1 when the point has been validated in CEO, otherwise 0
count the number of validated points per ceo file
replace forest and non-forest types with codes

In [7]:
# Clean each CEO file, then merge them into one DataFrame (`merged_ceo`).
# Every file is processed on a copy, so the frames in `ceo_files` are left untouched
# and re-running this cell always starts from the data as it was read.
modified_ceo_files = []

# `collection_number` counts the files 1, 2, ... in the order they appear in `ceo_files`
for collection_number, ceo_raw in enumerate(ceo_files, start=1):

    # One row per plotid: sort by collection_time in ascending order and keep the FIRST row,
    # i.e. the earliest collected record of each plot. pandas sorts missing values last
    # (na_position='last' is the default), so a row that has a collection_time is kept
    # in preference to a row of the same plot that has none.
    # NOTE(review): the previous comments spoke of keeping "the most recent date", which is
    # not what this code does; confirm which record should win before changing `keep`.
    ceo = (
        ceo_raw
        .sort_values(by='collection_time', ascending=True)
        .drop_duplicates(subset='plotid', keep='first')
        .reset_index(drop=True)
    )

    # Number of the CEO file this row came from (turned into a 'coll_2023_<n>' label below)
    ceo['collection'] = collection_number

    # 'interpreted' = 1 when 'email' is not null (the point was validated in CEO), otherwise 0
    ceo['interpreted'] = np.where(ceo['email'].notna(), 1, 0)

    # Number of rows per 'interpreted' value, for the summary line printed below
    interpreted_counts = ceo['interpreted'].value_counts().to_dict()

    modified_ceo_files.append(ceo)

    print(f"ceo {collection_number} # of validated points: {interpreted_counts.get(1, 0)}, # of not validated points: {interpreted_counts.get(0, 0)}")

# Stack the cleaned files into one DataFrame
merged_ceo = pd.concat(modified_ceo_files, ignore_index=True)

# Columns kept in the merged DataFrame (raw CEO names; a missing column raises a KeyError)
columns_to_keep = ['plotid', 'sampleid', 'lon', 'lat', 'pl_strata',
                    'sample_geom', 'forêt ou non-forêt en 2022', "Y'a t'il de la régéneration",
                    'Changement en 2023', 'Type de Changement 2023', 'Exploitation forestière artisanale (<5ha)',
                    'Activité minière artisanale', 'Infrastructure routière', 'Infrastructure d’urbanisation ou habitations',
                    'Exploitation forestière industrielle (>5ha)', 'Autre moteur', 'Descriptif autre moteur', 'Activité minière industrielle',
                    'Agriculture industrielle', 'Agriculture paysanne', 'Type de non-forêt en 2022', 'Type de forêt en 2022', 'collection', 'interpreted']

# Raw CEO column name -> short name used in the rest of the notebook
column_mapping = {
    'forêt ou non-forêt en 2022': 'Ref_FNF_2022',
    "Y'a t'il de la régéneration": 'Ref_Regeneration',
    'Changement en 2023': 'Ref_Change_2023',
    'Type de Changement 2023': 'Ref_Change_Type_2023',
    'Exploitation forestière artisanale (<5ha)': 'ArtFor',
    'Activité minière artisanale': 'ArtMine',
    'Infrastructure routière': 'InfraR',
    'Infrastructure d’urbanisation ou habitations': 'Urb',
    'Exploitation forestière industrielle (>5ha)': 'IndFor',
    'Autre moteur': 'Other',
    'Descriptif autre moteur': 'Other_Desc',
    'Activité minière industrielle': 'IndMine',
    'Agriculture industrielle': 'IndAg',
    'Agriculture paysanne': 'ArtAg',
    # 'Année de Changement' is not in columns_to_keep, so this entry currently has no effect
    'Année de Changement': 'Ref_Year_2023',
    'Type de non-forêt en 2022': 'Ref_NFtype_2022',
    'Type de forêt en 2022': 'Ref_Ftype_2022',
    'pl_strata': 'TNT_stratum'
    #add other columns as needed
    #,'NOM': 'Province' # DRC province
}

# Columns holding a oui/non answer; they all share the same recoding
columns_to_fill = ['Ref_Change_2023','ArtAg','ArtFor','IndFor','IndAg','ArtMine','IndMine','Urb','InfraR','Other']
yes_no_codes = {'oui': '1', 'non': '0', '': '0'}

# Per-column replacements: CEO answer labels -> codes (codes are kept as strings)
replace_dict = {
    'Ref_FNF_2022': {'non-forêt': '0', 'forêt': '1'},
    'Ref_Ftype_2022': {'1 - forêt dense': '1', '2 - forêt dense sèche': '2', '3 - forêt secondaire': '3','4 - forêt claire sèche': '4','7 - forêt mangrove': '7',
                 '8 - forêt marécageuse': '8', '9 - forêt galérie': '9','10 - plantation forestière': '10', '11- savane arborée': '11'},
    'Ref_NFtype_2022': {'12 - savane arbustive': '12', '13 - savane herbacée': '13', '14 - prairie aquatique': '14', '15 - sol nu végétation éparse': '15',
                  '16 - terres cultivées': '16', '17 - zone baties': '17', '18 - eau': '18'},
}
replace_dict.update({column: dict(yes_no_codes) for column in columns_to_fill})

# Select the columns, give them their short names and recode the answers
merged_ceo = (
    merged_ceo[columns_to_keep]
    .rename(columns=column_mapping)
    .replace(replace_dict)
)

# A missing oui/non answer counts as '0'
merged_ceo[columns_to_fill] = merged_ceo[columns_to_fill].fillna('0')

# Turn the file number into a label such as 'coll_2023_1'
merged_ceo['collection'] = 'coll_2023_' + merged_ceo['collection'].astype(str)

ceo 1 # of validated points: 247, # of not validated points: 0
ceo 2 # of validated points: 497, # of not validated points: 0


Keep only the validated points (duplicate plots were already removed from each file in the previous step).

In [8]:
# Keep only the points that were validated in CEO (interpreted == 1).
# .copy() makes the result an independent DataFrame: many columns are assigned to it in the
# following cells, and assigning to a boolean-mask slice of merged_ceo triggers pandas'
# SettingWithCopyWarning.
merged_ceo_interpreted = merged_ceo[merged_ceo['interpreted'] == 1].copy()
len(merged_ceo_interpreted)

744

In [9]:
# Column names after selection and renaming
ceo_column_names = list(merged_ceo_interpreted.columns)
print(ceo_column_names)

['plotid', 'sampleid', 'lon', 'lat', 'TNT_stratum', 'sample_geom', 'Ref_FNF_2022', 'Ref_Regeneration', 'Ref_Change_2023', 'Ref_Change_Type_2023', 'ArtFor', 'ArtMine', 'InfraR', 'Urb', 'IndFor', 'Other', 'Other_Desc', 'IndMine', 'IndAg', 'ArtAg', 'Ref_NFtype_2022', 'Ref_Ftype_2022', 'collection', 'interpreted']


In [10]:
# Preview the first rows of the validated points
merged_ceo_interpreted.head()

Unnamed: 0,plotid,sampleid,lon,lat,TNT_stratum,sample_geom,Ref_FNF_2022,Ref_Regeneration,Ref_Change_2023,Ref_Change_Type_2023,ArtFor,ArtMine,InfraR,Urb,IndFor,Other,Other_Desc,IndMine,IndAg,ArtAg,Ref_NFtype_2022,Ref_Ftype_2022,collection,interpreted
0,0,0,10.329144,1.166597,1,POINT(10.32914355 1.166597144),1,,0,,0,0,0,0,0,0,,0,0,0,,3,coll_2023_1,1
1,1,1,10.235449,1.674684,1,POINT(10.23544926 1.674684268),1,,0,,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1
2,2,2,8.765895,3.44733,1,POINT(8.76589529 3.447329819),1,,0,,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1
3,3,3,10.967397,1.110003,1,POINT(10.96739656 1.110003281),1,,0,,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1
4,4,4,10.259973,1.766492,1,POINT(10.25997327 1.76649209),1,,0,,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1


##### count points in different categories

In [11]:
# Sanity check: after the filter only interpreted == 1 should remain (NaN would be listed too)
merged_ceo_interpreted['interpreted'].value_counts(dropna=False)

interpreted
1    744
Name: count, dtype: int64

In [12]:
# Number of plots per Ref_Change_2023 code ('0' = no change, '1' = change), by interpreted flag
pd.pivot_table(merged_ceo_interpreted,values='plotid',index=['Ref_Change_2023'],columns=['interpreted'],aggfunc="count")

interpreted,1
Ref_Change_2023,Unnamed: 1_level_1
0,729
1,15


In [13]:
# Number of points per forest / non-forest code ('1' = forêt, '0' = non-forêt), NaN included
merged_ceo_interpreted['Ref_FNF_2022'].value_counts(dropna=False)

Ref_FNF_2022
1    666
0     78
Name: count, dtype: int64

In [14]:
# Number of points per raw change type as answered in CEO (NaN = no change type recorded)
merged_ceo_interpreted['Ref_Change_Type_2023'].value_counts(dropna=False)

Ref_Change_Type_2023
NaN              729
dégradation        8
déforestation      7
Name: count, dtype: int64

In [15]:
# Number of validated points contributed by each CEO file
merged_ceo_interpreted['collection'].value_counts(dropna=False)

collection
coll_2023_2    497
coll_2023_1    247
Name: count, dtype: int64

##### create a LC type column

In [16]:
# Single land-cover code per point: the forest type where one is recorded,
# otherwise the non-forest type
forest_type = merged_ceo_interpreted['Ref_Ftype_2022']
merged_ceo_interpreted['Ref_LCover_2022'] = forest_type.where(
    forest_type.notna(), merged_ceo_interpreted['Ref_NFtype_2022']
)
merged_ceo_interpreted['Ref_LCover_2022'].value_counts(dropna=False)

Ref_LCover_2022
1     506
3      91
2      42
16     28
15     23
14     14
9      10
7       7
17      6
18      6
8       6
11      2
12      1
10      1
4       1
Name: count, dtype: int64

### 2. add new columns for change and year

clean up columns and replace values

In [17]:
def f(x):
    """Classify one sample into 'NF', 'Def', 'Deg' or 'Stable'.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide 'Ref_FNF_2022' and 'Ref_Change_Type_2023'.

    Returns
    -------
    str
        'NF' when Ref_FNF_2022 is '0' (non-forest), whatever the change type;
        'Def' for a deforestation, 'Deg' for a degradation; 'Stable' otherwise
        (including a missing change type).

    Both the raw CEO labels ('déforestation', 'dégradation') and the classes this
    function itself returns ('Def', 'Deg') are recognised. The result is written back
    into 'Ref_Change_Type_2023', so without this a second run of the cell would turn
    every Def/Deg point into 'Stable'.
    """
    if x['Ref_FNF_2022'] == '0':
        return 'NF'
    change_type = x['Ref_Change_Type_2023']
    if change_type in ('déforestation', 'Def'):
        return 'Def'
    if change_type in ('dégradation', 'Deg'):
        return 'Deg'
    return 'Stable'

# Replace the CEO change-type answer with the class computed by f() for every row,
# then show how many points fall in each class
change_classes = merged_ceo_interpreted.apply(f, axis=1)
merged_ceo_interpreted['Ref_Change_Type_2023'] = change_classes
merged_ceo_interpreted['Ref_Change_Type_2023'].value_counts()

Ref_Change_Type_2023
Stable    651
NF         78
Deg         8
Def         7
Name: count, dtype: int64

In [18]:
def y(x):
    """Return the change-year label for one sample.

    Looks up x['Ref_Change_Type_2023']: 'Def' -> 'Def2023', 'Deg' -> 'Deg2023',
    'NF' and 'Stable' are returned unchanged, and any other value gives ''.
    """
    year_label_by_class = {
        'Def': 'Def2023',
        'Deg': 'Deg2023',
        'NF': 'NF',
        'Stable': 'Stable',
    }
    return year_label_by_class.get(x['Ref_Change_Type_2023'], '')

# Label every row with y(), then count the points per label, ordered alphabetically by label
merged_ceo_interpreted['Ref_Change_Year_2023'] = merged_ceo_interpreted.apply(y, axis=1)
annual_counts = merged_ceo_interpreted['Ref_Change_Year_2023'].value_counts().sort_index()
print(annual_counts)

Ref_Change_Year_2023
Def2023      7
Deg2023      8
NF          78
Stable     651
Name: count, dtype: int64


In [30]:
# 0/1 indicator column per 2023 change label (column name == label)
for year_label in ('Def2023', 'Deg2023'):
    merged_ceo_interpreted[year_label] = np.where(merged_ceo_interpreted['Ref_Change_Year_2023'] == year_label, 1, 0)

In [31]:
# 0/1 indicator columns for deforestation and degradation, based on the change class
for flag_column, change_class in (('Defall', 'Def'), ('Degall', 'Deg')):
    merged_ceo_interpreted[flag_column] = np.where(merged_ceo_interpreted['Ref_Change_Type_2023'] == change_class, 1, 0)

In [32]:
# 0/1 indicator columns for the stable and non-forest classes (column name == class)
for change_class in ('Stable', 'NF'):
    merged_ceo_interpreted[change_class] = np.where(merged_ceo_interpreted['Ref_Change_Type_2023'] == change_class, 1, 0)

In [33]:
# Indicator column name -> Ref_LCover_2022 code (codes are strings).
# One 0/1 column is created per entry, in this order.
land_cover_codes = {
    'DensFor': '1',
    'DensDryFor': '2',
    'SecFor': '3',
    'DryOpenFor': '4',
    # 'SubMont': '5',   # not used here
    # 'MontFor': '6',   # not used here
    'Mangrove': '7',
    'Swamp': '8',
    'Gallery': '9',
    'Plantation': '10',
    'Woodland': '11',
    'Shrubland': '12',
    'Grassland': '13',
    'Aquatic': '14',
    'Bare': '15',
    'Cultivated': '16',
    'Builtup': '17',
    'Water': '18',
}
for lc_column, lc_code in land_cover_codes.items():
    merged_ceo_interpreted[lc_column] = np.where(merged_ceo_interpreted['Ref_LCover_2022'] == lc_code, 1, 0)

In [34]:
# <forest class>_Def = 1 when the point belongs to that forest class AND was deforested
is_deforested = merged_ceo_interpreted['Ref_Change_Type_2023'] == "Def"
for forest_class in ('DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove',
                     'Swamp', 'Gallery', 'Plantation', 'Woodland'):
    in_forest_class = merged_ceo_interpreted[forest_class] == 1
    merged_ceo_interpreted[f'{forest_class}_Def'] = (in_forest_class & is_deforested).astype(int)

In [35]:
# <forest class>_Deg = 1 when the point belongs to that forest class AND was degraded
is_degraded = merged_ceo_interpreted['Ref_Change_Type_2023'] == "Deg"
for forest_class in ('DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove',
                     'Swamp', 'Gallery', 'Plantation', 'Woodland'):
    in_forest_class = merged_ceo_interpreted[forest_class] == 1
    merged_ceo_interpreted[f'{forest_class}_Deg'] = (in_forest_class & is_degraded).astype(int)

In [36]:
# <forest class>_stable = 1 when the point belongs to that forest class AND is stable
is_stable = merged_ceo_interpreted['Ref_Change_Type_2023'] == "Stable"
for forest_class in ('DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove',
                     'Swamp', 'Gallery', 'Plantation', 'Woodland'):
    in_forest_class = merged_ceo_interpreted[forest_class] == 1
    merged_ceo_interpreted[f'{forest_class}_stable'] = (in_forest_class & is_stable).astype(int)

In [38]:
# All 0/1 indicator columns, assembled group by group:
# change flags, land-cover classes, then forest class x change class
change_flag_columns = ['Deg2023', 'Def2023', 'Degall', 'Defall', 'Stable', 'NF']
forest_class_columns = ['DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove',
                        'Swamp', 'Gallery', 'Plantation', 'Woodland']
nonforest_class_columns = ['Shrubland', 'Grassland', 'Aquatic', 'Bare', 'Cultivated', 'Builtup', 'Water']
columns_to_convert = (
    change_flag_columns
    + forest_class_columns
    + nonforest_class_columns
    + [f'{forest_class}_{suffix}'
       for suffix in ('Def', 'Deg', 'stable')
       for forest_class in forest_class_columns]
)
# Replace any NaN left in the indicator columns with 0
merged_ceo_interpreted[columns_to_convert] = merged_ceo_interpreted[columns_to_convert].fillna(0)

In [39]:
# Total number of points over all labels in annual_counts
annual_counts.sum()

744

In [40]:
# Number of plots per change class, by interpreted flag, with row and column totals ('Total')
pd.pivot_table(merged_ceo_interpreted,values='plotid',index=['Ref_Change_Type_2023'],columns=['interpreted'],aggfunc="count",margins=True,
                             margins_name='Total')

interpreted,1,Total
Ref_Change_Type_2023,Unnamed: 1_level_1,Unnamed: 2_level_1
Def,7,7
Deg,8,8
NF,78,78
Stable,651,651
Total,744,744


In [42]:
# Column names after all the indicator columns have been added
merged_ceo_column_names = list(merged_ceo_interpreted.columns)
print(merged_ceo_column_names)

['plotid', 'sampleid', 'lon', 'lat', 'TNT_stratum', 'sample_geom', 'Ref_FNF_2022', 'Ref_Regeneration', 'Ref_Change_2023', 'Ref_Change_Type_2023', 'ArtFor', 'ArtMine', 'InfraR', 'Urb', 'IndFor', 'Other', 'Other_Desc', 'IndMine', 'IndAg', 'ArtAg', 'Ref_NFtype_2022', 'Ref_Ftype_2022', 'collection', 'interpreted', 'Ref_LCover_2022', 'Ref_Change_Year_2023', 'DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove', 'Swamp', 'Gallery', 'Plantation', 'Woodland', 'Shrubland', 'Grassland', 'Aquatic', 'Bare', 'Cultivated', 'Builtup', 'Water', 'DensFor_Def', 'DensDryFor_Def', 'DensFor_Deg', 'DensDryFor_Deg', 'SecFor_Deg', 'DryOpenFor_Deg', 'Mangrove_Deg', 'Swamp_Deg', 'Gallery_Deg', 'Plantation_Deg', 'Woodland_Deg', 'DensFor_stable', 'DensDryFor_stable', 'SecFor_stable', 'DryOpenFor_stable', 'Mangrove_stable', 'Swamp_stable', 'Gallery_stable', 'Plantation_stable', 'Woodland_stable', 'SecFor_Def', 'DryOpenFor_Def', 'Mangrove_Def', 'Swamp_Def', 'Gallery_Def', 'Plantation_Def', 'Woodland_Def', 'De

In [43]:
# Preview the first rows, now including the indicator columns
merged_ceo_interpreted.head()

Unnamed: 0,plotid,sampleid,lon,lat,TNT_stratum,sample_geom,Ref_FNF_2022,Ref_Regeneration,Ref_Change_2023,Ref_Change_Type_2023,ArtFor,ArtMine,InfraR,Urb,IndFor,Other,Other_Desc,IndMine,IndAg,ArtAg,Ref_NFtype_2022,Ref_Ftype_2022,collection,interpreted,Ref_LCover_2022,Ref_Change_Year_2023,DensFor,DensDryFor,SecFor,DryOpenFor,Mangrove,Swamp,Gallery,Plantation,Woodland,Shrubland,Grassland,Aquatic,Bare,Cultivated,Builtup,Water,DensFor_Def,DensDryFor_Def,DensFor_Deg,DensDryFor_Deg,SecFor_Deg,DryOpenFor_Deg,Mangrove_Deg,Swamp_Deg,Gallery_Deg,Plantation_Deg,Woodland_Deg,DensFor_stable,DensDryFor_stable,SecFor_stable,DryOpenFor_stable,Mangrove_stable,Swamp_stable,Gallery_stable,Plantation_stable,Woodland_stable,SecFor_Def,DryOpenFor_Def,Mangrove_Def,Swamp_Def,Gallery_Def,Plantation_Def,Woodland_Def,Def2023,Deg2023,Defall,Degall,Stable,NF
0,0,0,10.329144,1.166597,1,POINT(10.32914355 1.166597144),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,3,coll_2023_1,1,3,Stable,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1,1,1,10.235449,1.674684,1,POINT(10.23544926 1.674684268),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1,1,Stable,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,2,2,8.765895,3.44733,1,POINT(8.76589529 3.447329819),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1,1,Stable,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
3,3,3,10.967397,1.110003,1,POINT(10.96739656 1.110003281),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1,1,Stable,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4,4,4,10.259973,1.766492,1,POINT(10.25997327 1.76649209),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1,1,Stable,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0


In [44]:
# Column names as an array, in their current order
print(merged_ceo_interpreted.columns.values)

['plotid' 'sampleid' 'lon' 'lat' 'TNT_stratum' 'sample_geom'
 'Ref_FNF_2022' 'Ref_Regeneration' 'Ref_Change_2023'
 'Ref_Change_Type_2023' 'ArtFor' 'ArtMine' 'InfraR' 'Urb' 'IndFor' 'Other'
 'Other_Desc' 'IndMine' 'IndAg' 'ArtAg' 'Ref_NFtype_2022' 'Ref_Ftype_2022'
 'collection' 'interpreted' 'Ref_LCover_2022' 'Ref_Change_Year_2023'
 'DensFor' 'DensDryFor' 'SecFor' 'DryOpenFor' 'Mangrove' 'Swamp' 'Gallery'
 'Plantation' 'Woodland' 'Shrubland' 'Grassland' 'Aquatic' 'Bare'
 'Cultivated' 'Builtup' 'Water' 'DensFor_Def' 'DensDryFor_Def'
 'DensFor_Deg' 'DensDryFor_Deg' 'SecFor_Deg' 'DryOpenFor_Deg'
 'Mangrove_Deg' 'Swamp_Deg' 'Gallery_Deg' 'Plantation_Deg' 'Woodland_Deg'
 'DensFor_stable' 'DensDryFor_stable' 'SecFor_stable' 'DryOpenFor_stable'
 'Mangrove_stable' 'Swamp_stable' 'Gallery_stable' 'Plantation_stable'
 'Woodland_stable' 'SecFor_Def' 'DryOpenFor_Def' 'Mangrove_Def'
 'Swamp_Def' 'Gallery_Def' 'Plantation_Def' 'Woodland_Def' 'Def2023'
 'Deg2023' 'Defall' 'Degall' 'Stable' 'NF']


In [45]:
# Number of points per change-year label, NaN included
merged_ceo_interpreted['Ref_Change_Year_2023'].value_counts(dropna=False)

Ref_Change_Year_2023
Stable     651
NF          78
Deg2023      8
Def2023      7
Name: count, dtype: int64

In [46]:
# Preview the first rows again before the country code is added
merged_ceo_interpreted.head()

Unnamed: 0,plotid,sampleid,lon,lat,TNT_stratum,sample_geom,Ref_FNF_2022,Ref_Regeneration,Ref_Change_2023,Ref_Change_Type_2023,ArtFor,ArtMine,InfraR,Urb,IndFor,Other,Other_Desc,IndMine,IndAg,ArtAg,Ref_NFtype_2022,Ref_Ftype_2022,collection,interpreted,Ref_LCover_2022,Ref_Change_Year_2023,DensFor,DensDryFor,SecFor,DryOpenFor,Mangrove,Swamp,Gallery,Plantation,Woodland,Shrubland,Grassland,Aquatic,Bare,Cultivated,Builtup,Water,DensFor_Def,DensDryFor_Def,DensFor_Deg,DensDryFor_Deg,SecFor_Deg,DryOpenFor_Deg,Mangrove_Deg,Swamp_Deg,Gallery_Deg,Plantation_Deg,Woodland_Deg,DensFor_stable,DensDryFor_stable,SecFor_stable,DryOpenFor_stable,Mangrove_stable,Swamp_stable,Gallery_stable,Plantation_stable,Woodland_stable,SecFor_Def,DryOpenFor_Def,Mangrove_Def,Swamp_Def,Gallery_Def,Plantation_Def,Woodland_Def,Def2023,Deg2023,Defall,Degall,Stable,NF
0,0,0,10.329144,1.166597,1,POINT(10.32914355 1.166597144),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,3,coll_2023_1,1,3,Stable,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1,1,1,10.235449,1.674684,1,POINT(10.23544926 1.674684268),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1,1,Stable,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,2,2,8.765895,3.44733,1,POINT(8.76589529 3.447329819),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1,1,Stable,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
3,3,3,10.967397,1.110003,1,POINT(10.96739656 1.110003281),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1,1,Stable,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4,4,4,10.259973,1.766492,1,POINT(10.25997327 1.76649209),1,,0,Stable,0,0,0,0,0,0,,0,0,0,,1,coll_2023_1,1,1,Stable,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0


In [47]:
# Tag every row with the country code set in the parameters (iso)
merged_ceo_interpreted['ISO'] = iso

In [48]:
# List the dtype of every column before exporting
print(merged_ceo_interpreted.dtypes)

plotid                    int64
sampleid                  int64
lon                     float64
lat                     float64
TNT_stratum               int64
sample_geom              object
Ref_FNF_2022             object
Ref_Regeneration         object
Ref_Change_2023          object
Ref_Change_Type_2023     object
ArtFor                   object
ArtMine                  object
InfraR                   object
Urb                      object
IndFor                   object
Other                    object
Other_Desc              float64
IndMine                  object
IndAg                    object
ArtAg                    object
Ref_NFtype_2022          object
Ref_Ftype_2022           object
collection               object
interpreted               int64
Ref_LCover_2022          object
Ref_Change_Year_2023     object
DensFor                   int64
DensDryFor                int64
SecFor                    int64
DryOpenFor                int64
Mangrove                  int64
Swamp   

### 4. export ceo data to one csv file

In [50]:
# Write the table to the path in ceo_data_out; the DataFrame index is written as a column labelled 'ID'
merged_ceo_interpreted.to_csv(ceo_data_out,index_label='ID')