![title](../../images/header.png)

Merging CEO data phase II (2023)
-------
This notebook merges validated data from different CEO CAFI projects for 2023
###### For more information contact aurelie.shapiro@fao.org or remi.dannunzio@fao.org

In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Lift pandas' display limits: show all columns, all rows, and the full
# (unwrapped, untruncated) text of every cell.
for _display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

## Enter parameters

In [51]:
# Country identifier for this run; it is written to the 'ISO' column of the output table.
# Codes used by the project: COD COG CMR GAB EQG CAF
iso = 'COG'

### 1. Read all CEO sample files
Download your CEO sample files from collect.earth and upload them to SEPAL using FileZilla.
Add one `read_csv` line per CEO project.

In [52]:
# Load the raw CEO sample exports, one DataFrame per CEO project
_ceo_dir = '/home/sepal-user/module_results/esbae/COG/'
ceo1 = pd.read_csv(_ceo_dir + 'ceo-SUIVI_MNV_2022_2023_CONGO-set1-sample-data.csv')
ceo2 = pd.read_csv(_ceo_dir + 'ceo-SUIVI_MNV_2022_2023_CONGO-set2-sample-data.csv')

make a list of the ceo files for processing

In [53]:
# Every CEO frame to clean and merge, one entry per file read above
ceo_files = [ceo1,ceo2]
# If you only have one file, use this line instead:
#ceo_files = [ceo1]

In [54]:
# Tag every row with the collection (CEO project) it was read from
for _label, _frame in (('coll_2023_1', ceo1), ('coll_2023_2', ceo2)):
    _frame['collection'] = _label

In [55]:
# Path of the output CSV: the merged, validated CEO data with the selected
# columns plus the indicator columns computed in this notebook
ceo_data_out =  '/home/sepal-user/module_results/esbae/COG/COG_all_ceo_2023_clean.csv'

### end of parameters

In [56]:
# Show the headers of the first CEO file so the question labels used
# further down can be checked against the export
column_names = list(ceo1.columns)
print(column_names)

['plotid', 'sampleid', 'sample_internal_id', 'lon', 'lat', 'email', 'flagged', 'collection_time', 'analysis_duration', 'imagery_title', 'imagery_attributions', 'sample_geom', 'forêt ou non-forêt en 2022?', 'Type de non-forêt en 2022', 'Type de forêt en 2022', 'Définir la strate en 2023 ', 'Type de non-forêt en 2023', 'Type de forêt en 2023', 'Y-a t-il un changement négatif sur la période 2022-2023', 'Quel type de changement ? ', "Indiquez l'année du changement ", 'Type de moteur pour changement 1', 'Le feu a t-il causé le changement ?', 'Décrivez autres', 'y-a t-il un second changement ?', 'Type du changement 2 (1)', 'Type de moteur pour changement 2 (1)', 'Le feu a t-il causé le changement ?.1', 'Décrivez autres (1) (0)', 'Commentaires', 'collection']


##### Add a column named "interpreted", equal to 1 when the point has been validated in CEO and 0 otherwise
Count the number of validated points per CEO file.
Replace forest and non-forest types with codes.

In [57]:
# Clean every CEO file, merge them into one table, keep and rename the useful
# columns, then recode the French answers into short codes.

# List to store the cleaned DataFrames
modified_ceo_files = []

for i, ceo in enumerate(ceo_files):

    # NOTE: the three in-place calls below modify the frames held in ceo_files
    # (ceo1, ceo2, ...) themselves.

    # Sort by collection time, oldest first. Rows with a missing
    # collection_time are placed first (pandas puts them last by default) so
    # that keep='last' below never prefers an undated row over a dated one.
    # A stable sort keeps the result reproducible when timestamps are tied.
    ceo.sort_values(by='collection_time', ascending=True, na_position='first',
                    kind='mergesort', inplace=True)

    # Remove duplicates keeping the last occurrence (the most recent date)
    ceo.drop_duplicates(subset='sample_geom', keep='last', inplace=True)

    # Reset the index
    ceo.reset_index(drop=True, inplace=True)

    # Add 'interpreted' column with 1 when 'email' is not null, otherwise 0
    ceo['interpreted'] = np.where(ceo['email'].notna(), 1, 0)

    # Calculate total count for each 'interpreted' value
    interpreted_counts = ceo['interpreted'].value_counts().to_dict()

    # Append the cleaned DataFrame to the list
    modified_ceo_files.append(ceo)

    # Print count for the current DataFrame
    print(f"ceo {i + 1} # of validated points: {interpreted_counts.get(1, 0)}, # of not validated points: {interpreted_counts.get(0, 0)}")

# Concatenate the cleaned DataFrames into one merged DataFrame
merged_ceo = pd.concat(modified_ceo_files, ignore_index=True)


# Specify the columns you want to keep in the merged DataFrame
# (the labels must match the CEO export exactly, trailing spaces included)
columns_to_keep = ['plotid', 'sampleid', 'sample_internal_id', 'lon', 'lat', 'collection_time',
                   'sample_geom', 'forêt ou non-forêt en 2022?', 'Type de non-forêt en 2022', 'Type de forêt en 2022', 
                   'Définir la strate en 2023 ', 'Type de non-forêt en 2023', 'Type de forêt en 2023', 
                   'Y-a t-il un changement négatif sur la période 2022-2023', "Indiquez l'année du changement ",'Quel type de changement ? ',
                   'Type de moteur pour changement 1', 'Le feu a t-il causé le changement ?',
                   'y-a t-il un second changement ?', 'Type du changement 2 (1)', 
                   'Type de moteur pour changement 2 (1)', 'Le feu a t-il causé le changement ?.1',
                   'Commentaires', 'collection','interpreted']

# Select only the specified columns
merged_ceo = merged_ceo[columns_to_keep]

# Create a dictionary to map old column names to new column names
column_mapping = {
    'forêt ou non-forêt en 2022?': 'Ref_FNF_2022', 
    'Type de non-forêt en 2022':'Ref_NFtype_2022',
    'Type de forêt en 2022' :'Ref_Ftype_2022',
    'Définir la strate en 2023 ':'Ref_FNF_2023',
    'Type de non-forêt en 2023':'Ref_NFtype_2023',
    'Type de forêt en 2023':'Ref_Ftype_2023',
    'Y-a t-il un changement négatif sur la période 2022-2023':'Ref_Change_2023',
     "Indiquez l'année du changement ": 'Ref_Change_Year',
    'Quel type de changement ? ':'Ref_Change_Type_2023',
    'Type de moteur pour changement 1':'Ref_Driver1_2023',
    'Le feu a t-il causé le changement ?':'Ref_Fire_2023',
     'y-a t-il un second changement ?':'Ref_Change2_2023',
    'Type du changement 2 (1)':'Ref_Change2_Type_2023',
    'Type de moteur pour changement 2 (1)':'Ref_Driver2_2023',
    'Le feu a t-il causé le changement ?.1':'Ref_Fire2_2023',
}

# Use the rename() method to rename the columns
merged_ceo = merged_ceo.rename(columns=column_mapping)

# Define the replacements as a dictionary: {column: {CEO answer: code}}.
# Matching is exact, so spelling, case and leading/trailing spaces matter.
# NOTE(review): several labels differ between otherwise parallel entries
# (e.g. ' zone baties' / 'eau ' in 2022 vs 'zone baties' / 'eau' in 2023,
# 'oui' in Ref_Fire_2023 vs 'Oui' in Ref_Fire2_2023) - confirm each one
# against the labels actually present in the CEO export.
replace_dict = {  
    'Ref_FNF_2022': {'non-forêt': '0', 'forêt': '1'},
    'Ref_Ftype_2022': {'1 - forêt dense': '1', '2 - forêt dense sèche': '2', '3 - forêt secondaire': '3','4- forêt claire': '4','7 - forêt mangrove': '7',
                 '8 - forêt marécageuse': '8', '9 - forêt galérie': '9','10 - plantation forestière': '10', '11- savane arborée': '11'},
    'Ref_NFtype_2022': {'savane arborée/arbustive': '12', 'savane herbacée': '13','prairie aquatique': '14', 'sol nu végétation éparse': '15',
                  'terres cultivées annuelles': '16', ' zone baties': '17', 'eau ': '18', 'terres cultivées permanentes':'16'},
    'Ref_FNF_2023': {'non-forêt': '0', 'forêt': '1'},
    'Ref_Ftype_2023': {'1 - forêt dense': '1', '2 - forêt dense sèche': '2', '3 - forêt secondaire': '3','4- forêt claire': '4','7 - forêt mangrove': '7',
                 '8 - forêt marécageuse': '8', '9 - forêt galérie': '9','10 - plantation forestière': '10', '11- savane arborée': '11'},
    'Ref_NFtype_2023': {'savane arbustive/arborée': '12', 'savane herbacée': '13','prairie aquatique': '14', 'sol nu végétation éparse': '15',
                  'terres cultivées annuelles': '16', 'zone baties': '17', 'eau': '18', 'terres cultivées permanentes':'16'},
    'Ref_Change_2023': {'Oui':'1','Non':'0','':'0'},
    'Ref_Change2_2023': {'Oui':'1','Non':'0','':'0'},
    'Ref_Fire_2023': {'oui':'1','non':'0','':'0'},
    'Ref_Fire2_2023': {'Oui':'1','Non':'0','':'0'},
    # 'Ref_Regeneration' is not among the columns kept above; the entry has
    # no effect unless such a column is added.
    'Ref_Regeneration': {'Oui':'1','Non':'0','':'0'},
    'Ref_Change_Type_2023': {'Dégradation':'Deg','Déforestation ':'Def','':'0'},
    'Ref_Change2_Type_2023': {'Dégradation':'Deg','Déforestation':'Def','':'0'},
}

# Use the replace() method to replace strings in specified columns
merged_ceo = merged_ceo.replace(replace_dict)

columns_to_fill = ['Ref_Change_2023','Ref_Change2_2023']
# Set NaN values in the selected columns to '0' (codes are kept as strings)
merged_ceo[columns_to_fill] = merged_ceo[columns_to_fill].fillna('0')

ceo 1 # of validated points: 498, # of not validated points: 0
ceo 2 # of validated points: 2729, # of not validated points: 0


Remove non-validated points and check for duplicates

In [58]:
# Keep only the samples flagged as validated and report how many remain
_is_validated = merged_ceo['interpreted'].eq(1)
merged_ceo_interpreted = merged_ceo.loc[_is_validated]
len(merged_ceo_interpreted)

3227

In [59]:
# this might take some time
# Each validated sample should be a distinct location: compare the number of
# distinct geometries with the number of rows. nunique() ignores missing
# values, so a missing sample_geom also makes the check fail.
unique_check = merged_ceo_interpreted['sample_geom'].nunique() == len(merged_ceo_interpreted['sample_geom'])

# Print the result
if unique_check:
    print("All values are unique!")
else:
    # Report how many rows are involved so the problem can be located
    n_repeated = int(merged_ceo_interpreted['sample_geom'].duplicated(keep=False).sum())
    print(f"WARNING: 'sample_geom' values are not unique ({n_repeated} rows share a geometry with another row) - remove the duplicates before continuing")

All values are unique!


In [60]:
# Display rows where 'sample_geom' is duplicated
#duplicates = merged_ceo_interpreted[merged_ceo_interpreted['sample_geom'].duplicated(keep=False)]

# Show duplicates
#duplicates['collection'].value_counts(dropna=False)

In [61]:
#merged_ceo_interpreted = merged_ceo_interpreted.sort_values(by='collection', ascending=True)

# Remove duplicates keeping the last occurrence (the most recent date)
#merged_ceo_nodup = merged_ceo_interpreted.drop_duplicates(subset='sample_geom', keep='first')

# Reset the index
#merged_ceo_nodup = merged_ceo_nodup.reset_index(drop=True)

In [62]:
#len(merged_ceo_nodup)

In [63]:
#merged_ceo_interpreted = merged_ceo_nodup

In [64]:
# Show the column headers of the validated table after renaming
ceo_column_names = list(merged_ceo_interpreted.columns)
print(ceo_column_names)

['plotid', 'sampleid', 'sample_internal_id', 'lon', 'lat', 'collection_time', 'sample_geom', 'Ref_FNF_2022', 'Ref_NFtype_2022', 'Ref_Ftype_2022', 'Ref_FNF_2023', 'Ref_NFtype_2023', 'Ref_Ftype_2023', 'Ref_Change_2023', 'Ref_Change_Year', 'Ref_Change_Type_2023', 'Ref_Driver1_2023', 'Ref_Fire_2023', 'Ref_Change2_2023', 'Ref_Change2_Type_2023', 'Ref_Driver2_2023', 'Ref_Fire2_2023', 'Commentaires', 'collection', 'interpreted']


In [65]:
# Preview the first rows of the validated, recoded table
merged_ceo_interpreted.head()

Unnamed: 0,plotid,sampleid,sample_internal_id,lon,lat,collection_time,sample_geom,Ref_FNF_2022,Ref_NFtype_2022,Ref_Ftype_2022,Ref_FNF_2023,Ref_NFtype_2023,Ref_Ftype_2023,Ref_Change_2023,Ref_Change_Year,Ref_Change_Type_2023,Ref_Driver1_2023,Ref_Fire_2023,Ref_Change2_2023,Ref_Change2_Type_2023,Ref_Driver2_2023,Ref_Fire2_2023,Commentaires,collection,interpreted
0,1,1,754911309,17.777704,2.649177,2024-06-17 12:31,POINT(17.77770438848954 2.649176688632676),1,,1.0,1,,1.0,0,,,,,0,,,,rentrez vos commentaires,coll_2023_1,1
1,2,2,754911310,17.682124,0.133894,2024-06-17 12:33,POINT(17.68212364225922 0.133893893098016),1,,8.0,1,,8.0,0,,,,,0,,,,rentrez vos commentaires,coll_2023_1,1
2,3,3,754911311,14.8474,-2.433581,2024-06-17 12:38,POINT(14.847399931691658 -2.433581020443989),0,16.0,,0,16.0,,0,,,,,0,,,,rentrez vos commentaires,coll_2023_1,1
3,4,4,754911312,16.598037,0.077929,2024-06-17 12:46,POINT(16.59803675738378 0.077928850897369),1,,8.0,1,,8.0,0,,,,,0,,,,rentrez vos commentaires,coll_2023_1,1
4,5,5,754911313,14.588955,0.88785,2024-06-17 13:06,POINT(14.588954624450473 0.88784991105953),1,,1.0,1,,1.0,0,,,,,0,,,,rentrez vos commentaires,coll_2023_1,1


##### count points in different categories

In [66]:
# Rows per value of the validation flag (missing values counted too)
merged_ceo_interpreted['interpreted'].value_counts(dropna=False)

interpreted
1    3227
Name: count, dtype: int64

In [67]:
# Rows per collection (CEO project), missing values counted too
merged_ceo_interpreted['collection'].value_counts(dropna=False)

collection
coll_2023_2    2729
coll_2023_1     498
Name: count, dtype: int64

In [68]:
# Number of samples per change flag (rows), split by validation flag (columns)
merged_ceo_interpreted.pivot_table(
    values='plotid',
    index=['Ref_Change_2023'],
    columns=['interpreted'],
    aggfunc='count',
)

interpreted,1
Ref_Change_2023,Unnamed: 1_level_1
0,3187
1,40


In [69]:
# Number of samples per reported year of change (rows), split by validation flag (columns)
merged_ceo_interpreted.pivot_table(
    values='plotid',
    index=['Ref_Change_Year'],
    columns=['interpreted'],
    aggfunc='count',
)

interpreted,1
Ref_Change_Year,Unnamed: 1_level_1
2023.0,40


In [70]:
# Number of samples per collection (rows), split by validation flag (columns)
merged_ceo_interpreted.pivot_table(
    values='plotid',
    index=['collection'],
    columns=['interpreted'],
    aggfunc='count',
)

interpreted,1
collection,Unnamed: 1_level_1
coll_2023_1,498
coll_2023_2,2729


In [71]:
# Rows per 2022 forest / non-forest code ('1' = forest, '0' = non-forest)
merged_ceo_interpreted['Ref_FNF_2022'].value_counts(dropna=False)

Ref_FNF_2022
1    2119
0    1108
Name: count, dtype: int64

In [72]:
# Rows per type of the first change (NaN = no change type recorded)
merged_ceo_interpreted['Ref_Change_Type_2023'].value_counts(dropna=False)

Ref_Change_Type_2023
NaN    3187
Deg      22
Def      18
Name: count, dtype: int64

In [73]:
# Rows per type of the second change (NaN = no second change type recorded)
merged_ceo_interpreted['Ref_Change2_Type_2023'].value_counts(dropna=False)

Ref_Change2_Type_2023
NaN    3225
Def       1
Deg       1
Name: count, dtype: int64

In [74]:
# Rows per collection (CEO project), missing values counted too
merged_ceo_interpreted['collection'].value_counts(dropna=False)

collection
coll_2023_2    2729
coll_2023_1     498
Name: count, dtype: int64

##### create a LC type column

In [75]:
# Work on an independent copy before adding columns to the filtered table
merged_ceo_interpreted = merged_ceo_interpreted.copy()

# 2022 land-cover code: the forest type where one is recorded, otherwise the
# non-forest type
_forest_type_2022 = merged_ceo_interpreted['Ref_Ftype_2022']
_nonforest_type_2022 = merged_ceo_interpreted['Ref_NFtype_2022']
merged_ceo_interpreted['Ref_LCover_2022'] = _forest_type_2022.fillna(_nonforest_type_2022)

merged_ceo_interpreted['Ref_LCover_2022'].value_counts(dropna=False)

Ref_LCover_2022
1     1162
12     553
3      525
13     314
8      201
9      190
16     104
14      57
15      48
4       29
17      18
18      14
10       8
7        4
Name: count, dtype: int64

##### If any points are labelled "je ne sais pas" ("I don't know"), they need to be reviewed and corrected in CEO!

### 2. add new columns for change and year

clean up columns and replace values

In [76]:
def f(x):
    """Classify one sample as 'NF', 'Def', 'Deg' or 'Stable'.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide 'Ref_FNF_2022', 'Ref_Change_Type_2023' and
        'Ref_Change2_Type_2023'.

    Returns
    -------
    str
        'NF'     if the sample was non-forest in 2022 ('Ref_FNF_2022' == '0'),
                 whatever change was recorded;
        'Def'    if either the first or the second change is 'Def';
        'Deg'    if either change is 'Deg' and neither is 'Def';
        'Stable' otherwise.
    """
    if x['Ref_FNF_2022'] == '0':
        return 'NF'

    recorded_changes = (x['Ref_Change_Type_2023'], x['Ref_Change2_Type_2023'])

    # Deforestation is checked across BOTH changes before degradation, so a
    # sample that was degraded and then deforested is counted as 'Def'
    # instead of being reported as degradation only.
    if any(change == 'Def' for change in recorded_changes):
        return 'Def'
    if any(change == 'Deg' for change in recorded_changes):
        return 'Deg'
    return 'Stable'

# Overwrite the raw answer about the first change with the class returned by f
# ('NF', 'Def', 'Deg' or 'Stable'), then count the samples per class
merged_ceo_interpreted['Ref_Change_Type_2023'] =merged_ceo_interpreted.apply(f, axis=1)
merged_ceo_interpreted['Ref_Change_Type_2023'].value_counts()

Ref_Change_Type_2023
Stable    2080
NF        1108
Deg         20
Def         19
Name: count, dtype: int64

In [77]:
def y(x):
    """Return the change label of one sample, with the year appended for changes.

    Reads x['Ref_Change_Type_2023'] and returns 'Def2023' for 'Def',
    'Deg2023' for 'Deg', 'NF' for 'NF', 'Stable' for 'Stable', and an empty
    string for any other value.
    """
    year_labels = (
        ('Def', 'Def2023'),
        ('Deg', 'Deg2023'),
        ('NF', 'NF'),
        ('Stable', 'Stable'),
    )
    change_class = x['Ref_Change_Type_2023']
    for known_class, label in year_labels:
        if change_class == known_class:
            return label
    return ''

# Label every sample with its change class and year
merged_ceo_interpreted['Ref_Change_Year_2023'] = merged_ceo_interpreted.apply(y, axis=1)

# Number of samples per label, ordered by label
annual_counts = merged_ceo_interpreted['Ref_Change_Year_2023'].value_counts().sort_index()
print(annual_counts)

Ref_Change_Year_2023
Def2023      19
Deg2023      20
NF         1108
Stable     2080
Name: count, dtype: int64


In [78]:
# Combined label "<change class>_<2022 land-cover code>", e.g. "Stable_1"
merged_ceo_interpreted['Ref_Change_LCover_2023'] = [
    f"{change_class}_{cover_code}"
    for change_class, cover_code in zip(
        merged_ceo_interpreted['Ref_Change_Type_2023'],
        merged_ceo_interpreted['Ref_LCover_2022'],
    )
]

In [80]:
# 0/1 indicator columns named after the change/year label they flag
for _year_label in ('Def2023', 'Deg2023'):
    merged_ceo_interpreted[_year_label] = (
        merged_ceo_interpreted['Ref_Change_Year_2023'] == _year_label
    ).astype(int)

In [81]:
# 0/1 indicators for deforestation and degradation
for _flag, _change_class in (('Defall', 'Def'), ('Degall', 'Deg')):
    merged_ceo_interpreted[_flag] = (
        merged_ceo_interpreted['Ref_Change_Type_2023'] == _change_class
    ).astype(int)

In [82]:
# 0/1 indicators for stable forest and non-forest; each column is named
# after the class it flags
for _change_class in ('Stable', 'NF'):
    merged_ceo_interpreted[_change_class] = (
        merged_ceo_interpreted['Ref_Change_Type_2023'] == _change_class
    ).astype(int)

In [83]:
# 2022 land-cover code -> name of its 0/1 indicator column.
# Codes '5' (SubMont) and '6' (MontFor) are not flagged here; their lines
# were already disabled in the earlier version of this cell.
_lcover_flag_names = {
    '1': 'DensFor',
    '2': 'DensDryFor',
    '3': 'SecFor',
    '4': 'DryOpenFor',
    '7': 'Mangrove',
    '8': 'Swamp',
    '9': 'Gallery',
    '10': 'Plantation',
    '11': 'Woodland',
    '12': 'Shrubland',
    '13': 'Grassland',
    '14': 'Aquatic',
    '15': 'Bare',
    '16': 'Cultivated',
    '17': 'Builtup',
    '18': 'Water',
}

# The codes are compared as strings, matching how they are stored in
# Ref_LCover_2022 by the recoding step
for _cover_code, _flag in _lcover_flag_names.items():
    merged_ceo_interpreted[_flag] = np.where(
        merged_ceo_interpreted['Ref_LCover_2022'] == _cover_code, 1, 0
    )

In [84]:
# <forest class>_Def = 1 when the sample belongs to that 2022 forest class
# AND is classified as deforestation
_is_def = merged_ceo_interpreted['Ref_Change_Type_2023'] == "Def"
for _forest_flag in ('DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove',
                     'Swamp', 'Gallery', 'Plantation', 'Woodland'):
    merged_ceo_interpreted[f'{_forest_flag}_Def'] = (
        (merged_ceo_interpreted[_forest_flag] == 1) & _is_def
    ).astype(int)

In [85]:
# <forest class>_Deg = 1 when the sample belongs to that 2022 forest class
# AND is classified as degradation
_is_deg = merged_ceo_interpreted['Ref_Change_Type_2023'] == "Deg"
for _forest_flag in ('DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove',
                     'Swamp', 'Gallery', 'Plantation', 'Woodland'):
    merged_ceo_interpreted[f'{_forest_flag}_Deg'] = (
        (merged_ceo_interpreted[_forest_flag] == 1) & _is_deg
    ).astype(int)

In [86]:
# <forest class>_Stable = 1 when the sample belongs to that 2022 forest class
# AND is classified as stable
_is_stable = merged_ceo_interpreted['Ref_Change_Type_2023'] == "Stable"
for _forest_flag in ('DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove',
                     'Swamp', 'Gallery', 'Plantation', 'Woodland'):
    merged_ceo_interpreted[f'{_forest_flag}_Stable'] = (
        (merged_ceo_interpreted[_forest_flag] == 1) & _is_stable
    ).astype(int)

In [88]:
# Names of every 0/1 indicator column, assembled from their building blocks
_change_flags = ['Deg2023', 'Def2023', 'Degall', 'Defall', 'Stable', 'NF']
_forest_flags = ['DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove',
                 'Swamp', 'Gallery', 'Plantation', 'Woodland']
_nonforest_flags = ['Shrubland', 'Grassland', 'Aquatic', 'Bare', 'Cultivated', 'Builtup', 'Water']
_forest_change_flags = [
    f'{_forest_flag}_{_change_class}'
    for _change_class in ('Def', 'Deg', 'Stable')
    for _forest_flag in _forest_flags
]
columns_to_convert = _change_flags + _forest_flags + _nonforest_flags + _forest_change_flags

# Replace any missing indicator value with 0
merged_ceo_interpreted[columns_to_convert] = merged_ceo_interpreted[columns_to_convert].fillna(0)

In [89]:
# Total number of labelled samples (sum of the per-label counts)
annual_counts.sum()

3227

In [90]:
# Samples per change class, with row and column totals
merged_ceo_interpreted.pivot_table(
    values='plotid',
    index=['Ref_Change_Type_2023'],
    columns=['interpreted'],
    aggfunc='count',
    margins=True,
    margins_name='Total',
)

interpreted,1,Total
Ref_Change_Type_2023,Unnamed: 1_level_1,Unnamed: 2_level_1
Def,19,19
Deg,20,20
NF,1108,1108
Stable,2080,2080
Total,3227,3227


In [91]:
# Samples per value of the Woodland_Def indicator, with totals
merged_ceo_interpreted.pivot_table(
    values='plotid',
    index=['Woodland_Def'],
    columns=['interpreted'],
    aggfunc='count',
    margins=True,
    margins_name='Total',
)

interpreted,1,Total
Woodland_Def,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3227,3227
Total,3227,3227


In [93]:
# Samples per combined change/land-cover label (rows) and collection (columns), with totals
merged_ceo_interpreted.pivot_table(
    values='plotid',
    index=['Ref_Change_LCover_2023'],
    columns=['collection'],
    aggfunc='count',
    margins=True,
    margins_name='Total',
)

collection,coll_2023_1,coll_2023_2,Total
Ref_Change_LCover_2023,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Def_1,,1.0,1
Def_3,,17.0,17
Def_9,,1.0,1
Deg_1,,5.0,5
Deg_3,,13.0,13
Deg_9,,2.0,2
NF_12,53.0,500.0,553
NF_13,53.0,261.0,314
NF_14,10.0,47.0,57
NF_15,7.0,41.0,48


In [94]:
# Cross-tabulate the 2022 land-cover code (rows) against the change class (columns)
matrix = pd.crosstab(
    index=merged_ceo_interpreted['Ref_LCover_2022'],
    columns=merged_ceo_interpreted['Ref_Change_Type_2023'],
    rownames=['Ref_LCover'],
    colnames=['Ref_Change_Type'],
)

# Show the resulting table of counts
print(matrix)

Ref_Change_Type  Def  Deg   NF  Stable
Ref_LCover                            
1                  1    5    0    1156
10                 0    0    0       8
12                 0    0  553       0
13                 0    0  314       0
14                 0    0   57       0
15                 0    0   48       0
16                 0    0  104       0
17                 0    0   18       0
18                 0    0   14       0
3                 17   13    0     495
4                  0    0    0      29
7                  0    0    0       4
8                  0    0    0     201
9                  1    2    0     187


In [95]:
# Show every column header now that the indicator columns have been added
merged_ceo_column_names = list(merged_ceo_interpreted.columns)
print(merged_ceo_column_names)

['plotid', 'sampleid', 'sample_internal_id', 'lon', 'lat', 'collection_time', 'sample_geom', 'Ref_FNF_2022', 'Ref_NFtype_2022', 'Ref_Ftype_2022', 'Ref_FNF_2023', 'Ref_NFtype_2023', 'Ref_Ftype_2023', 'Ref_Change_2023', 'Ref_Change_Year', 'Ref_Change_Type_2023', 'Ref_Driver1_2023', 'Ref_Fire_2023', 'Ref_Change2_2023', 'Ref_Change2_Type_2023', 'Ref_Driver2_2023', 'Ref_Fire2_2023', 'Commentaires', 'collection', 'interpreted', 'Ref_LCover_2022', 'Ref_Change_Year_2023', 'Ref_Change_LCover_2023', 'Def2023', 'Deg2023', 'Defall', 'Degall', 'Stable', 'NF', 'DensFor', 'DensDryFor', 'SecFor', 'DryOpenFor', 'Mangrove', 'Swamp', 'Gallery', 'Plantation', 'Woodland', 'Shrubland', 'Grassland', 'Aquatic', 'Bare', 'Cultivated', 'Builtup', 'Water', 'DensFor_Def', 'DensDryFor_Def', 'SecFor_Def', 'DryOpenFor_Def', 'Mangrove_Def', 'Swamp_Def', 'Gallery_Def', 'Plantation_Def', 'Woodland_Def', 'DensFor_Deg', 'DensDryFor_Deg', 'SecFor_Deg', 'DryOpenFor_Deg', 'Mangrove_Deg', 'Swamp_Deg', 'Gallery_Deg', 'Plantati

In [96]:
# Preview the first rows, including the new indicator columns
merged_ceo_interpreted.head()

Unnamed: 0,plotid,sampleid,sample_internal_id,lon,lat,collection_time,sample_geom,Ref_FNF_2022,Ref_NFtype_2022,Ref_Ftype_2022,Ref_FNF_2023,Ref_NFtype_2023,Ref_Ftype_2023,Ref_Change_2023,Ref_Change_Year,Ref_Change_Type_2023,Ref_Driver1_2023,Ref_Fire_2023,Ref_Change2_2023,Ref_Change2_Type_2023,Ref_Driver2_2023,Ref_Fire2_2023,Commentaires,collection,interpreted,Ref_LCover_2022,Ref_Change_Year_2023,Ref_Change_LCover_2023,Def2023,Deg2023,Defall,Degall,Stable,NF,DensFor,DensDryFor,SecFor,DryOpenFor,Mangrove,Swamp,Gallery,Plantation,Woodland,Shrubland,Grassland,Aquatic,Bare,Cultivated,Builtup,Water,DensFor_Def,DensDryFor_Def,SecFor_Def,DryOpenFor_Def,Mangrove_Def,Swamp_Def,Gallery_Def,Plantation_Def,Woodland_Def,DensFor_Deg,DensDryFor_Deg,SecFor_Deg,DryOpenFor_Deg,Mangrove_Deg,Swamp_Deg,Gallery_Deg,Plantation_Deg,Woodland_Deg,DensFor_Stable,DensDryFor_Stable,SecFor_Stable,DryOpenFor_Stable,Mangrove_Stable,Swamp_Stable,Gallery_Stable,Plantation_Stable,Woodland_Stable
0,1,1,754911309,17.777704,2.649177,2024-06-17 12:31,POINT(17.77770438848954 2.649176688632676),1,,1.0,1,,1.0,0,,Stable,,,0,,,,rentrez vos commentaires,coll_2023_1,1,1,Stable,Stable_1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,2,2,754911310,17.682124,0.133894,2024-06-17 12:33,POINT(17.68212364225922 0.133893893098016),1,,8.0,1,,8.0,0,,Stable,,,0,,,,rentrez vos commentaires,coll_2023_1,1,8,Stable,Stable_8,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,3,3,754911311,14.8474,-2.433581,2024-06-17 12:38,POINT(14.847399931691658 -2.433581020443989),0,16.0,,0,16.0,,0,,NF,,,0,,,,rentrez vos commentaires,coll_2023_1,1,16,NF,NF_16,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,4,4,754911312,16.598037,0.077929,2024-06-17 12:46,POINT(16.59803675738378 0.077928850897369),1,,8.0,1,,8.0,0,,Stable,,,0,,,,rentrez vos commentaires,coll_2023_1,1,8,Stable,Stable_8,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
4,5,5,754911313,14.588955,0.88785,2024-06-17 13:06,POINT(14.588954624450473 0.88784991105953),1,,1.0,1,,1.0,0,,Stable,,,0,,,,rentrez vos commentaires,coll_2023_1,1,1,Stable,Stable_1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [97]:
# Print the column headers as a NumPy array
print(merged_ceo_interpreted.columns.values)

['plotid' 'sampleid' 'sample_internal_id' 'lon' 'lat' 'collection_time'
 'sample_geom' 'Ref_FNF_2022' 'Ref_NFtype_2022' 'Ref_Ftype_2022'
 'Ref_FNF_2023' 'Ref_NFtype_2023' 'Ref_Ftype_2023' 'Ref_Change_2023'
 'Ref_Change_Year' 'Ref_Change_Type_2023' 'Ref_Driver1_2023'
 'Ref_Fire_2023' 'Ref_Change2_2023' 'Ref_Change2_Type_2023'
 'Ref_Driver2_2023' 'Ref_Fire2_2023' 'Commentaires' 'collection'
 'interpreted' 'Ref_LCover_2022' 'Ref_Change_Year_2023'
 'Ref_Change_LCover_2023' 'Def2023' 'Deg2023' 'Defall' 'Degall' 'Stable'
 'NF' 'DensFor' 'DensDryFor' 'SecFor' 'DryOpenFor' 'Mangrove' 'Swamp'
 'Gallery' 'Plantation' 'Woodland' 'Shrubland' 'Grassland' 'Aquatic'
 'Bare' 'Cultivated' 'Builtup' 'Water' 'DensFor_Def' 'DensDryFor_Def'
 'SecFor_Def' 'DryOpenFor_Def' 'Mangrove_Def' 'Swamp_Def' 'Gallery_Def'
 'Plantation_Def' 'Woodland_Def' 'DensFor_Deg' 'DensDryFor_Deg'
 'SecFor_Deg' 'DryOpenFor_Deg' 'Mangrove_Deg' 'Swamp_Deg' 'Gallery_Deg'
 'Plantation_Deg' 'Woodland_Deg' 'DensFor_Stable' 'DensDryFor_

In [98]:
# Rows per change/year label, missing values counted too
merged_ceo_interpreted['Ref_Change_Year_2023'].value_counts(dropna=False)

Ref_Change_Year_2023
Stable     2080
NF         1108
Deg2023      20
Def2023      19
Name: count, dtype: int64

In [99]:
# Preview the first rows of the table before the country code is added
merged_ceo_interpreted.head()

Unnamed: 0,plotid,sampleid,sample_internal_id,lon,lat,collection_time,sample_geom,Ref_FNF_2022,Ref_NFtype_2022,Ref_Ftype_2022,Ref_FNF_2023,Ref_NFtype_2023,Ref_Ftype_2023,Ref_Change_2023,Ref_Change_Year,Ref_Change_Type_2023,Ref_Driver1_2023,Ref_Fire_2023,Ref_Change2_2023,Ref_Change2_Type_2023,Ref_Driver2_2023,Ref_Fire2_2023,Commentaires,collection,interpreted,Ref_LCover_2022,Ref_Change_Year_2023,Ref_Change_LCover_2023,Def2023,Deg2023,Defall,Degall,Stable,NF,DensFor,DensDryFor,SecFor,DryOpenFor,Mangrove,Swamp,Gallery,Plantation,Woodland,Shrubland,Grassland,Aquatic,Bare,Cultivated,Builtup,Water,DensFor_Def,DensDryFor_Def,SecFor_Def,DryOpenFor_Def,Mangrove_Def,Swamp_Def,Gallery_Def,Plantation_Def,Woodland_Def,DensFor_Deg,DensDryFor_Deg,SecFor_Deg,DryOpenFor_Deg,Mangrove_Deg,Swamp_Deg,Gallery_Deg,Plantation_Deg,Woodland_Deg,DensFor_Stable,DensDryFor_Stable,SecFor_Stable,DryOpenFor_Stable,Mangrove_Stable,Swamp_Stable,Gallery_Stable,Plantation_Stable,Woodland_Stable
0,1,1,754911309,17.777704,2.649177,2024-06-17 12:31,POINT(17.77770438848954 2.649176688632676),1,,1.0,1,,1.0,0,,Stable,,,0,,,,rentrez vos commentaires,coll_2023_1,1,1,Stable,Stable_1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,2,2,754911310,17.682124,0.133894,2024-06-17 12:33,POINT(17.68212364225922 0.133893893098016),1,,8.0,1,,8.0,0,,Stable,,,0,,,,rentrez vos commentaires,coll_2023_1,1,8,Stable,Stable_8,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,3,3,754911311,14.8474,-2.433581,2024-06-17 12:38,POINT(14.847399931691658 -2.433581020443989),0,16.0,,0,16.0,,0,,NF,,,0,,,,rentrez vos commentaires,coll_2023_1,1,16,NF,NF_16,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,4,4,754911312,16.598037,0.077929,2024-06-17 12:46,POINT(16.59803675738378 0.077928850897369),1,,8.0,1,,8.0,0,,Stable,,,0,,,,rentrez vos commentaires,coll_2023_1,1,8,Stable,Stable_8,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
4,5,5,754911313,14.588955,0.88785,2024-06-17 13:06,POINT(14.588954624450473 0.88784991105953),1,,1.0,1,,1.0,0,,Stable,,,0,,,,rentrez vos commentaires,coll_2023_1,1,1,Stable,Stable_1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [100]:
# Stamp every row with the country identifier chosen in the parameters
merged_ceo_interpreted['ISO'] = iso

In [101]:
# Print the data type of every column before exporting
print(merged_ceo_interpreted.dtypes)

plotid                      int64
sampleid                    int64
sample_internal_id          int64
lon                       float64
lat                       float64
collection_time            object
sample_geom                object
Ref_FNF_2022               object
Ref_NFtype_2022            object
Ref_Ftype_2022             object
Ref_FNF_2023               object
Ref_NFtype_2023            object
Ref_Ftype_2023             object
Ref_Change_2023            object
Ref_Change_Year           float64
Ref_Change_Type_2023       object
Ref_Driver1_2023           object
Ref_Fire_2023              object
Ref_Change2_2023           object
Ref_Change2_Type_2023      object
Ref_Driver2_2023           object
Ref_Fire2_2023             object
Commentaires               object
collection                 object
interpreted                 int64
Ref_LCover_2022            object
Ref_Change_Year_2023       object
Ref_Change_LCover_2023     object
Def2023                     int64
Deg2023       

### 3. Export CEO data to one CSV file

In [102]:
# Write the final table to ceo_data_out; the DataFrame index is saved as a column named 'ID'
merged_ceo_interpreted.to_csv(ceo_data_out,index_label='ID')