# 1. Import necessary libraries for data handling and visualization.

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.ticker as mticker
#import ee
#import geemap

# 2. The eSBAE function:

In [2]:
def calculate_areas(db_total, strata_column, categories_column, total_area, z_score):
    
    df_full = db_total.copy()
    df_interpreted = db_total[~db_total[categories_column].isna()]
    
    # get all attributes
    categories = df_interpreted[categories_column].unique()
    
    # get strata
    strata, d = df_interpreted[strata_column].unique(), {}
    print(categories)
    # create stats for each entry
    for category in categories:
        
        if str(category) == 'nan':
            continue
            
        print(f' Calculating stats for {category}')
        # create binary class column
        df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
        print(f'There are {df_interpreted[category].sum()} entries of {category} in {categories_column}.')
        
        # initialize variables for category <> check the catergories > def, deg, gain
        categories_area, se_total = 0, 0
        d2 = {}
        for stratum in strata:
                        
            if str(stratum) == 'nan':
                continue
            
            # subset to stratum
            df_stratum = df_full[df_full[strata_column] == stratum]
            
            # get area proportion for that stratum on full dataset
            proportion_strata = len(df_stratum)/len(df_full)

            # get stratum area
            stratum_area = proportion_strata * total_area

            # get proportion of forest change within strata from interpreted data
            proportion_category = len(
                df_interpreted[(df_interpreted[strata_column] == stratum) & (df_interpreted[category] == 1)]
            ) / len(
                df_interpreted[df_interpreted[strata_column] == stratum]
            )
            
            # get area from proportion and full area
            category_stratum_area = proportion_category * stratum_area

            # get error from interpreted data for full stratum area
            var = np.var(df_interpreted[category][df_interpreted[strata_column] == stratum])
            sd = np.sqrt(var)               
            n = len(df_interpreted[df_interpreted[strata_column] == stratum])
            se = sd/np.sqrt(n) * stratum_area
                          
            # add for totals
            categories_area += category_stratum_area
            se_total += se**2
            
            # add to dictionary
            d2[f'area_stratum_{stratum}'] = category_stratum_area
            d2[f'ci_stratum_{stratum}']=z_score*se
        
        d2['area_total'] = categories_area
        d2['MOE'] = z_score*np.sqrt(se_total)
        d2['MOE_perc'] =  d2['MOE'] / categories_area * 100
        d[category] = d2
    
    
    return pd.DataFrame.from_dict(d, orient='index')

# 3 Load and prepare interpreted data from CEO Validation 
#### COG: 1001 points [07/12/2023]
#### set 1 = 1001 pts + set 2 = 2000 pts [13/12/2023]

In [34]:
#CEO_pts = pd.read_csv('/home/sepal-user/eSBAE_COG/data/ceo-NERF_2016_2022_CONGO_set1-sample-data-2023-12-06.csv', delimiter=';')
CEO_set1raw = pd.read_csv('/home/sepal-user/eSBAE_COG/data/ceo-NERF_2016_2022_CONGO_set1-sample-data-2023-12-13.csv', delimiter=',')
CEO_set2raw = pd.read_csv('/home/sepal-user/eSBAE_COG/data/ceo-NERF_2016_2022_CONGO_set2-sample-data-2023-12-13.csv', delimiter=',')

In [35]:
len(CEO_set1raw) # = 1001

1001

In [36]:
len(CEO_set2raw) # = 2000

2000

In [None]:
CEO_set1raw.columns.to_list() 
#CEO_pts.head()

In [41]:
count_set1 = CEO_set1raw['forêt ou non-forêt en 2016?'].value_counts()
print(count_set1)

forêt ou non-forêt en 2016?
forêt        637
non-forêt    364
Name: count, dtype: int64


In [46]:
#comme tous les points du set 1 ont été interpretés:
CEO_set1 = CEO_set1raw

In [42]:
count_set2 = CEO_set2raw['forêt ou non-forêt en 2016?'].value_counts()
print(count_set2)

forêt ou non-forêt en 2016?
non-forêt    891
forêt        839
Name: count, dtype: int64


In [48]:
#CEO_set2 = CEO_set2raw[CEO_set2raw['forêt ou non-forêt en 2016?'] != 'NaN']
#CEO_set2 = CEO_set2raw['forêt ou non-forêt en 2016?'].notna()

select_set2 = (CEO_set2raw['forêt ou non-forêt en 2016?'] == 'forêt') | (CEO_set2raw['forêt ou non-forêt en 2016?'] == 'non-forêt')
CEO_set2 = CEO_set2raw[select_set2]

len(CEO_set2)

1730

## 3.1 Different data checks and harmonisations 

In [50]:
# vérification doublons set 1
duplicated = CEO_set1['plotid'].duplicated().any() 
if duplicated:
    print ("problem")
else:
    print ("all good")

problem


In [51]:
doublonsCEO_set1 = CEO_set1[CEO_set1.duplicated(subset='plotid', keep=False)]
print(doublonsCEO_set1)

   plotid  sampleid        lon       lat                      email  flagged  \
7  541957    541957  11.235013 -3.836062      menguekarel@gmail.com    False   
8  541957    541957  11.235013 -3.836062  ndandoularissa7@gmail.com    False   

    collection_time analysis_duration imagery_title  imagery_attributions  \
7  2023-11-23 08:44        132.6 secs           NaN                   NaN   
8  2023-11-23 09:08        331.0 secs           NaN                   NaN   

   ... Type du changement 2 (1)  Type de moteur pour changement 2 (1)  \
7  ...                      NaN                                   NaN   
8  ...                      NaN                                   NaN   

   Le feu a t-il causé le changement ?.1  Décrivez autres (1) (0)  \
7                                    NaN                      NaN   
8                                    NaN                      NaN   

   Y-a t-il eu régénération  ?  Indiquez la date de la régénération  \
7                          N

In [52]:
CEO_set1['doublon'] = 'no'
CEO_set1['doublon'] = np.where((CEO_set1['plotid'] == 541957) & (CEO_set1['Commentaires'] == 'rentrez vos commentaires'), 'yes', CEO_set1['doublon'])

count_values_db = CEO_set1['doublon'].value_counts()
print(count_values_db)

doublon
no     1000
yes       1
Name: count, dtype: int64


In [53]:
dfCEO_set1 = CEO_set1[CEO_set1['doublon'] == 'no']
len(dfCEO_set1)

1000

In [54]:
# vérification doublons set 2
duplicated = CEO_set2['plotid'].duplicated().any() 
if duplicated:
    print ("problem")
else:
    print ("all good")

all good


In [55]:
CEO_set1['source'] = 'set1'
CEO_set2['source'] = 'set2'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CEO_set2['source'] = 'set2'


In [56]:
CEO_cols = [ 'plotid',
 'forêt ou non-forêt en 2016?',
 'Type de non-forêt en 2016',
 'Type de forêt en 2016',
 'Y-a t-il un changement négatif sur la période 2016-2022',
 'Quel type de changement ? ',
 "Indiquez l'année du changement 1",
 'Type de moteur pour changement 1',
 'Le feu a t-il causé le changement ?',
 'Décrivez autres',
 'y-a t-il un second changement ?',
 'Type du changement 2 (1)',
 'Type de moteur pour changement 2 (1)',
 'Le feu a t-il causé le changement ?.1',
 'Décrivez autres (1) (0)',
 'Y-a t-il eu régénération  ?',
 'Indiquez la date de la régénération',
 'Définir la strate en 2022 ',
 'Type de non-forêt en 2022',
 'Type de forêt en 2022',
 'Commentaires',
  'source']

In [57]:
# concatenation set 1 + set 2
dfCEOconcat = pd.concat([CEO_set1[CEO_cols], CEO_set2[CEO_cols]], axis=0, ignore_index=True)
len(dfCEOconcat) # = 3001

2731

In [58]:
# vérification doublons set 2
doublon = dfCEOconcat['plotid'].duplicated().any() 
if duplicated:
    print ("problem")
else:
    print ("all good")

all good


In [59]:
#activites = dfCEO_pts_clean['Activités '].value_counts()
#print(activites)

In [60]:
pivot1 = pd.pivot_table(dfCEOconcat,values='source',index=['forêt ou non-forêt en 2016?'],columns=['Type de non-forêt en 2016'],aggfunc="count")
pivot1

Type de non-forêt en 2016,zone baties,eau,je ne sais pas,prairie aquatique,savane arborée/arbustive,savane herbacée,sol nu végétation éparse,terres cultivées annuelles,terres cultivées permanentes
forêt ou non-forêt en 2016?,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
non-forêt,11,53,1,48,418,569,29,111,15


In [61]:
pivot2 = pd.pivot_table(dfCEOconcat,values='source',index=['forêt ou non-forêt en 2016?'],columns=['Type de forêt en 2016'],aggfunc="count")
pivot2

Type de forêt en 2016,1 - forêt dense,10 - plantation forestière,3 - forêt secondaire,4- forêt claire,7 - forêt mangrove,8 - forêt marécageuse,9 - forêt galérie
forêt ou non-forêt en 2016?,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
forêt,694,7,522,20,1,115,117


In [77]:
## définition des catégories au niveau 1 (IPCC) - 2016
dfCEOconcat['ipcc_lulc_2016'] = 'problem'
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['forêt ou non-forêt en 2016?'] == 'forêt'), 'Terres forestieres', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == ' zone baties'), 'Etablissement humain', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == 'eau '), 'Terres humides', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == 'je ne sais pas'), 'Autres terres', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == 'prairie aquatique'), 'Terres humides', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == 'savane arborée/arbustive'), 'Terres gramineennes', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == 'savane herbacée'), 'Terres gramineennes', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == 'sol nu végétation éparse'), 'Autres terres', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == 'terres cultivées annuelles'), 'Terres cultivees', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['Type de non-forêt en 2016'] == 'terres cultivées permanentes'), 'Terres cultivees', dfCEOconcat['ipcc_lulc_2016'])
dfCEOconcat['ipcc_lulc_2016'] = np.where((dfCEOconcat['forêt ou non-forêt en 2016?'] == 'NaN'), 'invalide', dfCEOconcat['ipcc_lulc_2016']) ### ne marche pas ...???

count1 = dfCEOconcat['ipcc_lulc_2016'].value_counts()
print(count1)

ipcc_lulc_2016
Terres forestieres      1476
Terres gramineennes      987
Terres cultivees         126
Terres humides           101
Autres terres             30
Etablissement humain      11
Name: count, dtype: int64


In [80]:
pivot3 = pd.pivot_table(dfCEOconcat,values='plotid',index=['forêt ou non-forêt en 2016?'],columns=['ipcc_lulc_2016'],aggfunc="count")
pivot3

ipcc_lulc_2016,Autres terres,Etablissement humain,Terres cultivees,Terres forestieres,Terres gramineennes,Terres humides
forêt ou non-forêt en 2016?,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
forêt,,,,1476.0,,
non-forêt,30.0,11.0,126.0,,987.0,101.0


In [81]:
## définition des catégories au niveau 2 (classes nationales) - 2016
dfCEOconcat['n2_lulc_2016'] = dfCEOconcat['Type de non-forêt en 2016']
dfCEOconcat.loc[dfCEOconcat['forêt ou non-forêt en 2016?'] == 'forêt', 'n2_lulc_2016'] = dfCEOconcat['Type de forêt en 2016']

In [82]:
pivot3b = pd.pivot_table(dfCEOconcat,values='plotid',index=['n2_lulc_2016'],columns=['ipcc_lulc_2016'],aggfunc="count")
pivot3b

ipcc_lulc_2016,Autres terres,Etablissement humain,Terres cultivees,Terres forestieres,Terres gramineennes,Terres humides
n2_lulc_2016,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
zone baties,,11.0,,,,
1 - forêt dense,,,,694.0,,
10 - plantation forestière,,,,7.0,,
3 - forêt secondaire,,,,522.0,,
4- forêt claire,,,,20.0,,
7 - forêt mangrove,,,,1.0,,
8 - forêt marécageuse,,,,115.0,,
9 - forêt galérie,,,,117.0,,
eau,,,,,,53.0
je ne sais pas,1.0,,,,,


In [83]:
pivot4 = pd.pivot_table(dfCEOconcat,values='plotid',index=['Définir la strate en 2022 '],columns=['Type de non-forêt en 2022'],aggfunc="count")
pivot4

Type de non-forêt en 2022,eau,je ne sais pas,prairie aquatique,savane arbustive/arborée,savane herbacée,sol nu végétation éparse,terres cultivées annuelles,terres cultivées permanentes,zone baties
Définir la strate en 2022,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
non-forêt,54,1,48,408,580,47,130,16,11


In [84]:
pivot5 = pd.pivot_table(dfCEOconcat,values='plotid',index=['Définir la strate en 2022 '],columns=['Type de forêt en 2022'],aggfunc="count")
pivot5

Type de forêt en 2022,1 - forêt dense,10 - plantation forestière,3 - forêt secondaire,4- forêt claire,8 - forêt marécageuse,9 - forêt galérie
Définir la strate en 2022,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
forêt,661,6,519,20,115,115


In [85]:
## définition des catégories au niveau 1 (IPCC) - 2022
dfCEOconcat['ipcc_lulc_2022'] = 'problem'
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Définir la strate en 2022 '] == 'forêt'), 'Terres forestieres', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'zone baties'), 'Etablissement humain', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'eau'), 'Terres humides', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'je ne sais pas'), 'Autres terres', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'prairie aquatique'), 'Terres humides', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'savane arbustive/arborée'), 'Terres gramineennes', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'savane herbacée'), 'Terres gramineennes', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'sol nu végétation éparse'), 'Autres terres', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'terres cultivées annuelles'), 'Terres cultivees', dfCEOconcat['ipcc_lulc_2022'])
dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Type de non-forêt en 2022'] == 'terres cultivées permanentes'), 'Terres cultivees', dfCEOconcat['ipcc_lulc_2022'])
#dfCEOconcat['ipcc_lulc_2022'] = np.where((dfCEOconcat['Définir la strate en 2022 '] == 'NaN'), 'invalide', dfCEOconcat['ipcc_lulc_2022']) ### ne marche pas ...???

count1 = dfCEOconcat['ipcc_lulc_2022'].value_counts()
print(count1)

ipcc_lulc_2022
Terres forestieres      1436
Terres gramineennes      988
Terres cultivees         146
Terres humides           102
Autres terres             48
Etablissement humain      11
Name: count, dtype: int64


In [86]:
## définition des catégories au niveau 2 (classes nationales) - 2022
dfCEOconcat['n2_lulc_2022'] = dfCEOconcat['Type de non-forêt en 2022']
dfCEOconcat.loc[dfCEOconcat['Définir la strate en 2022 '] == 'forêt', 'n2_lulc_2022'] = dfCEOconcat['Type de forêt en 2022']

In [87]:
pivot5b = pd.pivot_table(dfCEOconcat,values='plotid',index=['n2_lulc_2022'],columns=['ipcc_lulc_2022'],aggfunc="count")
pivot5b

ipcc_lulc_2022,Autres terres,Etablissement humain,Terres cultivees,Terres forestieres,Terres gramineennes,Terres humides
n2_lulc_2022,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1 - forêt dense,,,,661.0,,
10 - plantation forestière,,,,6.0,,
3 - forêt secondaire,,,,519.0,,
4- forêt claire,,,,20.0,,
8 - forêt marécageuse,,,,115.0,,
9 - forêt galérie,,,,115.0,,
eau,,,,,,54.0
je ne sais pas,1.0,,,,,
prairie aquatique,,,,,,48.0
savane arbustive/arborée,,,,,408.0,


In [88]:
#matrice des transitions n1 ipcc
tmatrix_n1 = pd.pivot_table(dfCEOconcat,values='plotid',index=['ipcc_lulc_2016'],columns=['ipcc_lulc_2022'],aggfunc="count")
tmatrix_n1

ipcc_lulc_2022,Autres terres,Etablissement humain,Terres cultivees,Terres forestieres,Terres gramineennes,Terres humides
ipcc_lulc_2016,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Autres terres,27.0,,,2.0,1.0,
Etablissement humain,,11.0,,,,
Terres cultivees,,,120.0,5.0,1.0,
Terres forestieres,19.0,,19.0,1424.0,14.0,
Terres gramineennes,2.0,,7.0,5.0,972.0,1.0
Terres humides,,,,,,101.0


In [89]:
tmatrix_n2 = pd.pivot_table(dfCEOconcat,values='plotid',index=['ipcc_lulc_2016', 'n2_lulc_2016'],columns=['ipcc_lulc_2022', 'n2_lulc_2022'],aggfunc="count")
tmatrix_n2

Unnamed: 0_level_0,ipcc_lulc_2022,Autres terres,Autres terres,Etablissement humain,Terres cultivees,Terres cultivees,Terres forestieres,Terres forestieres,Terres forestieres,Terres forestieres,Terres forestieres,Terres forestieres,Terres gramineennes,Terres gramineennes,Terres humides,Terres humides
Unnamed: 0_level_1,n2_lulc_2022,je ne sais pas,sol nu végétation éparse,zone baties,terres cultivées annuelles,terres cultivées permanentes,1 - forêt dense,10 - plantation forestière,3 - forêt secondaire,4- forêt claire,8 - forêt marécageuse,9 - forêt galérie,savane arbustive/arborée,savane herbacée,eau,prairie aquatique
ipcc_lulc_2016,n2_lulc_2016,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Autres terres,je ne sais pas,1.0,,,,,,,,,,,,,,
Autres terres,sol nu végétation éparse,,26.0,,,,,,2.0,,,,1.0,,,
Etablissement humain,zone baties,,,11.0,,,,,,,,,,,,
Terres cultivees,terres cultivées annuelles,,,,106.0,,,,4.0,,,,,1.0,,
Terres cultivees,terres cultivées permanentes,,,,,14.0,,,1.0,,,,,,,
Terres forestieres,1 - forêt dense,,5.0,,1.0,,656.0,,28.0,1.0,,,,3.0,,
Terres forestieres,10 - plantation forestière,,1.0,,,,,6.0,,,,,,,,
Terres forestieres,3 - forêt secondaire,,12.0,,17.0,1.0,4.0,,477.0,,,,1.0,10.0,,
Terres forestieres,4- forêt claire,,,,,,1.0,,,19.0,,,,,,
Terres forestieres,7 - forêt mangrove,,,,,,,,,,1.0,,,,,


In [90]:
## définition des activités
dfCEOconcat['DA1622'] = 'problem'
dfCEOconcat['DA1622'] = np.where((dfCEOconcat['ipcc_lulc_2016'] == 'Terres forestieres') & (dfCEOconcat['ipcc_lulc_2022'] != 'Terres forestieres'), 'Def', dfCEOconcat['DA1622'])
dfCEOconcat['DA1622'] = np.where((dfCEOconcat['ipcc_lulc_2016'] != 'Terres forestieres') & (dfCEOconcat['ipcc_lulc_2022'] == 'Terres forestieres'), 'Gain', dfCEOconcat['DA1622'])
dfCEOconcat['DA1622'] = np.where((dfCEOconcat['ipcc_lulc_2016'] == 'Terres forestieres') & (dfCEOconcat['ipcc_lulc_2022'] == 'Terres forestieres'), 'SF', dfCEOconcat['DA1622'])
dfCEOconcat['DA1622'] = np.where((dfCEOconcat['ipcc_lulc_2016'] != 'Terres forestieres') & (dfCEOconcat['ipcc_lulc_2022'] != 'Terres forestieres'), 'SNF', dfCEOconcat['DA1622'])

#degradation
dfCEOconcat['DA1622'] = np.where((dfCEOconcat['n2_lulc_2016'] == '1 - forêt dense') & (dfCEOconcat['n2_lulc_2022'] == '3 - forêt secondaire'), 'Deg', dfCEOconcat['DA1622'])
dfCEOconcat['DA1622'] = np.where((dfCEOconcat['n2_lulc_2016'] == '8 - forêt marécageuse') & (dfCEOconcat['n2_lulc_2022'] == '3 - forêt secondaire'), 'Deg', dfCEOconcat['DA1622'])
dfCEOconcat['DA1622'] = np.where((dfCEOconcat['n2_lulc_2016'] == '9 - forêt galérie') & (dfCEOconcat['n2_lulc_2022'] == '3 - forêt secondaire'), 'Deg', dfCEOconcat['DA1622'])

count1 = dfCEOconcat['DA1622'].value_counts()
print(count1)

DA1622
SF      1394
SNF     1243
Def       52
Deg       30
Gain      12
Name: count, dtype: int64


In [95]:
len(dfCEOconcat)

2731

#### 4. load national grid

In [27]:
#ee.Initialize()
#df_COG_grid = ee.FeatureCollection("users/andreasvollrath/COGSamples")

## scp -P 443 -r arquero@ssh.sepal.io:/home/sepal-user/module_results/esbae/my_first_esbae_project_congo_v1/COG_esbae_2015_2022_model.csv/ . (demander mdp à Amélie)
df_COG_grid = pd.read_csv('/home/sepal-user/eSBAE_COG/data/COG_esbae_2015_2022_model.csv', delimiter=',')
#len(df_COG_grid) # = 291595

In [None]:
# List all columns 
df_COG_grid.columns.tolist()

## 5. FULL dataframe with national GRID + 1k interpreted points

In [28]:
df_COG_grid.dtypes

images                 int64
mon_images             int64
bfast_change_date    float64
bfast_magnitude      float64
bfast_means          float64
                      ...   
stratum                int64
kmeans                 int64
PLOTID                 int64
LON                  float64
LAT                  float64
Length: 87, dtype: object

In [29]:
dfCEOconcat.dtypes

plotid                                                       int64
forêt ou non-forêt en 2016?                                 object
Type de non-forêt en 2016                                   object
Type de forêt en 2016                                       object
Y-a t-il un changement négatif sur la période 2016-2022     object
Quel type de changement ?                                   object
Indiquez l'année du changement 1                           float64
Type de moteur pour changement 1                            object
Le feu a t-il causé le changement ?                         object
Décrivez autres                                             object
y-a t-il un second changement ?                             object
Type du changement 2 (1)                                    object
Type de moteur pour changement 2 (1)                        object
Le feu a t-il causé le changement ?.1                       object
Décrivez autres (1) (0)                                     ob

In [91]:
dfCEOconcat.rename(columns={'plotid':'PLOTID'}, inplace=True)

In [92]:
df_COG_esbae = df_COG_grid[['PLOTID', 'kmeans']].merge(dfCEOconcat[['PLOTID', 'DA1622']], how='left', on='PLOTID')
len(df_COG_esbae)

291596

In [93]:
activites2 = df_COG_esbae['DA1622'].value_counts()
print(activites2)

DA1622
SF      1394
SNF     1243
Def       52
Deg       30
Gain      12
Name: count, dtype: int64


##### Perform area calculation using the stratum column. In this case the column is called kmeans. Use the merge dataframe (national grid points + CEO validated points)

## 6. Run the eSBAE function for the different scenarios

In [94]:
calculate_areas(db_total=df_COG_esbae, strata_column='kmeans', categories_column='DA1622', total_area=len(df_COG_esbae), z_score=1.645)

['SNF' 'SF' 'Deg' 'Gain' 'Def']
 Calculating stats for SNF
There are 1243 entries of SNF in DA1622.
 Calculating stats for SF
There are 1394 entries of SF in DA1622.
 Calculating stats for Deg
There are 30 entries of Deg in DA1622.
 Calculating stats for Gain
There are 12 entries of Gain in DA1622.
 Calculating stats for Def
There are 52 entries of Def in DA1622.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_interpreted[category] =  df_interpreted[categories_column].apply(lambda x: 1 if x == category else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-co

Unnamed: 0,area_stratum_2,ci_stratum_2,area_stratum_1,ci_stratum_1,area_stratum_3,ci_stratum_3,area_total,MOE,MOE_perc
SNF,38519.277736,1434.762416,43327.170172,6038.847549,8145.359003,564.431109,89991.806911,6232.560099,6.925697
SF,25679.518491,1430.294132,162673.829828,6038.847549,10773.657984,578.448796,199127.006303,6232.818306,3.130072
Deg,341.742642,211.917011,0.0,0.0,544.602492,184.353116,886.345133,280.882344,31.689952
Gain,48.820377,80.27921,0.0,0.0,260.462061,128.37839,309.282439,151.412557,48.95608
Def,97.640755,113.489064,0.0,0.0,1183.91846,267.512969,1281.559215,290.590702,22.674778
