This notebook merges all the district-aggregated data downloaded from Earth Engine into two GeoJSON files: one for forested area and one for deforested area.

In [3]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt

In [4]:
# Folder that holds the Earth Engine exports; replace the placeholder with your own path.
# Every path below is built as `root + filename`, so `root` must end with a path
# separator. os.path.join(path, "") appends one only when it is missing, which makes
# both "data/exports" and "data/exports/" work.
root = os.path.join("ADD ROOT FOLDER", "")

# Deforested-area export per land-cover product (file names relative to `root`).
deforested_files = {
    "DW": "deforested_area_district_year_DW.geojson",
    "ESA": "deforested_area_district_year_ESA.geojson",
    "ESRI": "deforested_area_district_year_ESRI.geojson",
    "FROM_GLC": "deforested_area_district_year_FROM_GLC.geojson",
    "MFGFC": "deforested_area_district_year_MFGFC.geojson",
    "PALSAR": "deforested_area_district_year_PALSAR.geojson",
}

# Forested-area export per land-cover product (same keys as `deforested_files`).
forested_files = {
    "DW": "forested_area_district_year_DW.geojson",
    "ESA": "forested_area_district_year_ESA.geojson",
    "ESRI": "forested_area_district_year_ESRI.geojson",
    "FROM_GLC": "forested_area_district_year_FROM_GLC.geojson",
    "MFGFC": "forested_area_district_year_MFGFC.geojson",
    "PALSAR": "forested_area_district_year_PALSAR.geojson",
}

In [6]:
# Load the forested-area export of each land-cover product (paths are `root` + file name).
dw_forested = gpd.read_file(f"{root}{forested_files['DW']}")
palsar_forested = gpd.read_file(f"{root}{forested_files['PALSAR']}")
esa_forested = gpd.read_file(f"{root}{forested_files['ESA']}")
from_glc_forested = gpd.read_file(f"{root}{forested_files['FROM_GLC']}")
mfgfc_forested = gpd.read_file(f"{root}{forested_files['MFGFC']}")
esri_forested = gpd.read_file(f"{root}{forested_files['ESRI']}")

In [12]:

def get_sum(data, year=2020, scale=1000000):
    """Print the total of the ``sum`` column for a single year, divided by ``scale``.

    Parameters
    ----------
    data : table with ``year`` and ``sum`` columns (e.g. a (Geo)DataFrame).
    year : value matched against the ``year`` column. Defaults to 2020, the
        year this check was originally hard-coded to.
    scale : divisor applied to the total. Defaults to 1000000, the original
        hard-coded divisor.

    Returns
    -------
    None. The scaled total is printed, not returned, so a cell ending in a
    call shows the number exactly once.
    """
    # Keep only the rows of the requested year; the caller's frame is not modified.
    rows_for_year = data[data["year"] == year]
    print(rows_for_year["sum"].sum() / scale)

In [18]:
# Sanity check: print the 2020 total of the ESRI forested "sum" column, divided by 1e6.
get_sum(data=esri_forested)

162083.36159819303


In [40]:
# Load the deforested-area export of each land-cover product (paths are `root` + file name).
dw_deforested = gpd.read_file(f"{root}{deforested_files['DW']}")
palsar_deforested = gpd.read_file(f"{root}{deforested_files['PALSAR']}")
esa_deforested = gpd.read_file(f"{root}{deforested_files['ESA']}")
from_glc_deforested = gpd.read_file(f"{root}{deforested_files['FROM_GLC']}")
mfgfc_deforested = gpd.read_file(f"{root}{deforested_files['MFGFC']}")
esri_deforested = gpd.read_file(f"{root}{deforested_files['ESRI']}")

In [41]:
def reformat_data(df, name):
    """Return the four merge-relevant columns of ``df`` under harmonised names.

    Keeps ``ADM2_PCODE``, ``sum``, ``year`` and ``geometry`` (in that order),
    renaming ``ADM2_PCODE`` to ``district`` and ``sum`` to ``name``. The input
    frame is left untouched.
    """
    kept_columns = ["ADM2_PCODE", "sum", "year", "geometry"]
    new_labels = {"ADM2_PCODE": "district", "sum": name}
    subset = df[kept_columns]
    return subset.rename(columns=new_labels)

In [42]:
# Reformat forested data: each product's "sum" column is renamed to the product key.
dw_forested_reformatted = reformat_data(df=dw_forested, name="DW")
palsar_forested_reformatted = reformat_data(df=palsar_forested, name="PALSAR")
esa_forested_reformatted = reformat_data(df=esa_forested, name="ESA")
from_glc_forested_reformatted = reformat_data(df=from_glc_forested, name="FROM_GLC")
mfgfc_forested_reformatted = reformat_data(df=mfgfc_forested, name="MFGFC")
esri_forested_reformatted = reformat_data(df=esri_forested, name="ESRI")

# Reformat deforested data the same way.
dw_deforested_reformatted = reformat_data(df=dw_deforested, name="DW")
palsar_deforested_reformatted = reformat_data(df=palsar_deforested, name="PALSAR")
esa_deforested_reformatted = reformat_data(df=esa_deforested, name="ESA")
from_glc_deforested_reformatted = reformat_data(df=from_glc_deforested, name="FROM_GLC")
mfgfc_deforested_reformatted = reformat_data(df=mfgfc_deforested, name="MFGFC")
esri_deforested_reformatted = reformat_data(df=esri_deforested, name="ESRI")

In [43]:
# Seed the merged table with the MFGFC frame; it is the only one whose geometry is kept.
forested_merged = mfgfc_forested_reformatted.copy()

# Remaining products, joined one at a time in this order.
other_forested = [
    palsar_forested_reformatted,
    esa_forested_reformatted,
    from_glc_forested_reformatted,
    dw_forested_reformatted,
    esri_forested_reformatted,
]

# Outer join on (district, year): a pair present in any product is kept.
# NOTE(review): a (district, year) pair missing from MFGFC would come out of the
# outer join without a geometry - confirm this cannot happen in the exports.
for gdf in other_forested:
    forested_merged = forested_merged.merge(
        gdf.drop(columns=["geometry"]),
        how="outer",
        on=["district", "year"],
    )

In [44]:
# Seed the merged table with the MFGFC frame; it is the only one whose geometry is kept.
deforested_merged = mfgfc_deforested_reformatted.copy()

# Remaining products, joined one at a time in this order.
other_deforested = [
    palsar_deforested_reformatted,
    esa_deforested_reformatted,
    from_glc_deforested_reformatted,
    dw_deforested_reformatted,
    esri_deforested_reformatted,
]

# Outer join into deforested_merged on (district, year): a pair present in any product is kept.
# NOTE(review): a (district, year) pair missing from MFGFC would come out of the
# outer join without a geometry - confirm this cannot happen in the exports.
for gdf in other_deforested:
    deforested_merged = deforested_merged.merge(
        gdf.drop(columns=["geometry"]),
        how="outer",
        on=["district", "year"],
    )

In [45]:
# Wrap the merged forested table as a GeoDataFrame on its "geometry" column and
# export it under `root` as GeoJSON, without writing the row index.
forested_merged_gdf = gpd.GeoDataFrame(forested_merged, geometry="geometry")
forested_merged_gdf.to_file(
    f"{root}merged_forested_data.geojson",
    index=False,
    driver="GeoJSON",
)

In [46]:
# Wrap the merged deforested table as a GeoDataFrame on its "geometry" column and
# export it under `root` as GeoJSON, without writing the row index.
deforested_merged_gdf = gpd.GeoDataFrame(deforested_merged, geometry="geometry")
deforested_merged_gdf.to_file(
    f"{root}merged_deforested_data.geojson",
    index=False,
    driver="GeoJSON",
)