# Analyze Darfur Label Distribution

Author: Ivan Zvonkov

Last modified: Feb 15, 2024

Description: Takes processed csv files from data/datasets and converts to simple csv to be uploaded as a GEE table asset.


In [1]:
import pandas as pd
import ee
import geemap

In [2]:
ee.Initialize()

## 1. Assess Reference Samples per Region and Year

In [3]:
# Load reference samples
df_2022 = pd.read_csv("GedarefDarfurAlJazirah2022_gee.csv")
df_2023 = pd.read_csv("GedarefDarfurAlJazirah2023_gee.csv")

In [4]:
# Load ROIs from Google Earth Engine
roi_fc = ee.FeatureCollection("projects/ee-gmuhawenayo/assets/rois")
Central_Darfur_roi = roi_fc.filter("NAME_1 == 'Central Darfur'")
West_Darfur_roi = roi_fc.filter("NAME_1 == 'West Darfur'")
South_Darfur_roi = roi_fc.filter("NAME_1 == 'South Darfur'")

In [5]:
# Convert reference sample sets to feature collections
def ee_feature_from_row(row):
    return ee.Feature(ee.Geometry.Point(row.longitude, row.latitude), {"label": row.label})

ee_features_2022 = df_2022.apply(ee_feature_from_row, axis=1).to_list()
ee_features_2023 = df_2023.apply(ee_feature_from_row, axis=1).to_list()
ee_fc_2022 = ee.FeatureCollection(ee_features_2022)
ee_fc_2023 = ee.FeatureCollection(ee_features_2023)

In [6]:
# Get samples specific to each region
df_2022_Central_Darfur = geemap.ee_to_gdf(ee_fc_2022.filterBounds(Central_Darfur_roi))
df_2022_West_Darfur = geemap.ee_to_gdf(ee_fc_2022.filterBounds(West_Darfur_roi))
df_2022_South_Darfur = geemap.ee_to_gdf(ee_fc_2022.filterBounds(South_Darfur_roi))

df_2023_Central_Darfur = geemap.ee_to_gdf(ee_fc_2023.filterBounds(Central_Darfur_roi))
df_2023_West_Darfur = geemap.ee_to_gdf(ee_fc_2023.filterBounds(West_Darfur_roi))
df_2023_South_Darfur = geemap.ee_to_gdf(ee_fc_2023.filterBounds(South_Darfur_roi))

In [7]:
# Divide by 10e3 to convert m2 to ha, divide by 10e9 to convert to Mha
Central_Darfur_Mha = Central_Darfur_roi.geometry().area().divide(10e9).getInfo()
West_Darfur_Mha = West_Darfur_roi.geometry().area().divide(10e9).getInfo()
South_Darfur_Mha = South_Darfur_roi.geometry().area().divide(10e9).getInfo()

In [9]:
all_region_dfs = [
    df_2022_Central_Darfur, df_2023_Central_Darfur,
    df_2022_West_Darfur, df_2023_West_Darfur,
    df_2022_South_Darfur, df_2023_South_Darfur,
]
areas = [
    Central_Darfur_Mha, Central_Darfur_Mha, 
    West_Darfur_Mha, West_Darfur_Mha, 
    South_Darfur_Mha, South_Darfur_Mha
]
results = pd.DataFrame({
    "Darfur Region/Year": ["Central 2022", "Central 2023", "West 2022", "West 2023", "South 2022", "South 2023"],
    "Area (Mha)": areas,
    "Total Samples": [len(df) for df in all_region_dfs],
    "Crop Samples": [df["label"].value_counts()[1.0] for df in all_region_dfs],
    "Crop Proportion": [df["label"].value_counts(normalize=True)[1.0] for df in all_region_dfs]
})
results

Unnamed: 0,Darfur Region/Year,Area (Mha),Total Samples,Crop Samples,Crop Proportion
0,Central 2022,3.72578,138,18,0.130435
1,Central 2023,3.72578,157,10,0.063694
2,West 2022,2.275006,102,7,0.068627
3,West 2023,2.275006,102,7,0.068627
4,South 2022,7.844596,284,26,0.091549
5,South 2023,7.844596,337,20,0.059347


In [8]:
all_region_dfs = [
    df_2022_Central_Darfur, df_2023_Central_Darfur,
    df_2022_West_Darfur, df_2023_West_Darfur,
    df_2022_South_Darfur, df_2023_South_Darfur,
]
areas = [
    Central_Darfur_Mha, Central_Darfur_Mha, 
    West_Darfur_Mha, West_Darfur_Mha, 
    South_Darfur_Mha, South_Darfur_Mha
]
results = pd.DataFrame({
    "Darfur Region/Year": ["Central 2022", "Central 2023", "West 2022", "West 2023", "South 2022", "South 2023"],
    "Area (Mha)": areas,
    "Total Samples": [len(df) for df in all_region_dfs],
    "Crop Samples": [df["label"].value_counts()[1.0] for df in all_region_dfs],
    "Crop Proportion": [df["label"].value_counts(normalize=True)[1.0] for df in all_region_dfs]
})
results

Unnamed: 0,Darfur Region/Year,Area (Mha),Total Samples,Crop Samples,Crop Proportion
0,Central 2022,3.72578,166,25,0.150602
1,Central 2023,3.72578,166,19,0.114458
2,West 2022,2.275006,102,7,0.068627
3,West 2023,2.275006,102,7,0.068627
4,South 2022,7.844596,288,27,0.09375
5,South 2023,7.844596,349,27,0.077364


## 2. Verify West 2022 vs 2023 is not identical

In [10]:
# Verify West Darfur points are not identical 
df_2022_West_Darfur[df_2022_West_Darfur["label"] == 1]["geometry"].apply(str).sort_values()

85     POINT (22.58357621 13.6223303)
36     POINT (22.7199926 13.46485417)
14    POINT (22.74820422 13.14277069)
9     POINT (22.82382348 13.21075358)
18    POINT (22.82953307 13.27730204)
51     POINT (23.1920972 13.41481215)
82    POINT (23.26298529 13.68141724)
Name: geometry, dtype: object

In [11]:
df_2023_West_Darfur[df_2023_West_Darfur["label"] == 1]["geometry"].apply(str).sort_values()

101    POINT (22.66828223 14.12852563)
36      POINT (22.7199926 13.46485417)
14     POINT (22.74820422 13.14277069)
18     POINT (22.82953307 13.27730204)
71     POINT (23.08614043 13.56913349)
51      POINT (23.1920972 13.41481215)
82     POINT (23.26298529 13.68141724)
Name: geometry, dtype: object

In [12]:
# Samples are not identical but some are the same, proportion is the same though.

## 3. Might as well compute crop area estimate

Used area directly so estimate will be slightly different than using pixel total.

In [13]:
results["Crop Area Estimate (Mha)"] = results["Area (Mha)"] * results["Crop Proportion"] 
results["Crop Area Estimate (ha)"] = results["Area (Mha)"] * results["Crop Proportion"] * 10e6
results

Unnamed: 0,Darfur Region/Year,Area (Mha),Total Samples,Crop Samples,Crop Proportion,Crop Area Estimate (Mha),Crop Area Estimate (ha)
0,Central 2022,3.72578,138,18,0.130435,0.485971,4859713.0
1,Central 2023,3.72578,157,10,0.063694,0.237311,2373108.0
2,West 2022,2.275006,102,7,0.068627,0.156128,1561278.0
3,West 2023,2.275006,102,7,0.068627,0.156128,1561278.0
4,South 2022,7.844596,284,26,0.091549,0.718167,7181673.0
5,South 2023,7.844596,337,20,0.059347,0.465555,4655547.0


In [9]:
results["Crop Area Estimate (Mha)"] = results["Area (Mha)"] * results["Crop Proportion"] 
results["Crop Area Estimate (ha)"] = results["Area (Mha)"] * results["Crop Proportion"] * 10e6
results

Unnamed: 0,Darfur Region/Year,Area (Mha),Total Samples,Crop Samples,Crop Proportion,Crop Area Estimate (Mha),Crop Area Estimate (ha)
0,Central 2022,3.72578,166,25,0.150602,0.561111,5611114.0
1,Central 2023,3.72578,166,19,0.114458,0.426445,4264447.0
2,West 2022,2.275006,102,7,0.068627,0.156128,1561278.0
3,West 2023,2.275006,102,7,0.068627,0.156128,1561278.0
4,South 2022,7.844596,288,27,0.09375,0.735431,7354309.0
5,South 2023,7.844596,349,27,0.077364,0.606889,6068885.0
