In [None]:
import os
import pickle

import datacube
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rioxarray
import xarray as xr
from datacube.utils import geometry
from deafrica_tools.spatial import xr_rasterize
from joblib import dump

In [None]:
# Path to the district-level administrative boundaries shapefile.
# The literal is split across two lines only to keep the line short.
admin_boundaries_file = (
    "data/admin-boundaries/"
    "GRID3_Zambia_Administrative_Boundaries_Districts_2020.shp"
)

In [None]:
# Read the boundaries file, then reproject it to EPSG:6933
# (the same CRS string used as `output_crs` for the datacube query).
admin_boundaries_gdf = gpd.read_file(admin_boundaries_file)
admin_boundaries_gdf = admin_boundaries_gdf.to_crs("EPSG:6933")

In [None]:
# Keep only the rows whose PROVINCE attribute matches the chosen province.
province = "Central"
in_selected_province = admin_boundaries_gdf["PROVINCE"] == province
province_boundaries_gdf = admin_boundaries_gdf.loc[in_selected_province]

In [None]:
# Datacube connection used for the crop mask loads

dc = datacube.Datacube(app="crop_type_ml")

# Load parameters shared by every query; a spatial extent is supplied
# separately at load time.
time = "2019"
resolution = (-20, 20)
output_crs = "EPSG:6933"

query = dict(
    time=time,
    resolution=resolution,
    output_crs=output_crs,
    dask_chunks=dict(time=1, x=2000, y=2000),
)

In [None]:
# For each district, count the pixels inside the district polygon at the query
# resolution, count how many of them are flagged as crop in the crop mask, and
# store the resulting areas (km^2) and crop proportion on the GeoDataFrame.
area_of_interest_gdf = province_boundaries_gdf.reset_index(drop=True)
district_column = "DISTRICT"

# Loop-invariant values: unit conversion and the ground area of one pixel,
# derived from the (y, x) resolution in the shared query.
m2_per_km2 = 1000000
pixel_area_m2 = abs(query["resolution"][0] * query["resolution"][1])

for index, district in area_of_interest_gdf.iterrows():

    # Set up geometry
    district_name = district[district_column]
    print(f"Processing {district_name}")

    # Describe the district polygon in the GeoDataFrame's CRS
    geom = geometry.Geometry(geom=district.geometry, crs=area_of_interest_gdf.crs)

    # Combine the shared parameters with this district's polygon in a NEW dict.
    # The shared `query` is deliberately left untouched so that no stale
    # "geopolygon" leaks into later cells or into a re-run of this cell.
    district_query = {**query, "geopolygon": geom}

    # Load crop mask (the trailing .load() pulls the dask-backed data into memory)
    crop_mask = dc.load(product="crop_mask_southeast", **district_query).load()

    # Rasterize polygon onto the crop mask grid. `index` is usable positionally
    # with .iloc because the index was reset to 0..n-1 above.
    district_mask = xr_rasterize(
        gdf=area_of_interest_gdf.iloc[[index]],
        da=crop_mask,
        transform=crop_mask.geobox.transform,
        crs=query["output_crs"],
    )

    # Count crop pixels inside the district. Comparing against 1 (rather than
    # summing raw band values) means any value other than 1 contributes nothing.
    # NOTE(review): presumably the band can carry a nodata fill such as 255 -
    # confirm against the product definition.
    inside_district = district_mask == 1
    cropping_pixels = ((crop_mask.filtered == 1) & inside_district).sum().item()
    cropping_area_m2 = cropping_pixels * pixel_area_m2
    cropping_area_km2 = cropping_area_m2 / m2_per_km2

    district_pixels = district_mask.sum().item()
    district_area_m2 = district_pixels * pixel_area_m2
    district_area_km2 = district_area_m2 / m2_per_km2

    # A polygon that rasterizes to zero pixels has no defined proportion;
    # record NaN instead of raising ZeroDivisionError part-way through the loop.
    if district_pixels:
        crop_proportion = cropping_pixels / district_pixels
    else:
        crop_proportion = np.nan

    area_of_interest_gdf.loc[index, "crop_area_km2"] = cropping_area_km2
    area_of_interest_gdf.loc[index, "district_area_km2"] = district_area_km2
    area_of_interest_gdf.loc[index, "crop_proportion"] = crop_proportion

    print(f"    Cropping proportion is {crop_proportion}")

In [None]:
# Keep only the reporting columns, then rank districts from the highest to the
# lowest crop proportion with a fresh 0..n-1 index.
report_columns = [
    "FID",
    district_column,
    "crop_area_km2",
    "district_area_km2",
    "crop_proportion",
    "geometry",
]
cropping_by_district = area_of_interest_gdf[report_columns]
cropping_by_district = cropping_by_district.sort_values(
    by="crop_proportion", ascending=False
)
cropping_by_district = cropping_by_district.reset_index(drop=True)

In [None]:
# Write the ranked districts (attributes + geometry) to GeoJSON.
# The driver is named explicitly so the output format does not depend on the
# installed geopandas version inferring it from the file extension.
# The "propotion" spelling in the file name is kept as-is so that anything
# already reading this path keeps working.
cropping_by_district.to_file(
    "data/cropping_propotion_by_district.geojson", driver="GeoJSON"
)

In [None]:
# Export the same table as CSV: drop the geometry column, wrap the remaining
# attribute columns in a plain pandas DataFrame, and write it out.
attribute_table = cropping_by_district.drop(columns="geometry")
df = pd.DataFrame(attribute_table)

df.to_csv("data/cropping_propotion_by_district.csv")