# Calculate crop statistics 

## Background

Crop type maps provide information on the distribution of different crops and can be used to generate crop area statistics, contributing to the understanding of agricultural production.

## Description

This notebook demonstrates how to calculate crop area statistics from a crop type map and an administrative boundary shapefile. The results are inspected and then saved as tables.

## Getting started
To run this analysis, run all the cells in the notebook, starting with the "Load packages" cell.

### Load packages

In [4]:
import os
import pickle
import json

import datacube
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rioxarray
import xarray as xr
from datacube.utils import geometry
from deafrica_tools.spatial import xr_rasterize



## Load admin boundaries and crop map

We will load the data in the coordinate reference system `EPSG:6933` for the area calculation. This is an equal-area projection whose units are metres.

In [5]:
# CRS that the admin boundaries are reprojected to and that is passed to the
# rasterisation step when building per-polygon masks.
output_crs = "EPSG:6933"

In [6]:
# Read the administrative boundary shapefile, then reproject it to the
# working CRS so it lines up with the area calculation further down.
admin_boundaries_file = "Data/Mozambique_boundary.shp"
area_of_interest_gdf = gpd.read_file(admin_boundaries_file)
area_of_interest_gdf = area_of_interest_gdf.to_crs(output_crs)

In [9]:
# Preview only the first rows: a boundary file with many polygons would
# otherwise dump its whole attribute table into the notebook output.
area_of_interest_gdf.head()

Unnamed: 0,ADM0_CODE,ADM0_NAME,CONTINENT,ISO3,ISO2,UNI,UNDP,FAOSTAT,GAUL,RIC_ISO3,REC_ISO3,HIH,geometry
0,170,Mozambique,Africa,MOZ,MZ,508.0,MOZ,144.0,170.0,BDMS-MOZ,SADC-MOZ,0,"MULTIPOLYGON (((3183230.116 -3203550.711, 3183..."


In [8]:
# Open the predicted crop-type raster and drop its length-1 dimensions
# (presumably the single band axis - confirm against the GeoTIFF).
crop_type_path = "Results/Map/Test_areas_mosaic_croptype_merged_prediction.tif"
crop_type_raster = rioxarray.open_rasterio(crop_type_path)
da_crop_type = crop_type_raster.squeeze()

RasterioIOError: Results/Map/Test_areas_mosaic_croptype_merged_prediction.tif: No such file or directory

In [None]:
# Class-label dictionary written by the previous step; the loop below uses its
# keys as class names and its values as the raster codes to count.
labels_path = "Results/class_labels.json"

# Decode explicitly as UTF-8 so class names containing non-ASCII characters
# are read identically on every platform, whatever the locale default is.
with open(labels_path, "r", encoding="utf-8") as json_file:
    labels_dict = json.load(json_file)

In [None]:
# Quick visual check of the reprojected boundaries. An explicit figure/axes
# pair is used so the plot carries a title and axis labels of its own.
fig, ax = plt.subplots(figsize=(6, 8))
area_of_interest_gdf.plot(ax=ax)
ax.set(
    title="Administrative boundaries",
    xlabel=f"Easting ({output_crs}, m)",
    ylabel=f"Northing ({output_crs}, m)",
);

## Calculate areas per polygon

In [None]:
# Area of one raster pixel in km2, taken from the raster's affine transform
# (|determinant| = pixel width x height in CRS units) instead of assuming
# 10 m pixels.
# NOTE(review): this assumes the crop map is stored in a metre-based CRS
# matching `output_crs` - confirm for the input GeoTIFF.
pixel_area_km2 = abs(da_crop_type.geobox.transform.determinant) / 1e6

# Work on a copy so the boundary frame loaded earlier is left untouched, and
# store the polygon ID on the result frame, where the later cells read it.
gdf_new = area_of_interest_gdf.copy()
gdf_new["ID"] = gdf_new.index

for index in gdf_new.index:

    print(f"Processing polygon {index}")
    # Rasterize this polygon onto the crop-map grid (1 inside, 0 outside).
    # `.loc` selects by index label, so this also works when the index is not
    # a plain 0..n-1 range.
    district_mask = xr_rasterize(
        gdf=area_of_interest_gdf.loc[[index]],
        da=da_crop_type,
        transform=da_crop_type.geobox.transform,
        crs=output_crs,
    )
    # Keep crop-map pixels inside the polygon; everything outside becomes NaN
    # and therefore never matches a class value below.
    da_crop_type_district = da_crop_type.where(district_mask == 1)

    for class_name, class_value in labels_dict.items():
        # Count the crop-map pixels of this class (not the mask's own 0/1
        # values) and convert the count to an area.
        pixel_count = int((da_crop_type_district == class_value).sum())
        crop_type_area = pixel_count * pixel_area_km2
        attr_name = class_name + "_area_km2"
        gdf_new.loc[index, attr_name] = crop_type_area
        print('area in km2 for {}: {}'.format(class_name, crop_type_area))

## Plot areas histogram

In [None]:
# Bar chart of crop area per polygon. The area columns are named
# "<class name>_area_km2", so they are rebuilt from the label keys; the
# polygon index is used as the x axis.
area_columns = [f"{class_name}_area_km2" for class_name in labels_dict]

fig, ax = plt.subplots(figsize=(10, 6))
# Plot through a plain DataFrame: GeoDataFrame.plot() would draw a map of the
# geometries rather than a chart of the attribute columns.
pd.DataFrame(gdf_new[area_columns]).plot.bar(ax=ax)
ax.set(
    title="Crop area per polygon",
    xlabel="Polygon ID",
    ylabel="Area (km$^2$)",
);

## Export to files

In [None]:
# Name the driver explicitly so the output really is GeoJSON even on GeoPandas
# versions that do not infer the format from the file extension.
gdf_new.to_file("Results/crop_areas.geojson", driver="GeoJSON")

In [None]:
# Strip the geometry column so that only the attribute table (including the
# per-class area columns) is written out as CSV.
attributes_only = gdf_new.drop(columns="geometry")
df = pd.DataFrame(attributes_only)
df.to_csv("Results/crop_areas.csv")