In [1]:
import re
from pathlib import Path

import numpy as np
import rasterio as rio
from psycopg import connect
from tqdm.notebook import tqdm

## Rounded Earthstat

In [2]:
# Spot check on a single crop (abaca): load band 1 of the rounded production
# raster so it can be compared with the unprocessed raster.
earthstat_base_path = Path("../../h3_data_importer/data/earthstat/")
abaca_rounded_path = earthstat_base_path / "earthstat2000_global_prod/earthstat2000_global_abaca_production.tif"
with rio.open(abaca_rounded_path) as src_rounded:
    arr = src_rounded.read(1)

In [3]:
# Largest value and smallest strictly positive value of the rounded raster.
print(arr.max())
print(arr[arr > 0].min())

80.5969
1e-04


In [4]:
# Load band 1 of the unprocessed abaca production raster for the same spot check.
abaca_unprocessed_path = earthstat_base_path / "earthstat2000_global_prod_unprocessed/abaca_Production.tif"
with rio.open(abaca_unprocessed_path) as src_unprocessed:
    arr_no_round = src_unprocessed.read(1)

In [5]:
# Largest value and smallest strictly positive value of the unprocessed raster.
print(arr_no_round.max())
print(arr_no_round[arr_no_round > 0].min())

80.596924
1.04898646e-07


In [6]:
# The rounded raster must match the unprocessed raster rounded to 4 decimals
# (assert_almost_equal compares to 7 decimals unless told otherwise).
np.testing.assert_almost_equal(arr_no_round.round(4), arr)

### Production

In [7]:
# Collect the rounded and the unprocessed production rasters, each sorted by path.
earthstat_unprocessed_base = earthstat_base_path / "earthstat2000_global_prod_unprocessed"
earthstat_rounded_rasters = sorted((earthstat_base_path / "earthstat2000_global_prod").glob("*.tif"))
earthstat_unprocessed_rasters = sorted(earthstat_unprocessed_base.glob("*.tif"))

In [8]:
# Number of unprocessed production rasters found on disk.
len(earthstat_unprocessed_rasters)

175

In [9]:
# Number of rounded production rasters found on disk.
len(earthstat_rounded_rasters)

182

The lengths don't match because `extract-earthstat-crop-production` creates additional rasters that are the result of map algebra operations.

In [10]:
# Extract the crop name from each unprocessed raster: the part of the file
# name captured in front of "_Production".
patt = r"(.*)\_Production"
unprocessed_names_crop = [
    re.match(patt, raster_path.name).group(1) for raster_path in earthstat_unprocessed_rasters
]
unprocessed_names_crop[:3]

['abaca', 'agave', 'alfalfa']

In [11]:
# Extract the crop name from each rounded raster: the part of the file name
# captured between "earthstat2000_global_" and "_production".
patt = r"earthstat2000\_global\_(.*)\_production"
rounded_names = [
    re.match(patt, raster_path.name).group(1) for raster_path in earthstat_rounded_rasters
]
rounded_names[:3]

['abaca', 'agave', 'alfalfa']

In [12]:
# Sanity check: exactly one extracted crop name per rounded raster.
len(rounded_names) == len(earthstat_rounded_rasters)

True

In [13]:
# Keep only the rounded rasters whose crop name also exists among the
# unprocessed rasters.
earthstat_rounded_rasters_clean = [
    earthstat_rounded_rasters[idx]
    for idx in range(len(rounded_names))
    if rounded_names[idx] in unprocessed_names_crop
]

In [14]:
# Compare every rounded production raster with its unprocessed source.
# Both lists are sorted so that the i-th rounded raster is paired with the
# i-th unprocessed raster.
rounded_sorted = sorted(earthstat_rounded_rasters_clean)
unprocessed_sorted = sorted(earthstat_unprocessed_rasters)

# zip() stops at the shorter list, so a missing raster on either side would
# silently skip files (or shift the pairing). Fail loudly instead.
assert len(rounded_sorted) == len(unprocessed_sorted), (
    f"Cannot pair rasters: {len(rounded_sorted)} rounded vs {len(unprocessed_sorted)} unprocessed"
)

pbar = tqdm(zip(rounded_sorted, unprocessed_sorted), total=len(unprocessed_sorted))

for rounded, not_rounded in pbar:
    pbar.set_description(f"Comparing {rounded.name} and {not_rounded.name}")
    # Read each file in its own context so only one dataset is open at a time.
    with rio.open(rounded) as r_rounded:
        arr_rounded = r_rounded.read()
    with rio.open(not_rounded) as r_not_rounded:
        arr_not_rounded = r_not_rounded.read()
    # convert nans to 0s so both arrays are compared on the same footing
    arr_rounded = np.where(np.isnan(arr_rounded), 0, arr_rounded)
    arr_not_rounded = np.where(np.isnan(arr_not_rounded), 0, arr_not_rounded)
    np.testing.assert_almost_equal(
        np.round(arr_not_rounded, 4),
        arr_rounded,
        decimal=4,
        err_msg=f"{rounded.name} does not match {not_rounded.name} rounded to 4 decimals",
    )

  0%|          | 0/175 [00:00<?, ?it/s]

### Harvest

In [15]:
# Collect the rounded and the unprocessed harvest (`ha`) rasters, each sorted
# by path. Note that `earthstat_unprocessed_base` is rebound to the harvest folder.
earthstat_unprocessed_base = earthstat_base_path / "earthstat2000_global_ha_unprocessed"
earthstat_ha_rounded_rasters = sorted((earthstat_base_path / "earthstat2000_global_ha").glob("*.tif"))
earthstat_unprocessed_ha_rasters = sorted(earthstat_unprocessed_base.glob("*.tif"))

print(len(earthstat_ha_rounded_rasters), len(earthstat_unprocessed_ha_rasters))

182 175


In [16]:
# Reuse the crop names extracted from the production file names to filter the
# harvest rasters. This pairs the i-th harvest raster with the i-th production
# crop name, so it assumes both sorted lists follow the same crop order
# (NOTE(review): confirm the harvest files use the same crop tokens).
# At minimum the two lists must have the same length; otherwise extra harvest
# rasters would be ignored or the indexing would fail with an opaque IndexError.
assert len(earthstat_ha_rounded_rasters) == len(rounded_names), (
    f"{len(earthstat_ha_rounded_rasters)} rounded harvest rasters vs "
    f"{len(rounded_names)} production crop names"
)
unprocessed_crop_set = set(unprocessed_names_crop)
earthstat_rounded_ha_rasters_clean = [
    raster for raster, name in zip(earthstat_ha_rounded_rasters, rounded_names) if name in unprocessed_crop_set
]

In [17]:
# Compare every rounded harvest raster with its unprocessed source.
# Both lists are sorted so that the i-th rounded raster is paired with the
# i-th unprocessed raster.
rounded_ha_sorted = sorted(earthstat_rounded_ha_rasters_clean)
unprocessed_ha_sorted = sorted(earthstat_unprocessed_ha_rasters)

# zip() stops at the shorter list, so a missing raster on either side would
# silently skip files (or shift the pairing). Fail loudly instead.
assert len(rounded_ha_sorted) == len(unprocessed_ha_sorted), (
    f"Cannot pair rasters: {len(rounded_ha_sorted)} rounded vs {len(unprocessed_ha_sorted)} unprocessed"
)

pbar = tqdm(zip(rounded_ha_sorted, unprocessed_ha_sorted), total=len(unprocessed_ha_sorted))

for rounded, not_rounded in pbar:
    pbar.set_description(f"Comparing {rounded.name} and {not_rounded.name}")
    # Read each file in its own context so only one dataset is open at a time.
    with rio.open(rounded) as r_rounded:
        arr_rounded = r_rounded.read()
    with rio.open(not_rounded) as r_not_rounded:
        arr_not_rounded = r_not_rounded.read()
    # convert nans to 0s so both arrays are compared on the same footing
    arr_rounded = np.where(np.isnan(arr_rounded), 0, arr_rounded)
    arr_not_rounded = np.where(np.isnan(arr_not_rounded), 0, arr_not_rounded)
    np.testing.assert_almost_equal(
        np.round(arr_not_rounded, 4),
        arr_rounded,
        decimal=4,
        err_msg=f"{rounded.name} does not match {not_rounded.name} rounded to 4 decimals",
    )

  0%|          | 0/175 [00:00<?, ?it/s]

## Rounded Mapspam

### Production

In [18]:
# Spot check on a single MapSPAM raster: load band 1 of the rounded file.
mapsmap_base_path = Path("../../h3_data_importer/data/mapspam/spam2010v2r0_global_prod/")
acof_rounded_path = mapsmap_base_path / "spam2010V2r0_global_P_ACOF_A.tif"
with rio.open(acof_rounded_path) as src_rounded:
    arr_rounded = src_rounded.read(1)

In [19]:
# Load band 1 of the unprocessed raster with the same file name.
unprocessed_mapspam_base_path = Path("../../h3_data_importer/data/mapspam/spam2010v2r0_global_prod_unprocessed/")
acof_unprocessed_path = unprocessed_mapspam_base_path / "spam2010V2r0_global_P_ACOF_A.tif"
with rio.open(acof_unprocessed_path) as src_unprocessed:
    arr = src_unprocessed.read(1)

In [20]:
# The rounded raster must match the unprocessed raster rounded to 4 decimals.
# Round the unprocessed array first, as the other rounding checks in this
# notebook do; comparing the raw array would not test the rounding itself.
np.testing.assert_almost_equal(np.round(arr, 4), arr_rounded)

In [21]:
# Check every rounded MapSPAM production raster against the unprocessed file
# that has the same name. The list is sorted so the order is deterministic.
rasters = sorted(mapsmap_base_path.glob("*.tif"))
# An empty glob (e.g. a wrong path) would make the loop pass without checking anything.
assert rasters, f"No .tif rasters found in {mapsmap_base_path}"
for raster in rasters:
    with rio.open(raster) as src_rounded:
        arr_rounded = src_rounded.read(1)
    with rio.open(unprocessed_mapspam_base_path / raster.name) as src:
        arr = src.read(1)
    np.testing.assert_almost_equal(
        np.round(arr, 4),
        arr_rounded,
        err_msg=f"{raster.name} does not match its unprocessed raster rounded to 4 decimals",
    )

### Harvest

In [22]:
# Folder holding the rounded MapSPAM harvest (`ha`) rasters.
mapsmap_base_path_ha = Path("../../h3_data_importer/data/mapspam/spam2010v2r0_global_ha/")
# Folder holding the unprocessed MapSPAM harvest rasters they are compared against.
unprocessed_mapspam_base_path_ha = Path("../../h3_data_importer/data/mapspam/spam2010v2r0_global_ha_unprocessed/")

In [23]:
# Check every rounded MapSPAM harvest raster against the unprocessed file
# that has the same name. The list is sorted so the order is deterministic.
rasters = sorted(mapsmap_base_path_ha.glob("*.tif"))
# An empty glob (e.g. a wrong path) would make the loop pass without checking anything.
assert rasters, f"No .tif rasters found in {mapsmap_base_path_ha}"
for raster in rasters:
    with rio.open(raster) as src_rounded:
        arr_rounded = src_rounded.read(1)
    with rio.open(unprocessed_mapspam_base_path_ha / raster.name) as src:
        arr = src.read(1)
    np.testing.assert_almost_equal(
        np.round(arr, 4),
        arr_rounded,
        err_msg=f"{raster.name} does not match its unprocessed raster rounded to 4 decimals",
    )