In [14]:
%load_ext autoreload
%autoreload 2
import os
os.chdir("/scratch/ewalt/pdm/rs-uncertainty")
import sys
sys.path.insert(0, "gee")
from utils import getGDalRioStatDataFrame, countNoData, getDataPositions
import rasterio
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gdal
import subprocess, shlex
from pathlib import Path
import tempfile
sns.set()
sns.set_style("whitegrid")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
def loadRasters(*paths):
    """Read every band of each raster file and return the arrays in input order.

    Args:
        *paths: Paths of raster files that ``rasterio.open`` can open.

    Returns:
        list: One entry per path, each the result of reading all band
        indexes of that dataset. Empty when no paths are given.
    """
    def _readAllBands(rasterPath):
        # The context manager closes the dataset as soon as the read returns.
        with rasterio.open(rasterPath) as dataset:
            return dataset.read(dataset.indexes)

    return [_readAllBands(rasterPath) for rasterPath in paths]

# TL;DR

`rio warp` is consistently better than our `gdal` implementation: it generates fewer nodata values and yields smaller errors with respect to the reference rasters.

# 20180705T105029
round: `python gee/ignore.py 20180705T105029 round`\
int: `python gee/ignore.py 20180705T105029 int`

In [9]:
# Input rasters for acquisition 20180705T105029 (absolute paths on the original machine).
# Reference raster, stored under sentinel_data/s2_reprojected/1023.
ref_path = "/scratch/ewalt/pdm/rs-uncertainty/assets/data/sentinel_data/s2_reprojected/1023/1023_S2B_MSIL2A_20180705T105029_N0208_R051_T32VNM_20180705T144546.tif"
# Raster stored under rio_warp_tests — presumably the `rio warp` output; TODO confirm.
rio_path = "/scratch/ewalt/pdm/rs-uncertainty/gee_data/rio_warp_tests/1023_GEE_COPERNICUS-S2-SR-HARMONIZED_20180705T105029_20230523T194020.tif"
# Rasters stored under gdal_warp_tests; the "round"/"int" filename prefix presumably
# matches the rounding mode given to gee/ignore.py — TODO confirm.
gdal_path_round = "/scratch/ewalt/pdm/rs-uncertainty/gee_data/gdal_warp_tests/round1023_GEE_COPERNICUS-S2-SR-HARMONIZED_20180705T105029_20230523T194020.tif"
gdal_path_int = "/scratch/ewalt/pdm/rs-uncertainty/gee_data/gdal_warp_tests/int1023_GEE_COPERNICUS-S2-SR-HARMONIZED_20180705T105029_20230523T194020.tif"

In [21]:
# Mean squared error of each candidate raster against the reference, one value per
# index along axis 0 of the loaded arrays.
# NOTE(review): this cell reads ref_path/rio_path/gdal_path_* from kernel state. Its
# execution count (21) is later than the 20180605T105029 path cell (18), so the table
# shown below may have been computed from those paths — re-run the notebook in order.
refData, rioData, gdalDataRound, gdalDataInt = loadRasters(ref_path, rio_path, gdal_path_round, gdal_path_int)
mses = {}
for source, data in zip(["rio","gdal_round","gdal_int"],[rioData, gdalDataRound, gdalDataInt]):
    # getDataPositions comes from gee/utils (not visible here); list(zip(*...)) transposes
    # whatever sequence it returns for nodata=0.
    pos = list(zip(*getDataPositions(data, nodata=0.)))
    # Squared difference to the reference at the selected positions, averaged over axes 1 and 2.
    # NOTE(review): pos[0] is used for BOTH trailing index axes. If pos holds separate
    # row and column sequences, the second index was probably meant to be pos[1] — confirm
    # against getDataPositions before trusting these numbers.
    mses[source] = np.mean((refData[:,pos[0],pos[0]]-data[:,pos[0],pos[0]])**2, axis=(1,2))
# Last expression of the cell: displayed as a table with one column per source.
pd.DataFrame(mses)

Unnamed: 0,rio,gdal_round,gdal_int
0,5100.5,7550.5,7550.5
1,9248.0,25297.0,16744.5
2,11336.5,40862.5,29592.5
3,8192.0,22114.0,42038.5
4,19794.0,31749.0,8108.5
5,30608.5,35012.5,17592.5
6,9778.0,5109.0,33376.5
7,11996.5,50128.0,25728.0
8,7684.5,34110.5,23484.5
9,23408.5,47461.0,47461.0


In [10]:
# stats summary
# For each rounding function (round, int): load the matching gdal raster, print the
# nodata counts, then print the MSE of the rio and gdal deltas for each statistic,
# once with nodata=None and once with nodata=0.
for rfunc in [round, int]:
    print(rfunc.__name__)
    # Pick the gdal output whose filename prefix matches the rounding function.
    if rfunc==round:
        gdal_p = gdal_path_round
    else:
        gdal_p = gdal_path_int
    refData, rioData, gdalData = loadRasters(ref_path, rio_path, gdal_p)
    # countNoData (gee/utils, not visible here) is iterated as (label, count) pairs.
    print(', '.join([str(k)+': '+str(v) for k,v in countNoData(refData, rioData, gdalData)]))
    for sfunc in ["min", "max", "mean", "std"]:
        print(sfunc)
        # First pass: nodata=None — presumably no nodata filtering; confirm in gee/utils.
        df = getGDalRioStatDataFrame(refData, rioData, gdalData, sfunc, rfunc, nodata=None)
        # The delta columns are squared and averaged to obtain one MSE per method.
        gdal_mse = np.mean((df[f"gdal_{rfunc.__name__}_delta"].values)**2)
        rio_mse = np.mean((df["rio_delta"].values)**2)
        print(f"rio_mse: {rio_mse:.3f}, gdal_mse: {gdal_mse:.3f}")
        # Second pass: same statistic with nodata=0. passed to the helper.
        df = getGDalRioStatDataFrame(refData, rioData, gdalData, sfunc, rfunc, nodata=0.)
        gdal_mse = np.mean((df[f"gdal_{rfunc.__name__}_delta"].values)**2)
        rio_mse = np.mean((df["rio_delta"].values)**2)
        print(f"rio_mse_no_nodata: {rio_mse:.3f}, gdal_mse_no_nodata: {gdal_mse:.3f}")
    # Blank line between the two rounding-function reports.
    print()

round
ref: 0, rio: 57154, gdal: 156407
min
rio_mse: 324.769, gdal_mse: 324.769


  df[gdalDeltaKey].append(df["ref"][-1]-df[gdalKey][-1])


rio_mse_no_nodata: 0.000, gdal_mse_no_nodata: 17.000
max
rio_mse: 0.000, gdal_mse: 0.000
rio_mse_no_nodata: 0.000, gdal_mse_no_nodata: 0.000
mean
rio_mse: 472.560, gdal_mse: 3962.412
rio_mse_no_nodata: 71.868, gdal_mse_no_nodata: 419.665
std
rio_mse: 492.332, gdal_mse: 4005.732
rio_mse_no_nodata: 34.494, gdal_mse_no_nodata: 82.110

int
ref: 0, rio: 57154, gdal: 157717
min
rio_mse: 324.769, gdal_mse: 324.769
rio_mse_no_nodata: 0.000, gdal_mse_no_nodata: 17.000
max
rio_mse: 0.000, gdal_mse: 0.000
rio_mse_no_nodata: 0.000, gdal_mse_no_nodata: 0.000
mean
rio_mse: 472.560, gdal_mse: 4019.700
rio_mse_no_nodata: 71.868, gdal_mse_no_nodata: 430.739
std
rio_mse: 492.332, gdal_mse: 4054.124
rio_mse_no_nodata: 34.494, gdal_mse_no_nodata: 85.921



# 20180605T105029
round: `python gee/ignore.py 20180605T105029 round`\
int: `python gee/ignore.py 20180605T105029 int`

In [18]:
# Input rasters for acquisition 20180605T105029 (absolute paths on the original machine).
# These assignments overwrite any ref_path/rio_path/gdal_path_* already in the kernel.
# Reference raster, stored under sentinel_data/s2_reprojected/1023.
ref_path = "/scratch/ewalt/pdm/rs-uncertainty/assets/data/sentinel_data/s2_reprojected/1023/1023_S2B_MSIL2A_20180605T105029_N0208_R051_T32VNM_20180605T132338.tif"
# Raster stored under rio_warp_tests — presumably the `rio warp` output; TODO confirm.
rio_path = "/scratch/ewalt/pdm/rs-uncertainty/gee_data/rio_warp_tests/1023_GEE_COPERNICUS-S2-SR-HARMONIZED_20180605T105029_20230523T193947.tif"
# Rasters stored under gdal_warp_tests; the "round"/"int" filename prefix presumably
# matches the rounding mode given to gee/ignore.py — TODO confirm.
gdal_path_round = "/scratch/ewalt/pdm/rs-uncertainty/gee_data/gdal_warp_tests/round1023_GEE_COPERNICUS-S2-SR-HARMONIZED_20180605T105029_20230523T193947.tif"
gdal_path_int = "/scratch/ewalt/pdm/rs-uncertainty/gee_data/gdal_warp_tests/int1023_GEE_COPERNICUS-S2-SR-HARMONIZED_20180605T105029_20230523T193947.tif"

In [20]:
# Mean squared error of each candidate raster against the reference, one value per
# index along axis 0 of the loaded arrays. Uses whichever ref_path/rio_path/gdal_path_*
# values are currently defined in the kernel.
refData, rioData, gdalDataRound, gdalDataInt = loadRasters(ref_path, rio_path, gdal_path_round, gdal_path_int)
mses = {}
for source, data in zip(["rio","gdal_round","gdal_int"],[rioData, gdalDataRound, gdalDataInt]):
    # getDataPositions comes from gee/utils (not visible here); list(zip(*...)) transposes
    # whatever sequence it returns for nodata=0.
    pos = list(zip(*getDataPositions(data, nodata=0.)))
    # Squared difference to the reference at the selected positions, averaged over axes 1 and 2.
    # NOTE(review): pos[0] is used for BOTH trailing index axes. If pos holds separate
    # row and column sequences, the second index was probably meant to be pos[1] — confirm
    # against getDataPositions before trusting these numbers.
    mses[source] = np.mean((refData[:,pos[0],pos[0]]-data[:,pos[0],pos[0]])**2, axis=(1,2))
# Last expression of the cell: displayed as a table with one column per source.
pd.DataFrame(mses)

Unnamed: 0,rio,gdal_round,gdal_int
0,5100.5,7550.5,7550.5
1,9248.0,25297.0,16744.5
2,11336.5,40862.5,29592.5
3,8192.0,22114.0,42038.5
4,19794.0,31749.0,8108.5
5,30608.5,35012.5,17592.5
6,9778.0,5109.0,33376.5
7,11996.5,50128.0,25728.0
8,7684.5,34110.5,23484.5
9,23408.5,47461.0,47461.0


In [8]:
# stats summary
# For each rounding function (round, int): load the matching gdal raster, print the
# nodata counts, then print the MSE of the rio and gdal deltas for each statistic,
# once with nodata=None and once with nodata=0.
for rfunc in [round, int]:
    print(rfunc.__name__)
    # Pick the gdal output whose filename prefix matches the rounding function.
    if rfunc==round:
        gdal_p = gdal_path_round
    else:
        gdal_p = gdal_path_int
    refData, rioData, gdalData = loadRasters(ref_path, rio_path, gdal_p)
    # countNoData (gee/utils, not visible here) is iterated as (label, count) pairs.
    print(', '.join([str(k)+': '+str(v) for k,v in countNoData(refData, rioData, gdalData)]))
    for sfunc in ["min", "max", "mean", "std"]:
        print(sfunc)
        # First pass: nodata=None — presumably no nodata filtering; confirm in gee/utils.
        df = getGDalRioStatDataFrame(refData, rioData, gdalData, sfunc, rfunc, nodata=None)
        # The delta columns are squared and averaged to obtain one MSE per method.
        gdal_mse = np.mean((df[f"gdal_{rfunc.__name__}_delta"].values)**2)
        rio_mse = np.mean((df["rio_delta"].values)**2)
        print(f"rio_mse: {rio_mse:.3f}, gdal_mse: {gdal_mse:.3f}")
        # Second pass: same statistic with nodata=0. passed to the helper.
        df = getGDalRioStatDataFrame(refData, rioData, gdalData, sfunc, rfunc, nodata=0.)
        gdal_mse = np.mean((df[f"gdal_{rfunc.__name__}_delta"].values)**2)
        rio_mse = np.mean((df["rio_delta"].values)**2)
        print(f"rio_mse_no_nodata: {rio_mse:.3f}, gdal_mse_no_nodata: {gdal_mse:.3f}")
    # Blank line between the two rounding-function reports.
    print()

round
ref: 0, rio: 57154, gdal: 156407
min
rio_mse: 0.231, gdal_mse: 0.231
rio_mse_no_nodata: 0.000, gdal_mse_no_nodata: 0.000
max
rio_mse: 807.385, gdal_mse: 807.385
rio_mse_no_nodata: 807.385, gdal_mse_no_nodata: 807.385
mean
rio_mse: 399.906, gdal_mse: 3658.005
rio_mse_no_nodata: 104.859, gdal_mse_no_nodata: 526.577
std
rio_mse: 360.944, gdal_mse: 3259.645
rio_mse_no_nodata: 44.920, gdal_mse_no_nodata: 94.914

int
ref: 0, rio: 57154, gdal: 157717
min
rio_mse: 0.231, gdal_mse: 0.231
rio_mse_no_nodata: 0.000, gdal_mse_no_nodata: 0.000
max
rio_mse: 807.385, gdal_mse: 807.385
rio_mse_no_nodata: 807.385, gdal_mse_no_nodata: 807.385
mean
rio_mse: 399.906, gdal_mse: 3711.690
rio_mse_no_nodata: 104.859, gdal_mse_no_nodata: 539.181
std
rio_mse: 360.944, gdal_mse: 3296.652
rio_mse_no_nodata: 44.920, gdal_mse_no_nodata: 99.445

