# Comparison of Sentinel-1 RTC products from different software

In [None]:
import os
import h5py
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import requests
import boto3
import pandas as pd
import json
import rasterio
from rasterio.crs import CRS
from rasterio.enums import Resampling
import rioxarray
import asf_search as asf
from shapely.geometry import Polygon
from celluloid import Camera # getting the camera
from IPython.display import HTML

# apply seaborn's default theme to the figures drawn in this notebook
sns.set_theme()

# IPython magic: show matplotlib figures inline in the cell output
%matplotlib inline

# Functions

In [None]:
def make_gif(imgs, vmin, vmax):
    """Build a frame-by-frame animation from a sequence of images.

    Every image is drawn with ``imshow`` on one shared axes using the same
    colour limits, and a celluloid ``Camera`` snapshot is taken after each
    draw so that each image becomes one frame.

    Parameters
    ----------
    imgs : iterable
        Images to display, one per frame, in display order.
    vmin, vmax : float
        Colour scale limits passed to ``imshow`` for every frame.

    Returns
    -------
    The animation object returned by ``Camera.animate()``.
    """
    figure, axes = plt.subplots()
    recorder = Camera(figure)  # the camera is bound to this figure
    for frame in imgs:
        axes.imshow(frame, vmin=vmin, vmax=vmax)
        recorder.snap()  # one snapshot per image
    return recorder.animate()

# Settings

In [None]:
# general structure for scenes in s3 (matches the keys built in the cells below)
# s3_bucket/software/dem/proj/scene/scene_files
s3_bucket = 'deant-data-public-dev'
s3_bucket_link = 'https://deant-data-public-dev.s3.ap-southeast-2.amazonaws.com/'
softwares = ['pyrosar','rtc-opera']
scenes = [
        'S1B_IW_SLC__1SSH_20190223T222639_20190223T222706_015079_01C2E9_1D63',
        'S1A_IW_SLC__1SSH_20190605T222724_20190605T222751_027550_031BE1_AD3A',
        'S1A_IW_SLC__1SSH_20190926T124734_20190926T124804_029192_0350B9_FA6B',
        'S1A_IW_SLC__1SSH_20230127T142750_20230127T142817_046970_05A22F_17F7',
        'S1B_IW_SLC__1SSH_20190315T195015_20190315T195045_015369_01CC73_DB8B',
        'S1B_IW_SLC__1SSH_20210223T233056_20210223T233124_025740_031194_E7BE',
        'S1B_IW_SLC__1SSH_20210228T035005_20210228T035033_025801_03138F_8CB2',
]
dem = 'glo_30'
proj = '3031'

# get credentials for AWS
# the file holds the access key id on the first line and the secret access key
# on the second; blank lines (e.g. a trailing newline at the end of the file)
# are ignored so they do not break the unpacking
with open('aws_credentials.txt') as f:
    credential_lines = [line.strip() for line in f if line.strip()]
if len(credential_lines) != 2:
    raise ValueError(
        'aws_credentials.txt must contain exactly two non-empty lines: '
        'the access key id followed by the secret access key'
    )
ACCESS_ID, ACCESS_KEY = credential_lines

# setup s3
s3 = boto3.client('s3', 
                        region_name='ap-southeast-2',
                        aws_access_key_id=ACCESS_ID,
                        aws_secret_access_key= ACCESS_KEY)

# make data directory to store local files
os.makedirs('data', exist_ok=True)

## Show example scene files for software

In [None]:
# collect the s3 object records stored under each software/dem/proj/scene prefix
# NOTE(review): a single list_objects_v2 call is made per prefix; confirm no
# scene prefix holds more objects than one response can return
file_list = []
for software in softwares:
    for i, scene in enumerate(scenes):
        objects = s3.list_objects_v2(
            Bucket=s3_bucket,
            Prefix=f'{software}/{dem}/{proj}/{scene}',
        )
        # responses without a 'Contents' entry have nothing to record
        if 'Contents' not in objects:
            continue
        if i == 0:
            # print the files of the first scene as an example for this software
            print(f'software : {software}')
            for x in objects['Contents']:
                print(x)
        file_list.extend(objects['Contents'])

# save all of the files in a dataframe for ease of searching
df_s3 = pd.DataFrame.from_records(file_list)
# split the key into its path components; n=4 keeps any deeper path in 'file'
df_s3[['software','dem','crs','scene','file']] = df_s3['Key'].str.split('/', n=4, expand=True)
#df_s3

# Compare total timing

In [None]:
# download the timing json of every software/scene pair and collect the records
timing_data = []
for software in softwares:
    for scene in scenes:
        timing_file = f'{software}/{dem}/{proj}/{scene}/{scene}_timing.json'
        try:
            s3.download_file(s3_bucket, timing_file, 'tmp.json')
        except s3.exceptions.ClientError:
            # only S3 client errors (e.g. a missing key) are treated as
            # "no file"; anything else is a real failure and should surface
            print(f'no timing file: {timing_file}')
            continue
        try:
            with open('tmp.json') as json_file:
                data = json.load(json_file)
        except json.JSONDecodeError as err:
            print(f'invalid timing file: {timing_file} ({err})')
            continue
        data['software'] = software
        data['scene'] = scene
        timing_data.append(data)
        print(f'downloaded: {timing_file}')

# the scratch file only exists if at least one download succeeded
if os.path.exists('tmp.json'):
    os.remove('tmp.json')
df_timing = pd.DataFrame.from_records(timing_data, index=['software','scene'])

# plot mean time by software
sw_count = df_timing.groupby('software').size()
ax = (df_timing.groupby('software').mean()
 .drop(columns=['Total'])
 .plot.bar(stacked=True))
ax.set_xlabel('Software')
ax.set_ylabel('Time (seconds)')
ax.set_title('Software Processing Times (DEM upsampling=2)')
print(sw_count)
df_timing

# Compare RTC Process Timing
- Investigate the logs

In [None]:
OPERA_RTC_times = {}
# read opera logs: pick the rtc-opera log objects from the listing and load
# the second one as text lines
opera_logs = df_s3[(df_s3['software']=='rtc-opera') & (df_s3['Key'].str.contains('logs'))]
logs = s3.get_object(Bucket=s3_bucket, Key=opera_logs['Key'].values[1])
logs_content = logs['Body'].read()
log_lines = logs_content.decode("utf-8").splitlines()

# keep only the lines that carry timing info, then group them by tag
time_lines = [line for line in log_lines if 'time' in line or 'timing' in line]
GEO_AP = [line for line in time_lines if 'GEO-AP' in line]  # burst AP geometric correction
RTC_AP = [line for line in time_lines if 'RTC-AP' in line]  # burst AP radiometric correction
CHILD = [line for line in time_lines if 'Child' in line]  # total time for geom/radio correction

# the value is whatever follows the last ': ' on the line
# (for the 'Child' lines only the first space-separated token is numeric)
GEO_AP_t = sum(float(line.rsplit(': ', 1)[-1]) for line in GEO_AP)
RTC_AP_t = sum(float(line.rsplit(': ', 1)[-1]) for line in RTC_AP)
RTC_CHILD_t = sum(float(line.rsplit(': ', 1)[-1].split(' ', 1)[0]) for line in CHILD)
Total_t = float(time_lines[-1].rsplit(': ', 1)[-1])

# bursts are processed in parallel, so the per-burst sums cannot be used as
# elapsed time; instead the child total is split by the geo/radio ratio
burst_total_t = GEO_AP_t + RTC_AP_t
OPERA_RTC_times['Terration Correction (geometric)'] = (GEO_AP_t/burst_total_t)*RTC_CHILD_t
OPERA_RTC_times['Terrain Flattening (radiometric)'] = (RTC_AP_t/burst_total_t)*RTC_CHILD_t
OPERA_RTC_times['Mosaicing and formatting'] = Total_t - RTC_CHILD_t
OPERA_RTC_times['Total'] = Total_t
OPERA_RTC_times

In [None]:
pyrosar_RTC_times = {}
# read pyrosar logs: load the first pyrosar log object as text lines
pyrosar_logs = df_s3[(df_s3['software']=='pyrosar') & (df_s3['Key'].str.contains('logs'))]
logs = s3.get_object(Bucket=s3_bucket, Key=pyrosar_logs['Key'].values[0])
logs_content = logs['Body'].read()
log_lines = logs_content.decode("utf-8").splitlines()
# index of the first line mentioning each marker; these bracket the RTC step
RTC_start = [n for n, line in enumerate(log_lines) if 'PROCESS 2' in line][0]
RTC_END = [n for n, line in enumerate(log_lines) if 'RTC Backscatter successfully made' in line][0]
#log_lines[RTC_start:RTC_END]
# TODO: the per-step breakdown for pyrosar is not filled in yet; the intended
# entries mirror the OPERA cell:
# pyrosar_RTC_times['Terration Correction (geometric)'] = (GEO_AP_t/(GEO_AP_t+RTC_AP_t))*RTC_CHILD_t
# pyrosar_RTC_times['Terrain Flattening (radiometric)'] = (RTC_AP_t/(GEO_AP_t+RTC_AP_t))*RTC_CHILD_t
# pyrosar_RTC_times['Mosaicing and formatting'] = Total_t - RTC_CHILD_t
# pyrosar_RTC_times['Total'] = Total_t


# Compare Values
**Differences**
- Subtle differences may be caused by apply_bistatic_delay_correction and apply_static_tropospheric_delay_correction for OPERA products
- DEM oversampling (2 is default for pyrosar, I think 1 for opera)
- Treatment of burst overlaps:
    - By default OPERA will select the middle of the burst overlaps 
    - SNAP selects one (perhaps the first?)

In [None]:
# choose the scene to compare and keep only its RTC tif files from the listing
scene = scenes[0]
is_scene = df_s3['scene'] == scene
is_rtc = df_s3['file'].str.contains('RTC|rtc')
is_tif = df_s3['file'].str.contains('tif')
scene_tifs = df_s3[is_scene & is_rtc & is_tif]
scene_tifs

In [None]:
# download tifs and store locally (files already in data/ are not fetched again)
for row in scene_tifs.itertuples(index=False):
      key, filename = row.Key, row.file
      if not os.path.exists(f'data/{filename}'):
            print(f'downloading {filename}')
            s3.download_file(s3_bucket, key, f'data/{filename}')

# place to store data
hist_data, sw, crss, meta = [],[], [], []
colors = ['red', 'blue']

# plot the tif
f, ax = plt.subplots(nrows=1, ncols=len(scene_tifs), figsize=(18,10))
# subplots returns a bare Axes (not an array) when there is a single column
ax = np.atleast_1d(ax)
for i, row in enumerate(scene_tifs.itertuples(index=False)):
      filename = row.file
      software = row.software
      key = row.Key

      # assign crs to pyrosar (written into the local file, opened in update mode)
      if 'pyrosar' in key:
            print(f'Assigning EPSG:3031 to {filename}')
            with rasterio.open(f'data/{filename}', 'r+') as rds:
                  rds.crs = CRS.from_epsg(3031)

      with rasterio.open(f'data/{filename}') as src:
            data = src.read(1)
            if 'pyrosar' in key:
                  # convert no data from 0 to nan BEFORE the dB conversion:
                  # afterwards a value of 0 means 0 dB (linear 1.0), while the
                  # nodata zeros would already have become -inf
                  data = np.where(data == 0, np.nan, data)
            # convert from linear to db
            data = 10*np.log10(data)
            crss.append(src.meta['crs'])
            print(f'{software} - {data.shape}')
            #plt.figure(figsize = (10,6))
            im = ax[i].imshow(data, vmin=-40, vmax=10, cmap='binary_r')
            ax[i].set_title(f'{software}')
            # only finite values are kept for the histogram
            hist_data.append(data[(np.isfinite(data))])
            sw.append(software)
            meta.append(src.meta.copy())

plt.suptitle(f'{scene}', y=0.9)
# one shared colorbar in its own axes on the right-hand side of the figure
cbar_ax = f.add_axes([0.95, 0.15, 0.04, 0.7])
f.colorbar(im, cax=cbar_ax)
plt.show()

# plot the histogram 
for i, (name, values) in enumerate(zip(sw, hist_data)):
      u, std = np.mean(values), np.std(values)
      plt.hist(values, 
               density=True,
               bins=60, 
               alpha=0.5, 
               label=f'{name}; u={u:.3f}, std={std:.3f}', 
               # reuse the palette if there are more rasters than colours
               color=colors[i % len(colors)],
               histtype='step')

plt.title(f'{scene}')
plt.xlabel('Gamma0 RTC (dB)')
plt.ylabel('Frequency')
plt.legend(loc='best')
plt.grid(True)
plt.show()

In [None]:
# show the rasterio metadata recorded for each software's raster
print('Metadata and projections')
for name, profile in zip(sw, meta):
    print(name)
    print(profile)

# Raster Difference Maps
- Note differences may be due to geometric differences and not intensity
- Be sure to inspect the pixel shift below

In [None]:
# look the scene up in ASF and build its footprint polygon from the
# longitude/latitude boundary points in the search result's metadata
# (the polygon is later passed to clip() together with EPSG:4326)
search_opts = asf.ASFSearchOptions(processingLevel='SLC')
asf_result = asf.granule_search([scene], search_opts)[0]
boundary = (vars(asf_result)['umm']['SpatialExtent']['HorizontalSpatialDomain']
                ['Geometry']['GPolygons'][0]['Boundary']['Points'])
points = [(pt['Longitude'], pt['Latitude']) for pt in boundary]
scene_poly = Polygon(points)
str(scene_poly)

In [None]:
# local paths of the tif listed for each software
pyrosar_name = scene_tifs.loc[scene_tifs['software']=='pyrosar', 'file'].iloc[0]
opera_name = scene_tifs.loc[scene_tifs['software']=='rtc-opera', 'file'].iloc[0]
pyrosar_file = f'data/{pyrosar_name}'
opera_file = f'data/{opera_name}'
pyrosar = rioxarray.open_rasterio(pyrosar_file)
opera = rioxarray.open_rasterio(opera_file)

# clip both rasters by the scene geometry (given in EPSG:4326)
footprint_crs = CRS.from_epsg(4326)
pyrosar_clipped = pyrosar.rio.clip([scene_poly], footprint_crs)
opera_clipped = opera.rio.clip([scene_poly], footprint_crs)
print(pyrosar_clipped.shape, opera_clipped.shape)

# put pyrosar on the opera grid so the two can be compared pixel by pixel
pyrosar_matched = pyrosar_clipped.rio.reproject_match(opera_clipped)
print(pyrosar_matched.shape)

# convert both to dB and difference them
pyrosar_db = 10*np.log10(pyrosar_matched)
opera_db = 10*np.log10(opera_clipped)
diff = pyrosar_db - opera_db
# relative difference as a % of opera
# NOTE(review): this divides a dB difference by the linear opera values -
# confirm that is the intended definition
rel_deff = 100*(diff/opera_clipped)

# save tifs
outputs = {
    f'data/{scene}_pyrosar_clipped.tif': pyrosar_db,
    f'data/{scene}_opera_clipped.tif': opera_db,
    f'data/{scene}_diff_clipped.tif': diff,
}
for out_path, raster in outputs.items():
    raster.rio.to_raster(out_path)

In [None]:
# optional resampling of both dB rasters to a scaled grid
# set to a number (e.g. 0.1) to enable; False leaves the rasters untouched
# upscale_factor = 0.1
upscale_factor = False
if upscale_factor:
    # target size is derived from the pyrosar raster and used for both
    new_width = int(pyrosar_db.rio.width * upscale_factor)
    new_height = int(pyrosar_db.rio.height * upscale_factor)
    resample_kwargs = {
        'shape': (new_height, new_width),
        'resampling': Resampling.bilinear,
    }

    # each raster keeps its own crs; only the grid shape changes
    pyrosar_db = pyrosar_db.rio.reproject(pyrosar_db.rio.crs, **resample_kwargs)
    opera_db = opera_db.rio.reproject(opera_db.rio.crs, **resample_kwargs)

    # recompute the difference on the resampled grids
    diff = pyrosar_db - opera_db
    print(diff.shape)

In [None]:
# summary statistics over the finite difference values only
diff_values = np.asarray(diff)
stats_arr = diff_values[np.isfinite(diff_values)]
print('Difference Stats')
print(f'min: {stats_arr.min()}', 
      f'max: {stats_arr.max()}',
      f'5th percentile: {np.percentile(stats_arr, 5)}',
      f'95th percentile: {np.percentile(stats_arr, 95)}',
      )

arrs = [pyrosar_db, opera_db, diff]
cmaps = ['binary_r','binary_r','bwr']
scales = [[-40,10],[-40,10],[-1,1]]
titles = ['pyrosar',
          'opera-rtc',
          'abs difference (pyrosar - opera-rtc)']

# three image panels followed by one histogram panel
f, ax = plt.subplots(nrows=4, ncols=1, figsize=(10,40))
for i,arr in enumerate(arrs):
      # arr[0] selects the first band
      im = ax[i].imshow(arr[0], 
            vmin = scales[i][0], 
            vmax = scales[i][1],
            cmap = cmaps[i])
      ax[i].set_title(titles[i])
      f.colorbar(im, ax=ax[i], label='decibels (dB)')
      
# plot the histogram of the two dB rasters (not the difference)
colors = ['red','blue']
for i in [0,1]:
      # only get real values 
      values = np.asarray(arrs[i])
      hist_data = values[np.isfinite(values)]
      u, std = np.mean(hist_data), np.std(hist_data)
      ax[3].hist(hist_data, 
               density=True,
               bins=60, 
               alpha=0.5, 
               label=f'{titles[i]}; u={u:.3f}, std={std:.3f}', 
               color=colors[i],
               histtype='step')
ax[3].set_title('Pixel distribution (dB)')

# attach the legend to the histogram panel explicitly rather than relying on
# whichever axes pyplot currently considers active
ax[3].legend(loc='best')

# See pixel shift in small area

In [None]:
# pixel window to inspect, given in full-resolution pixel coordinates
x1,x2,y1,y2 = 8600,9000,6600,7000 # full res
#x1,x2,y1,y2 = 6500,6900,4600,5000 # full res
#x1,x2,y1,y2 = 2000,2500,8600,9100 # full res
if upscale_factor:
    x1,x2,y1,y2 = [int(n*upscale_factor) for n in [x1,x2,y1,y2]] # adjust for scaling
# cut the same window out of the first band of both dB rasters
pyrosar_snip = pyrosar_db[0][y1:y2,x1:x2]
opera_snip = opera_db[0][y1:y2,x1:x2]
# alternate between the two snippets so any pixel shift becomes visible
animation = make_gif([opera_snip, pyrosar_snip], vmin=-40, vmax=10)
HTML(animation.to_html5_video())

# Investigate Opera H5 Metadata

In [None]:
scene = scenes[0]
h5_name = f'OPERA_L2_RTC-{scene}.h5'
h5_file_local = f'data/{h5_name}'
# the keys listed earlier carry a projection folder ({software}/{dem}/{proj}/{scene}/...)
# which the hard-coded path below lacks, so prefer the key found in the
# bucket listing and only fall back to the hard-coded path if it is not listed
listed_h5 = df_s3.loc[df_s3['Key'].str.endswith(f'/{h5_name}'), 'Key']
if len(listed_h5):
    h5_file_s3 = listed_h5.iloc[0]
else:
    h5_file_s3 = f'rtc-opera/{dem}/{scene}/{h5_name}'
# skip the download when the file is already cached locally
if not os.path.exists(h5_file_local):
    s3.download_file(s3_bucket, h5_file_s3, h5_file_local)
# the handle is left open on purpose: the following cells read from it
h5_data = h5py.File(h5_file_local,'r')

In [None]:
# member names of the 'identification' group
print(list(h5_data['identification'].keys()))
# member names of the 'data' group (shown as the cell output)
list(h5_data['data'])

In [None]:
# 20th percentile of the numberOfLooks dataset, ignoring NaN values
np.nanpercentile(h5_data['data']['numberOfLooks'][:], q=20)

In [None]:
# names of the processing parameters stored in the product metadata
# (reads from h5_data, the handle opened above; `f1` is not defined in this notebook)
h5_data['metadata']['processingInformation']['parameters'].keys()

In [None]:
# print every member of the 'data' group with its h5py description
# (reads from h5_data, the handle opened above; `f1` is not defined in this notebook)
for x in h5_data['data'].keys():
    print(x + ' : ', h5_data['data'][x])

In [None]:
# largest numberOfLooks value, ignoring NaN values
# (reads from h5_data, the handle opened above; `f1` is not defined in this notebook)
np.nanmax(h5_data['data']['numberOfLooks'][:])