## Code to visualize ReaLSAT-based maps and time series

In [1]:
import pandas as pd

In [2]:
# GDAL's Python bindings are distributed inside the `osgeo` package; a bare
# top-level `gdal` module only exists in old installations, so try it last.
try:
    from osgeo import gdal
except ImportError:
    import gdal

ModuleNotFoundError: No module named 'gdal'

In [3]:
import glob
import os
import time
import urllib.request
import zipfile

os.environ['USE_PYGEOS'] = '0'  # keep this ahead of the geopandas import below

import geopandas
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
from matplotlib.colors import LinearSegmentedColormap
from tqdm import tqdm

# three-step palette (white, green, blue); 8-bit RGB triplets are scaled to the 0-1 range
colors = [
    (1, 1, 1),
    tuple(channel / 255.0 for channel in (0, 153, 76)),
    tuple(channel / 255.0 for channel in (51, 51, 255)),
]
color_map = LinearSegmentedColormap.from_list('custom_colormap', colors, N=3)

In [4]:
def prune_time_steps(fill_ts, window_size, threshold=90):
    """Flag timesteps whose neighbourhood has too high an average fill value.

    An interior timestep ``j`` is flagged when the mean of the (up to)
    ``window_size`` values immediately before it, OR the mean of the (up to)
    ``window_size`` values immediately after it, exceeds ``threshold``.
    The first and last timesteps have only one neighbouring side, so they are
    flagged based on their own value instead.

    Parameters
    ----------
    fill_ts : array-like, 1-D
        Per-timestep values to screen (used in this notebook as the
        missing-data percentage of each monthly extent).
    window_size : int
        Number of neighbouring timesteps considered on each side.
    threshold : float, optional
        Value above which a window (or an end point) is considered bad.
        Defaults to 90, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray of bool
        Mask with the same length as ``fill_ts``; True marks timesteps to prune.
        An empty input yields an empty mask.
    """
    fill_ts = np.asarray(fill_ts)
    T = fill_ts.shape[0]
    prn_ts = np.zeros(T, dtype=bool)
    if T == 0:
        # nothing to screen; also avoids indexing fill_ts[0] on an empty array
        return prn_ts

    for j in range(1, T - 1):
        # both windows exclude timestep j itself and are clipped at the array ends
        cur_ts_before = fill_ts[max(0, j - window_size):j]
        cur_ts_after = fill_ts[j + 1:min(j + window_size + 1, T)]
        if np.mean(cur_ts_before) > threshold or np.mean(cur_ts_after) > threshold:
            prn_ts[j] = True

    # end points are judged on their own value
    prn_ts[0] = fill_ts[0] > threshold
    prn_ts[T - 1] = fill_ts[T - 1] > threshold
    return prn_ts

# year and month labels for every monthly extent, January 1984 through December 2020
year_arr = np.repeat(np.arange(1984, 2021), 12)
month_arr = np.tile(np.arange(1, 13), 2021 - 1984)

# the dataset starts in March 1984, so drop the January and February entries
year_arr = year_arr[2:]
month_arr = month_arr[2:]

In [5]:
# set a working directory and zenodo record id
# working_dir must end with '/' because every path below is built by plain concatenation
working_dir = 'E:/casadje/jrcbox/datasets/lakes/ReaLSAT/' #'../'
# record id that the download URLs in this cell previously hard-coded
# NOTE(review): assumes the monthly_shapes archives are published under the same record — confirm
zenodo_record_id = '7265435'

# create working directory if absent (including any missing parent directories)
os.makedirs(working_dir, exist_ok=True)

# check for reference shapefile in the working directory
shapefile_path = f"{working_dir}ReaLSAT-2-0/ReaLSAT.shp"
if not os.path.isfile(shapefile_path):
    shapefile_zip = f"{working_dir}ReaLSAT-2-0.zip"

    print('downloading reference shapefile...')
    # pure-Python download/unzip: works without wget/unzip on the PATH and raises on failure
    urllib.request.urlretrieve(
        f"https://zenodo.org/record/{zenodo_record_id}/files/ReaLSAT-2-0.zip?download=1",
        shapefile_zip,
    )

    print('unzipping reference shapefile...')
    with zipfile.ZipFile(shapefile_zip) as archive:
        archive.extractall(working_dir)


print('reading reference shapefile...')
realsat = geopandas.read_file(shapefile_path)

# check for timeseries zip file in the working directory
timeseries_zip = f"{working_dir}monthly_timeseries.zip"
if not os.path.isfile(timeseries_zip):
    print('downloading timeseries file...')
    urllib.request.urlretrieve(
        f"https://zenodo.org/record/{zenodo_record_id}/files/monthly_timeseries.zip?download=1",
        timeseries_zip,
    )

    print('unzipping timeseries file...')
    with zipfile.ZipFile(timeseries_zip) as archive:
        archive.extractall(working_dir)


reading reference shapefile...


In [6]:
# ReaLSAT waterbody to visualize; matched against the 'ID' column of the reference shapefile
ID = 801275 # set waterbody ID

In [7]:
# read metadata of the selected waterbody from the reference shapefile
waterbody_meta = realsat.loc[realsat['ID'] == ID, ['BLON', 'BLAT', 'RES']]
if waterbody_meta.empty:
    # fail with a clear message instead of an IndexError from .values[0]
    raise ValueError(f'waterbody ID {ID} was not found in the reference shapefile')
blon, blat, pixel_resolution = waterbody_meta.values[0]

# zero-padded ID and '<BLON>_<BLAT>' box name, both used to build file paths
ID_str = str(ID).zfill(6)
box_id_str = f'{int(blon)}_{int(blat)}'

In [8]:
# check if the timeseries of this waterbody has already been extracted from its per-box archive
csv_timeseries = f'{working_dir}monthly_timeseries/{box_id_str}/ID_{ID_str}.csv'
if not os.path.isfile(csv_timeseries):
    print('timeseries for this waterbody is not present in the working directory... unzipping...')
    # working_dir already ends with '/', so no extra separator is inserted here
    box_archive = f'{working_dir}monthly_timeseries/monthly_timeseries_{box_id_str}.zip'
    # pure-Python extraction: no external unzip needed, and a missing/corrupt archive raises
    with zipfile.ZipFile(box_archive) as archive:
        archive.extractall(working_dir)

# load timeseries array
info_arr = np.loadtxt(csv_timeseries, delimiter=',', dtype=float)
# scaling for converting to sq. kms
# NOTE(review): assumes RES is a pixel size in metres and the series are pixel counts — confirm
scaling = (pixel_resolution * 0.001)**2
realsat_timeseries = info_arr[2, :].copy()
gsw_timeseries = info_arr[3, :].copy()

# blank out timesteps whose area values were potentially affected by a large amount of
# missing data around them (row 0 is screened with a 6-step window on each side)
bad_ts = prune_time_steps(info_arr[0, :], 6)
realsat_timeseries[bad_ts] = np.nan
gsw_timeseries[bad_ts] = np.nan

In [None]:
# check if the monthly shapes of this waterbody are already on disk; download and unzip otherwise
tif_shapes = f'{working_dir}monthly_shapes/{box_id_str}/ID_{ID_str}.tif'
if not os.path.isfile(tif_shapes):
    print('realsat shapes for this waterbody is not present in the working directory... unzipping...')
    if not zenodo_record_id:
        # an empty id would silently produce an invalid 'record//files' URL
        raise ValueError('zenodo_record_id is empty; set it to the Zenodo record that hosts the monthly_shapes archives')
    shapes_zip = f'{working_dir}monthly_shapes_{box_id_str}.zip'
    shapes_url = f'https://zenodo.org/record/{zenodo_record_id}/files/monthly_shapes_{box_id_str}.zip?download=1'
    # pure-Python download/unzip: no shell quoting issues with '?', and failures raise
    urllib.request.urlretrieve(shapes_url, shapes_zip)
    with zipfile.ZipFile(shapes_zip) as archive:
        archive.extractall(working_dir)

# open read-only; gdal.Open returns None (instead of raising) when exceptions are not enabled
shapes_dataset = gdal.Open(tif_shapes, gdal.GA_ReadOnly)
if shapes_dataset is None:
    raise IOError(f'GDAL could not open {tif_shapes}')
maps_arr = shapes_dataset.ReadAsArray()
T = maps_arr.shape[0]  # first axis of the stack; indexed per month when plotting

***

In [12]:
import xarray as xr

In [14]:
# NOTE(review): `xr.riorasterio` is not an xarray function; the output recorded for this cell
# comes from `xr.open_rasterio`, so that call is restored here. It is deprecated in xarray
# (rioxarray.open_rasterio is the documented replacement) — confirm before upgrading xarray.
xr.open_rasterio(f'{working_dir}monthly_shapes/{box_id_str}/ID_{ID_str}.tif')

  xr.open_rasterio(f'{working_dir}monthly_shapes/{box_id_str}/ID_{ID_str}.tif')


RasterioIOError: E:/casadje/jrcbox/datasets/lakes/ReaLSAT/monthly_shapes/0_40/ID_801275.tif: Cannot open TIFF file due to missing codec.

***

In [None]:
# plot the monthly map next to the area timeseries, one figure per month
area_km2 = realsat_timeseries * scaling  # same for every month, so computed once
tick_positions = np.arange(10, len(year_arr), 24)  # index 10 is January 1985; one tick every 24 months

# iterate over months
# change the start and end month as needed
for t in range(300, 301):
    fig = plt.figure(figsize=(15, 4))
    # 1x3 grid: the map takes the first column, the timeseries the remaining two
    ax1 = plt.subplot2grid((1, 3), (0, 0), colspan=1, rowspan=1, fig=fig)
    ax2 = plt.subplot2grid((1, 3), (0, 1), colspan=2, rowspan=1, fig=fig)

    ax1.imshow(maps_arr[t], vmin=0, vmax=2, cmap=color_map)
    ax1.set_title(f'ID: {ID:06}')

    ax2.plot(area_km2, '.-b', label='ReaLSAT Area', markersize=10)
    # optional comparison series:
    # ax2.plot(gsw_timeseries * scaling, '.-r', label='GSW Area', markersize=10)
    # dashed vertical marker at the month being displayed
    ax2.plot([t, t], [np.nanmin(area_km2), np.nanmax(area_km2)], '--k')
    ax2.set_ylabel('Area (km²)')
    ax2.set_title('Year: {} Month: {}'.format(year_arr[t], month_arr[t]))
    ax2.set_xticks(tick_positions)
    ax2.set_xticklabels(year_arr[tick_positions])
    ax2.grid()

    fig.tight_layout()
    plt.show()
    # pause between frames; uncomment clear_output to animate over a range of months
    time.sleep(0.5)
#     clear_output(wait=True)
