# Gapfilling Observations with Synthetic data

## Create no-gaps datasets

In [None]:
import folium
import numpy as np
import xarray as xr
import seaborn as sb
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
import odc.geo.xr
from odc.geo.xr import assign_crs

In [None]:
# Name of the variable to gap-fill. It is used to build every file path below
# and to select the data variable; the plotting cells handle 'NDVI' and 'LST'.
model_var='NDVI'

In [None]:
# File holding the ML predictions (the "synthetic" data) and file holding the
# harmonised observations that still contain gaps.
predictions_path = '/g/data/os22/chad_tmp/climate-carbon-interactions/results/ml_predictions/'+model_var+'_predicted_5km_monthly_1982_2022.nc'
observations_path = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/'+model_var+'_harmonization/'+model_var+'_5km_monthly_1982_2022_wGaps.nc'

yy = xr.open_dataset(predictions_path)
obs = xr.open_dataset(observations_path)

# For LST, subtract 273.15 from both datasets
# (presumably Kelvin -> degrees Celsius; confirm the units of the inputs).
if model_var=='LST':
    yy, obs = yy-273.15, obs-273.15

# Keep only the observation time steps that are present in the predictions.
obs = obs.sel(time=yy.time)

# Rename the prediction dimensions from y/x to latitude/longitude
# (assumed to match the dimension names of the observations file).
yy = yy.rename({'y':'latitude', 'x':'longitude'})

In [None]:
# Fill with synthetic data: wherever the observations are NaN, take the value
# from the model_var variable of the predictions; observed values are kept.
syn_fill = obs.fillna(yy[model_var])

## Save to disk

In [None]:
# Write the gap-filled result to a NetCDF file in the 5km data directory.
syn_fill.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/5km/'+model_var+'_5km_monthly_1982_2022.nc')

## Plots

In [None]:
# Products to compare side by side, in the same order as their panel titles.
# `obs` and `syn_fill` are Datasets (they originate from xr.open_dataset), and
# `.plot.imshow` is only available on DataArrays, so the model_var variable is
# selected from each of them, exactly as is already done for `yy`.
data = [obs[model_var], yy[model_var], syn_fill[model_var]]
products=['Observed', 'Synthetic', 'Gap-filled']

In [None]:
# Colour map and colour limits for the selected variable.
if model_var=='NDVI':
    cmap, vmin, vmax = 'gist_earth_r', 0.1, 0.8
elif model_var=='LST':
    cmap, vmin, vmax = 'inferno', 5, 40

# Integer index of the time step that is drawn in every panel.
t = 210

fig, axes = plt.subplots(1, 3, figsize=(18,6), sharey=True)
panels = axes.ravel()
for ax, product, product_name in zip(panels, data, products):
    snapshot = product.isel(time=t)
    im = snapshot.plot.imshow(ax=ax, robust=True, vmin=vmin, vmax=vmax, add_colorbar=False, cmap=cmap)

    # Title each panel with the product name and the year-month of the snapshot.
    year = snapshot.time.dt.year.values
    month = snapshot.time.dt.month.values
    ax.set_title(product_name+' '+str(year)+'-'+str(month),  fontsize=15)

    # Strip axis labels and tick labels from the map panels.
    ax.set_yticklabels([])
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.set_xticklabels([])

fig.subplots_adjust(wspace=0.05)
# One colourbar shared by all three panels (taken from the last image drawn).
fig.colorbar(im, ax=panels.tolist(), pad=0.01, label=model_var);

## Animations

In [None]:
import sys
# Make the local checkout of the dea-notebooks Tools directory importable so
# that `dea_tools` can be imported from it.
sys.path.append('/g/data/os22/chad_tmp/dea-notebooks/Tools/')
from dea_tools.plotting import xr_animation
# Used to display the saved GIF in the notebook output.
from IPython.display import Image

In [None]:
# Produce a time series animation of the gap-filled product.
# To animate one of the other products, change the output suffix to
# '_observed.gif' or '_synthetic.gif' and point `anim_source` at
# `obs` or `yy[model_var]` respectively.
path = '/g/data/os22/chad_tmp/climate-carbon-interactions/results/gifs/'+model_var+'_gapfilled.gif'

# Colour map and colour limits for the selected variable.
if model_var=='NDVI':
    cmap='gist_earth_r'
    vmin=0.1
    vmax=0.8

if model_var=='LST':
    cmap='inferno'
    vmin=5
    vmax=45

# The animation is built from a Dataset containing a `model_var` band.
# `.to_dataset()` only exists on DataArrays, so convert only when needed:
# `syn_fill` is a Dataset when created from the opened Datasets, and a
# DataArray when created from the model_var DataArrays.
anim_source = syn_fill
if isinstance(anim_source, xr.DataArray):
    anim_source = anim_source.to_dataset(name=model_var)

xr_animation(
    # 1982-2000 only, smoothed with a two-step rolling mean along time
    anim_source.sel(time=slice('1982','2000')).rolling(time=2, min_periods=1).mean(),
    output_path=path,
    bands=[model_var],
    show_date='%B-%Y',
    show_text=model_var,
    interval=200,
    imshow_kwargs={'cmap': cmap, 'vmin': vmin, 'vmax': vmax},
    colorbar_kwargs={'colors': 'black'},
    # width_pixels=300,
    annotation_kwargs={'color':'black', 'animated': True}
)

# Plot animated gif
plt.close()
Image(filename=path)

## Create an Aus-wide timeseries plot

With rainfall anomalies

In [None]:
# Re-open the predictions and keep only the model_var DataArray (dims stay y/x here).
yy = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/results/ml_predictions/'+model_var+'_predicted_5km_monthly_1982_2022.nc')[model_var]

# Re-open the gappy observations, keep the model_var DataArray, restrict it to
# 1982-2022 and rename latitude/longitude to y/x.
obs = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/'+model_var+'_harmonization/'+model_var+'_5km_monthly_1982_2022_wGaps.nc')
obs = obs[model_var]
obs = obs.sel(time=slice('1982','2022'))
obs = obs.rename({'latitude':'y', 'longitude':'x'})


In [None]:
# Monthly rainfall, read from the `rain` variable of the 5km file.
rain = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/5km/rain_5km_monthly_1981_2022.nc').rain

# Per-calendar-month standard deviation and mean over the 1982-2012 baseline.
rain_baseline_by_month = rain.sel(time=slice('1982', '2012')).groupby('time.month')
rain_clim_std = rain_baseline_by_month.std().compute()
rain_clim_mean = rain_baseline_by_month.mean().compute()

# Standardised anomalies relative to a monthly climatology
def stand_anomalies(ds, clim_mean, clim_std):
    """Return ``(ds - clim_mean) / clim_std`` evaluated per calendar month.

    ``ds`` is loaded with ``.compute()`` and grouped by ``time.month``; the
    arithmetic is then applied to the groups through ``xr.apply_ufunc``.

    Parameters
    ----------
    ds
        xarray object with a ``time`` coordinate.
    clim_mean, clim_std
        Climatological mean and standard deviation
        (assumed to be indexed by ``month`` -- confirm against the caller).

    Returns
    -------
    The standardised anomalies produced by ``xr.apply_ufunc``.
    """
    def _standardise(values, mean, std):
        return (values - mean) / std

    monthly_groups = ds.compute().groupby("time.month")
    return xr.apply_ufunc(_standardise, monthly_groups, clim_mean, clim_std)

# Standardised rainfall anomalies against the monthly climatology.
rain_std_anom = stand_anomalies(rain, rain_clim_mean, rain_clim_std)

# Name the series 'rain', keep 1982-2022 and smooth with a three-step rolling mean.
rain_smoothed = rain_std_anom.rename('rain').sel(time=slice('1982', '2022')).rolling(time=3, min_periods=1).mean()

# Average over latitude/longitude and convert to a DataFrame, dropping the
# 'spatial_ref' and 'month' columns so only 'rain' remains.
rain_df = rain_smoothed.mean(['latitude','longitude']).to_dataframe().drop(columns=['spatial_ref', 'month'])

In [None]:
# Fill with synthetic data: NaNs in the observations are replaced by the
# predictions; observed values are kept.
syn_fill = obs.fillna(yy)

In [None]:
fig, ax = plt.subplots(figsize=(18,6))
# Twin axis that carries the rainfall-anomaly bars behind the lines.
ax2 = ax.twinx()

# Three-step rolling mean of the spatial average for each product.
series_styles = [
    (obs, {'label': 'observed (with gaps)', 'c': 'black'}),
    (yy, {'label': 'predictions', 'linestyle': 'dashed', 'c': 'red'}),
    (syn_fill, {'label': 'ML gap-filled', 'linestyle': 'dashed', 'c': 'forestgreen'}),
]
for series, line_kwargs in series_styles:
    series.rolling(time=3, min_periods=1).mean().mean(['x','y']).plot(ax=ax, **line_kwargs)

# Brown-white-cyan colour scale for rainfall anomalies, normalised over [-2, 2].
norm = plt.Normalize(-2, 2)
cmap = mpl.colors.LinearSegmentedColormap.from_list(
    "", ['saddlebrown','chocolate','white','darkturquoise','darkcyan'], N=256
)

# Plot bars: one full-height bar per time step, coloured by the rainfall anomaly.
bar_colours = cmap(norm(rain_df['rain']))
bar = ax2.bar(rain_df.index, 1, color=bar_colours, width=32)

# Colourbar for the bars, driven by a stand-alone ScalarMappable.
sm = ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, shrink=0.8, pad=0.01)
cbar.set_label('Rainfall Anomaly', labelpad=0.5)

# Put the bar axis behind the line axis and hide the line axis frame/background.
ax2.set_zorder(ax.get_zorder()-1)
ax.set_frame_on(False)

# Reformat y-axis label and tick labels
ax.set_ylabel(model_var)
ax.set_xlabel('')
ax2.set_ylabel('')
ax2.set_yticks([])
ax2.set_ylim(0, 1)
ax.margins(x=0)

# Adjust the margins around the plot area
plt.subplots_adjust(left=0.1, bottom=0.2)

# Fixed y-range for NDVI
if model_var=='NDVI':
    ax.set_ylim(0.20, 0.40)

ax.legend()
ax.set_title('Australian three-month rolling mean '+model_var);
fig.savefig("/g/data/os22/chad_tmp/climate-carbon-interactions/results/figs/Australian_"+model_var+"_rollingmean.png",
            bbox_inches='tight', dpi=300)

## Test areas



In [None]:
# `yy` is a Dataset straight after xr.open_dataset but a DataArray once
# model_var has been selected from it. Indexing a DataArray with
# `[model_var]` raises a KeyError, so only select the variable when needed.
yy_da = yy[model_var] if isinstance(yy, xr.Dataset) else yy


def _test_area_mean(da, y_start, x_start, size=5):
    """Mean 1982-2000 time series of a ``size`` x ``size`` pixel window.

    The window is selected by integer position along ``y`` and ``x``,
    starting at ``y_start`` / ``x_start``.
    """
    window = da.isel(y=range(y_start, y_start + size),
                     x=range(x_start, x_start + size))
    return window.sel(time=slice('1982', '2000')).mean(['x', 'y'])


# Observations (with gaps)
test_area_1 = _test_area_mean(obs, 115, 355)  # Northern Territory tropical savannah
test_area_2 = _test_area_mean(obs, 455, 100)  # SW WA cropping
test_area_3 = _test_area_mean(obs, 545, 745)  # SE Aus forest

# Predictions over the same windows
test_area_1_yy = _test_area_mean(yy_da, 115, 355)
test_area_2_yy = _test_area_mean(yy_da, 455, 100)
test_area_3_yy = _test_area_mean(yy_da, 545, 745)

In [None]:
def _fill_variants(series, synthetic):
    """Build the gap-filled variants of one test-area time series.

    Returns, in order:
      1. the series linearly interpolated along time (limit=3),
      2. its per-calendar-month mean,
      3. the series with NaNs filled from that monthly mean,
      4. the interpolated series with remaining NaNs filled from (3),
      5. the series with NaNs filled from ``synthetic``.
    """
    # fill short gaps by linear interpolation
    linear = series.interpolate_na(method='linear', dim='time', limit=3)

    # fill with climatology
    monthly_clim = series.groupby("time.month").mean("time")
    clim_filled = series.groupby("time.month").fillna(monthly_clim)

    # combined fill
    combined = linear.groupby("time.month").fillna(clim_filled).compute()

    # fill with synthetic data
    synthetic_filled = series.fillna(synthetic)

    return linear, monthly_clim, clim_filled, combined, synthetic_filled


# Area 1
(linear_area_1, test_area_1_clim, clim_area_1,
 combine_area_1, syn_fill_area_1) = _fill_variants(test_area_1, test_area_1_yy)

#---------------------------------------------------------------------------------
# Area 2
(linear_area_2, test_area_2_clim, clim_area_2,
 combine_area_2, syn_fill_area_2) = _fill_variants(test_area_2, test_area_2_yy)

#---------------------------------------------------------------------------------
# Area 3
(linear_area_3, test_area_3_clim, clim_area_3,
 combine_area_3, syn_fill_area_3) = _fill_variants(test_area_3, test_area_3_yy)

In [None]:
# One row per test area: (linear+climatology fill, synthetic fill, original, title)
panels = [
    (combine_area_1, syn_fill_area_1, test_area_1, 'Northern Territory test area, savannah'),
    (combine_area_2, syn_fill_area_2, test_area_2, 'SW WA test area, cropping'),
    (combine_area_3, syn_fill_area_3, test_area_3, 'SE Aus test area, forest'),
]

fig,ax = plt.subplots(3,1, figsize=(13,12), sharex=True)

for panel_ax, (combined, synthetic, original, title) in zip(ax, panels):
    combined.plot(ax=panel_ax, label='linear+climatology', linestyle='--', c='green')
    synthetic.plot(ax=panel_ax, label='synthetic', linestyle='dashdot', c='red')
    original.plot(ax=panel_ax, label='original', c='black')
    # Clear the x-label on every panel. The original cleared ax[1] twice and
    # never ax[2], leaving a stray label on the bottom panel.
    panel_ax.set_xlabel('')
    panel_ax.set_title(title)

# The line styles are the same in every panel, so a single legend is shown on the top one.
ax[0].legend()
plt.tight_layout();