## ANUCLIM 1982-2022

In [None]:
import xarray as xr
import rioxarray
from datetime import datetime
import pandas as pd
import numpy as np
import os
from odc.geo.xr import assign_crs

import sys
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _collect_prediction_data import round_coords

In [None]:
# Start a Dask client through datacube's helper; the chunked xarray work
# further down relies on it.
# NOTE(review): mem_safety_margin='2Gb' presumably keeps ~2 GB of RAM away
# from the Dask workers -- confirm against the datacube documentation.
from datacube.utils.dask import start_local_dask
client = start_local_dask(mem_safety_margin='2Gb')
# Bare expression so the notebook renders the client summary.
client

In [None]:
# The WCF file is opened only to borrow its grid definition: `gbox` is the
# geobox that the climate layers are reprojected onto in the next cell.
wcf_path = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/WCF_5km_monthly_2000_2022.nc'
wcf = xr.open_dataset(wcf_path)
gbox = wcf.odc.geobox

In [None]:
# Build one monthly ANUClimate stack (1982-2022) for a single variable,
# reprojected onto `gbox`, and write it to netCDF.
base = '/g/data/gh70/ANUClimate/v2-0/stable/month/'
var = 'tavg'  # alternatives: 'rain', 'vpd', 'srad'

years = [str(year) for year in range(1982, 2023)]

pp = []
for i, y in enumerate(years):
    # Single-line progress counter (carriage return, no newline).
    print(" {:02}/{:02}\r".format(i + 1, len(years)), end="")

    # One sub-directory per year; sorting only makes the file order
    # reproducible, since os.listdir() returns entries in arbitrary order.
    year_dir = os.path.join(base, var, y)
    paths = [os.path.join(year_dir, fname) for fname in sorted(os.listdir(year_dir))]
    ds = xr.open_mfdataset(paths, chunks=dict(lat=1000, lon=1000))

    ds = assign_crs(ds, crs='epsg:4283')  # GDA94
    # drop_vars() replaces the deprecated Dataset.drop(); the 'crs' variable
    # is removed and only the variable of interest is kept.
    ds = ds.drop_vars('crs')[var]
    ds.attrs['nodata'] = np.nan
    ds = ds.odc.reproject(gbox, resampling='average')
    ds = round_coords(ds)
    pp.append(ds)

# Stitch the years together in time order, then load into memory.
ds = xr.concat(pp, dim='time').sortby('time')
ds = ds.compute()
ds = ds.rename(var)
# NOTE(review): the output name says "1km", but `gbox` is taken from a file
# named WCF_5km_* -- confirm the resolution label before relying on it.
ds.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/'+var+'_1km_monthly_1982_2022.nc')

In [None]:
import sys
import os
import numpy as np
import warnings
import xarray as xr
import odc.geo.xr
import pandas as pd
from odc.geo.xr import assign_crs
import requests
from bs4 import BeautifulSoup

In [None]:
# Put the AusEFlux source folder on the import path so that its
# `dask_utils` module can be imported, then start a Dask client with it.
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from dask_utils import start_local_dask

# NOTE(review): mem_safety_margin='2Gb' presumably reserves memory outside
# the Dask workers -- confirm in dask_utils.
client = start_local_dask(mem_safety_margin='2Gb')
# Bare expression so the notebook renders the client summary.
client

In [None]:
base='/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR/'

In [None]:
files = [base+i for i in os.listdir(base) if i.endswith('.nc')]
files.sort()

In [None]:
ds = xr.open_mfdataset(files) #chunks=dict(latitude=1000, longitude=1000)
ds = assign_crs(ds, crs='epsg:3577')

In [None]:
ds = ds.compute()

In [None]:
# ds.TIMEOFDAY_mean.mean(['x', 'y']).plot(figsize=(11,5))

In [None]:
ds1 = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_NDVI_1982_2013.nc')
ds1 = assign_crs(ds1, crs='epsg:3577')

In [None]:
ds1

In [None]:
dss

In [None]:
dss = xr.merge([ds,ds1])

In [None]:
del dss.TIMEOFDAY_mean.attrs['grid_mapping']
del dss.SZEN_mean.attrs['grid_mapping']

In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sb
from scipy import stats
from scipy.stats import gaussian_kde
from sklearn.metrics import mean_absolute_error, r2_score

In [None]:
# AVHRR / MODIS NDVI comparison dataset (the variables ndvi_cdr, ndvi_mcd
# and ndvi_mcd_pred are read from it in the cells below).
# The path previously began with a stray double slash; it is normalised to
# match every other path in this notebook.
merge = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/d_export.nc')

In [None]:
# fig,ax = plt.subplots(1,1, figsize=(13,6))
# merge.ndvi_cdr.mean(['x','y']).plot(ax=ax, label='AVHRR Original')
# merge.ndvi_mcd.mean(['x','y']).plot(ax=ax, label='MODIS Original')
# merge.ndvi_mcd_pred.mean(['x','y']).plot(ax=ax, label='AVHRR_Adjust')
# ax.legend()

In [None]:
# plt.scatter(merge.ndvi_mcd.values, merge.ndvi_cdr.values)
# merge.ndvi_cdr.mean(['x','y']).plot.scatter(label='AVHRR Original')

In [None]:
df = merge.ndvi_mcd.mean(['x', 'y']).rename('MODIS').to_dataframe()

In [None]:
df['AVHRR-original'] = merge.ndvi_cdr.mean(['x', 'y']).rename('AVHRR-Original').to_dataframe()
df['AVHRR-adjusted'] = merge.ndvi_mcd_pred.mean(['x', 'y']).rename('AVHRR-Adjusted').to_dataframe()


In [None]:
# Keep only the rows in which every column has a value.
df = df.dropna()

In [None]:
# Quick look at all columns as line plots.
df.plot()

In [None]:
# Column names of `df` that are compared against 'MODIS' in the next cell.
products=['AVHRR-original', 'AVHRR-adjusted']

In [None]:
# One scatter panel per AVHRR product against MODIS NDVI, annotated with
# r-squared and mean absolute error.
# The axes array is called `axes` so that the per-panel loop variable `ax`
# no longer overwrites it.
fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharey=True)
font = 15

for prod, ax in zip(products, axes.ravel()):
    obs, pred = df['MODIS'].values, df[prod].values

    # Only the correlation coefficient of the fit is needed.
    r2 = stats.linregress(obs, pred).rvalue ** 2
    ac = mean_absolute_error(obs, pred)

    # Kernel density estimate evaluated at each point, used as point colour.
    xy = np.vstack([obs, pred])
    z = gaussian_kde(xy)(xy)

    sb.scatterplot(data=df, x='MODIS', y=prod, c=z, s=50, lw=1, alpha=0.5, ax=ax)
    # Blue: regression of the product on MODIS.
    sb.regplot(data=df, x='MODIS', y=prod, scatter=False, color='blue', ax=ax)
    # Black dashed: MODIS against itself, i.e. the 1:1 line.
    sb.regplot(data=df, x='MODIS', y='MODIS', color='black', scatter=False,
               line_kws={'linestyle': 'dashed'}, ax=ax)

    ax.set_title(prod, fontsize=font)
    ax.set_xlabel('MODIS NDVI', fontsize=font)
    ax.set_ylabel('')
    # r2 and ac are already scalars, so they are formatted directly.
    ax.text(.05, .90, 'r\N{SUPERSCRIPT TWO}={:.2f}'.format(r2),
            transform=ax.transAxes, fontsize=font)
    ax.text(.05, .825, 'MAE={:.2g}'.format(ac),
            transform=ax.transAxes, fontsize=font)
    ax.tick_params(axis='x', labelsize=font)
    ax.tick_params(axis='y', labelsize=font)

fig.supylabel('AVHRR NDVI', fontsize=font)
plt.tight_layout();

In [None]:
av = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_5km_monthly_1982_2013.nc')
av.isel(x=slice(650,750), y=slice(500,600)).to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_5km_monthly_1982_2013_subset.nc')

In [None]:
xx = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/MODIS_NDVI_median_5km_monthly_2001_2022.nc')


In [None]:
xx.isel(x=slice(650,750), y=slice(500,600)).to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/MODIS_NDVI_median_5km_monthly_2001_2022_subset.nc')

In [None]:
ds = ds.drop('NDVI_std')

In [None]:
ds = ds.NDVI.persist()

In [None]:
ds_mean = ds.resample(time='MS', loffset=pd.Timedelta(14, 'd')).mean().compute()
ds_std = ds.resample(time='MS', loffset=pd.Timedelta(14, 'd')).std().compute()
count = (~np.isnan(ds))
count =  count.resample(time='MS', loffset=pd.Timedelta(14, 'd')).sum().compute()

In [None]:
ds_mean = ds_mean.rename('NDVI_mean').to_dataset()
ds_mean.attrs['nodata'] = np.nan
ds_std = ds_std.rename('NDVI_std').to_dataset()
ds_std.attrs['nodata'] = np.nan
count = count.rename('n_obs').to_dataset()
count.attrs['nodata'] = 0

In [None]:
ds_out = xr.merge([ds_mean, ds_std, count])
ds_out.attrs['nodata'] = np.nan
ds_out = assign_crs(ds_out, crs='epsg:3577')

In [None]:
# Tag `ds` with EPSG:3577.
# NOTE(review): the concat and the `n_obs` cast further down only make sense
# if `ds` is a Dataset holding NDVI_mean / NDVI_std / n_obs (as `ds_out`
# does); on a top-to-bottom run `ds` is still the NDVI DataArray at this
# point -- confirm which object was intended.
ds = assign_crs(ds, crs='epsg:3577')

In [None]:
# Load into memory.
ds = ds.compute()

In [None]:
# ds1.y.values == ds1.y.values

In [None]:
# Earlier period (1982-2002) that was saved previously.
ds1 = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_NDVI_1982_2002.nc')

In [None]:
ds1 = assign_crs(ds1, crs='epsg:3577')

In [None]:
# Join the two periods along time, earlier period first.
dss = xr.concat([ds1, ds], dim='time')

In [None]:
dss['n_obs'] = dss['n_obs'].astype(np.int16)

In [None]:
dss

In [None]:
del dss.NDVI_mean.attrs['grid_mapping']
del dss.NDVI_std.attrs['grid_mapping']
del dss.n_obs.attrs['grid_mapping']

In [None]:
dss.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR_NDVI_1982_2013.nc')

In [None]:
# Maps of NDVI_mean for every time step in 2011, four panels per row,
# with the colour scale starting at 0.
dss.sel(time='2011').NDVI_mean.plot.imshow(col='time', col_wrap=4, vmin=0)

In [None]:
# Spatial-mean NDVI_mean for 1988.
dss.sel(time='1988').NDVI_mean.mean(['x','y']).plot()

In [None]:
# Spatial-mean NDVI_mean over the full record.
dss.NDVI_mean.mean(['x','y']).plot()

In [None]:
# years = [str(i) for i in range(1983,2015)]

In [None]:
# %%time
# for y in years:
#     print(y)
    
#     url = 'https://www.ncei.noaa.gov/thredds/catalog/cdr/ndvi/'+y+'/catalog.html'

#     #get all relevant links for that year
#     soup = BeautifulSoup(requests.get(url).content, "html.parser")
#     files_to_dl = []
#     for link in soup.select('a[href*=".nc"]'):
#         files_to_dl.append(link["href"])
    
#     #create url to open
#     files = ['https://www.ncei.noaa.gov/thredds/dodsC/'+i[21:] for i in files_to_dl if i.endswith('.nc')]
    
#     #open files for that year and tidy up
#     warnings.filterwarnings("ignore")
#     ds = xr.open_mfdataset(files) #chunks=dict(latitude=1000, longitude=1000)

#     ds = assign_crs(ds, ds.crs.attrs['epsg_code'])
#     ds = ds.drop(['crs', 'lat_bnds', 'lon_bnds'])
#     ds = ds.sel(longitude=slice(110,155), latitude=slice(-9,-45)) #aus extent

#     ds = ds.compute()
#     ds.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR/AVHRR_cdr_v5_NDVI_daily_'+y+'.nc')
#     break

In [None]:
ds = xr.open_dataarray('/g/data/os22/chad_tmp/NEE_modelling/data/1km/kNDVI_1km_monthly_2002_2021.nc',
                      chunks=dict(latitude=750, longitude=750, time=-1))

In [None]:
ds = ds.interpolate_na(method='linear', dim='time', limit=2).compute()#.isel(time=110).plot.imshow(size=10)

In [None]:
ds.to_netcdf('/g/data/os22/chad_tmp/NEE_modelling/data/1km/kNDVI_1km_monthly_2002_2021.nc')