## Process Sami's harmonized NDVI

In [None]:
import xarray as xr
from odc.geo.xr import assign_crs
import pandas as pd

# Load the harmonized NDVI table, reshape it into an (x, y, time) cube and
# export it as NetCDF.
ds = (
    pd.read_parquet('/g/data/os22/chad_tmp/climate-carbon-interactions/data/MCD43_AVHRR_NDVI_hybrid_2020-10-12.parquet')
    # parse the date column, then expose it under the name 'time'
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .rename(columns={'date': 'time'})
    .set_index(['x', 'y', 'time'])
    # remaining columns become data variables, so drop the ones not wanted
    .drop(columns=['month', 'year', 'season', 'sza', 'tod'])
    .to_xarray()
    # reverse the dimension order: (x, y, time) -> (time, y, x)
    .transpose()
)
ds = assign_crs(ds, crs='epsg:4326')

ds.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/MCD43_AVHRR_NDVI_hybrid_EasternOzWoody.nc')

## Global LST from AVHRR

Data available here: http://glass.umd.edu/LST/v02/

paper here: https://essd.copernicus.org/articles/12/3247/2020/#section5


In [None]:
# ds.plot.imshow(col='time', col_wrap=4, robust=True)

## Soil Moisture (S-GRAFS)

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import warnings
import odc.geo.xr
import xarray as xr
from odc.geo.xr import assign_crs
import pandas as pd
# import geopandas as gpd

In [None]:
# Input directory that is scanned for .nc files (per its path, the S-GRAFS
# 1 km volumetric soil-moisture product).
base = '/g/data/fj4/SatelliteSoilMoistureProducts/S-GRAFS/SM_vol_1km/'

In [None]:
# Make the AusEFlux helper modules importable; this must run before the
# `dask_utils` import below.
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from dask_utils import start_local_dask

# start_local_dask is a project helper not shown in this notebook; presumably
# it starts a local Dask cluster and returns its client -- confirm.
client = start_local_dask(mem_safety_margin='2Gb')
# Bare expression so the notebook renders the client summary.
client

In [None]:
# Full paths of every .nc file directly under `base`, in sorted order.
files = sorted(base + name for name in os.listdir(base) if name.endswith('.nc'))
print(len(files))

In [None]:
# Open all soil-moisture files as one dataset and reduce it to monthly means.
ds = xr.open_mfdataset(files) #chunks=dict(latitude=1000, longitude=1000)
ds = assign_crs(ds, crs='epsg:4326')
# Double brackets keep a Dataset (not a DataArray) holding only this variable.
ds = ds[['soil_moisture']]
# Monthly means labelled at month start ('MS').
ds = ds.resample(time='MS').mean()
# Shift the labels by 14 days so each step is stamped on the 15th of the month.
# This replaces the `loffset=` argument of resample, which is deprecated and
# has been removed from recent xarray/pandas releases.
ds['time'] = ds.get_index('time') + pd.Timedelta(14, 'd')
ds

In [None]:
%%time
# Evaluate the pending computation and hold the result in memory; the cell
# magic above (which must stay on the first line) reports how long it takes.
ds = ds.compute()

In [None]:
# Record NaN as the nodata value in the dataset attributes
# (presumably consulted by the odc reprojection that follows -- confirm).
ds.attrs['nodata'] = np.nan

In [None]:
# The AusEFlux GPP product is opened only to borrow its grid definition.
gpp = xr.open_dataset('/g/data/os22/chad_tmp/NEE_modelling/results/predictions/AusEFlux_GPP_2003_2022_5km_quantiles_v1.1.nc')
gpp = assign_crs(gpp, crs='epsg:4326')

# Resample the soil moisture onto the GPP grid using 'average' resampling.
ds_5k = ds.odc.reproject(how=gpp.odc.geobox, resampling='average')

In [None]:
# Write the monthly dataset held in `ds` (not the reprojected `ds_5k`) to disk.
ds.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/S-GRAFS_1km_monthly_2015_2022.nc')

In [None]:
# Write the reprojected dataset held in `ds_5k` to disk.
ds_5k.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/S-GRAFS_5km_monthly_2015_2022.nc')

## Woody Cover Fraction (~Trees)

For the years 2000-2022 this was already processed in the `AusEFlux` repo.

We'll need to mosaic the 25m datasets to get the 90's.  This requires a ton of memory, ~300 GiB

In [None]:
import os
import pandas as pd
import xarray as xr
import numpy as np
from odc.geo.xr import assign_crs
import warnings
import matplotlib.pyplot as plt

import sys
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from dask_utils import start_local_dask
from _collect_prediction_data import round_coords

In [None]:
# start_local_dask comes from the project's dask_utils module (not shown here);
# presumably it starts a local Dask cluster and returns its client -- confirm.
client = start_local_dask(mem_safety_margin='2Gb')
# Bare expression so the notebook renders the client summary.
client

In [None]:
# Sorted full paths of the .nc files in the AusEFlux 5km WCF directory.
base = '/g/data/os22/chad_tmp/AusEFlux/data/5km/WCF/'
files = sorted(base + name for name in os.listdir(base) if name.endswith('.nc'))

In [None]:
# Open every WCF file as one dataset, tag it with a CRS and load it into memory.
wcf = assign_crs(xr.open_mfdataset(files), crs='EPSG:4326').compute()
# Remove the 'grid_mapping' attribute from the WCF variable before writing
# (raises KeyError if it is absent). Presumably this avoids a clash when the
# file is encoded -- confirm.
wcf['WCF'].attrs.pop('grid_mapping')
wcf.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/WCF_5km_monthly_2000_2022.nc')

### Mosaicing the 25m tiles of WCF

In [None]:
# Root directory of the tiled source data, the years to mosaic (as strings),
# and the sorted names of every entry in the root (one per tile).
base = '/g/data/ub8/au/LandCover/DEA_ALC/'
years = list(map(str, range(1995, 2000)))
tiles = sorted(os.listdir(base))

In [None]:
# Target grid: the geobox of the existing 5km WCF file, used as the
# destination when reprojecting.
gbox = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/WCF_5km_monthly_2000_2022.nc').odc.geobox

In [None]:
# For each year: open every tile, mosaic the tiles into one array, reproject it
# onto `gbox`, repeat it across 12 monthly time stamps and write one file.
# Relies on `years`, `tiles`, `gbox` and `round_coords` from earlier cells.

# Silence warnings once up front rather than re-registering the filter on
# every iteration (this stays in effect for the rest of the session).
warnings.filterwarnings("ignore")

for y in years:
    print(y)
    dss = []
    for n, t in enumerate(tiles, start=1):
        # progress counter, overwritten in place via the carriage return
        print(f"  {n:03}/{len(tiles):03}\r", end="")
        ds = xr.open_dataset(
            '/g/data/ub8/au/LandCover/DEA_ALC/'+t+'/fc_metrics_'+t+'_'+y+'.nc',
            chunks=dict(x=4000, y=4000),
        )
        ds = ds.transpose('y', 'x')
        # Tag the CRS, collapse the Dataset to a single 2-D DataArray and name
        # it 'WCF'. drop_vars replaces the deprecated generic .drop().
        ds = (
            assign_crs(ds, crs='epsg:3577')
            .drop_vars('crs')
            .to_array()
            .squeeze()
            .drop_vars('variable')
            .rename('WCF')
        )
        dss.append(ds)

    # Mosaic all tiles by their coordinates and load the result into memory.
    ds = xr.combine_by_coords(dss).compute()
    ds = ds.WCF
    ds.attrs['nodata'] = np.nan
    ds = ds.odc.reproject(gbox, resampling='average').compute()
    ds = round_coords(ds)
    # One time stamp per month of the year, offset 14 days from month start;
    # the single annual layer is broadcast along this new time dimension.
    time = pd.date_range(y+"-01", y+"-12", freq='MS')
    time = [stamp + pd.Timedelta(14, 'd') for stamp in time]
    ds = ds.expand_dims(time=time)
    ds.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/WCF/WCF_'+y+'.nc')
    

### Merge the files

In [None]:
# Open the 2000-2022 record and derive a mask from it: True wherever the
# 2000-2010 time-mean of WCF is not NaN.
_2000_2022 = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/WCF_5km_monthly_2000_2022.nc')
_2000_2022 = assign_crs(_2000_2022, crs='EPSG:4326')
mean = _2000_2022['WCF'].sel(time=slice('2000', '2010')).mean('time')
mask = ~np.isnan(mean)

In [None]:
# Sorted full paths of the per-year WCF files.
base = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/WCF/'
files = sorted(base + name for name in os.listdir(base) if name.endswith('.nc'))

In [None]:
# Open the per-year files, give the unnamed variable its proper name, tag the
# CRS and blank out every pixel outside `mask`.
wcf = assign_crs(
    xr.open_mfdataset(files).rename({'__xarray_dataarray_variable__': 'WCF'}),
    crs='EPSG:4326',
).where(mask)

# Join the per-year files onto the 2000-2022 record along time and load the
# result into memory.
ds = xr.concat([wcf, _2000_2022], dim='time').compute()

In [None]:
# Write the merged WCF record to disk.
# NOTE(review): the file name says 1990, while the mosaicking cell in this
# notebook lists years 1995-1999 -- confirm which years are actually included.
ds.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/WCF_5km_monthly_1990_2022.nc')

In [None]:
# Time series of the spatially averaged WCF.
wcf_all = ds['WCF']
wcf_all.mean(['latitude', 'longitude']).plot(figsize=(13,4));
plt.title('Mean WCF 1992-2022 ');

# Map of the late-period mean minus the early-period mean.
# NOTE(review): the titles and the 1990-94 slice assume data from the early
# 1990s, but the mosaicking cell in this notebook lists 1995-1999 -- confirm
# the slice is not empty.
late_mean = wcf_all.sel(time=slice('2018','2022')).mean('time')
early_mean = wcf_all.sel(time=slice('1990','1994')).mean('time')
(late_mean - early_mean).plot.imshow(size=7, robust=True, cmap='RdBu')
plt.title('Difference WCF: 2018-2022 average minus 1990-94 average');

## ANUCLIM 1982-2022

Including fractional anomalies

In [None]:
import xarray as xr
import rioxarray
from datetime import datetime
import pandas as pd
import numpy as np
import os
from odc.geo.xr import assign_crs

import sys
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _collect_prediction_data import round_coords

In [None]:
# This section takes start_local_dask from datacube, whereas earlier sections
# of this notebook import a helper of the same name from the project's
# dask_utils module.
from datacube.utils.dask import start_local_dask
client = start_local_dask(mem_safety_margin='2Gb')
# Bare expression so the notebook renders the client summary.
client

In [None]:
# Target grid: the geobox of the existing 5km WCF file, used as the
# destination when reprojecting.
gbox = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/WCF_5km_monthly_2000_2022.nc').odc.geobox

In [None]:
# Reproject one ANUClimate monthly variable onto `gbox`, year by year, then
# stitch the years together and write a single file.
# Relies on `gbox` and `round_coords` from earlier cells.
base = '/g/data/gh70/ANUClimate/v2-0/stable/month/'
var = 'tavg'  # 'rain' # 'vpd' 'srad'

years = [str(yr) for yr in range(1982, 2023)]

pp = []
for n, y in enumerate(years, start=1):
    # progress counter, overwritten in place via the carriage return
    print(" {:02}/{:02}\r".format(n, len(years)), end="")
    year_dir = base + var + '/' + y + '/'
    # Every file in the year's directory; sorted so the input order is
    # deterministic (os.listdir makes no ordering guarantee).
    year_files = sorted(year_dir + name for name in os.listdir(year_dir))
    ds = xr.open_mfdataset(year_files, chunks=dict(lat=1000, lon=1000))

    ds = assign_crs(ds, crs='epsg:4283') #GDA94
    # Keep only the variable of interest; drop_vars replaces the deprecated
    # generic .drop().
    ds = ds.drop_vars('crs')[var]
    ds.attrs['nodata'] = np.nan
    ds = ds.odc.reproject(gbox, resampling='average')
    ds = round_coords(ds)
    pp.append(ds)

# Stitch the years together in time order, load into memory and name the array.
ds = xr.concat(pp, dim='time').sortby('time')
ds = ds.compute()
ds = ds.rename(var)
# NOTE(review): this writes into data/, while the fractional-anomaly section of
# this notebook reads the same file name from data/5km/ -- confirm the file is
# moved or one of the paths is adjusted.
ds.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/'+var+'_5km_monthly_1982_2022.nc')

### Fractional anomalies

obs / climatology

kNDVI, LAI, rain_cml3, rain_cml6, rain_cml12

In [None]:
# Variable to process; it selects both the input file and the data variable.
var = 'tavg'
# NOTE(review): read from data/5km/, whereas the ANUCLIM export cell in this
# notebook writes to data/ -- confirm the file location.
ds = xr.open_dataset('/g/data/os22/chad_tmp/climate-carbon-interactions/data/5km/'+var+'_5km_monthly_1982_2022.nc')
ds = assign_crs(ds, crs='EPSG:4326')

In [None]:
# Monthly climatology: the mean over all years for each calendar month.
mean = ds.groupby("time.month").mean("time")

In [None]:
# Fractional anomaly (obs / climatology): every time step is divided by the
# `mean` entry for its calendar month. Pixels where that entry is 0 will come
# out as inf/NaN.
frac = ds.groupby("time.month") / mean

In [None]:
# Quick look: time series of the spatially averaged fractional anomaly.
frac[var].mean(['latitude', 'longitude']).plot(figsize=(13,4))

In [None]:
# Remove the 'month' coordinate, rename the variable to '<var>_anom' and write
# the result to disk. drop_vars replaces the deprecated generic Dataset.drop().
frac.drop_vars('month').rename({var:var+'_anom'}).to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/5km/'+var+'_anom_5km_monthly_1982_2022.nc')

## AVHRR NDVI

Started doing this by directly downloading the global daily netCDFs from the links below and then processing them, but switched to using GEE instead.

from here: https://www.ncei.noaa.gov/data/avhrr-land-normalized-difference-vegetation-index/access/

description: https://climatedataguide.ucar.edu/climate-data/ndvi-normalized-difference-vegetation-index-noaa-avhrr

THREDDS: https://www.ncei.noaa.gov/thredds/catalog/cdr/ndvi/catalog.html

In [None]:
# import os
# import wget
# import requests
# from bs4 import BeautifulSoup

# base_url = 'https://www.ncei.noaa.gov/data/avhrr-land-normalized-difference-vegetation-index/access/'
# year = [str(y) for y in range(1982,2014)]
# out_loc = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR/'

# for y in year:
#     print(y)
#     #where the data is listed for a given year
#     url = "https://www.ncei.noaa.gov/data/avhrr-land-normalized-difference-vegetation-index/access/"+y
    
#     #get all relevant links for that year
#     soup = BeautifulSoup(requests.get(url).content, "html.parser")
#     files_to_dl = []
#     for link in soup.select('a[href*=".nc"]'):
#         files_to_dl.append(link["href"])
    
#     #create folder if necessary
#     if os.path.exists(out_loc+y):
#         pass
#     else:
#         os.mkdir(out_loc+y)
    
#     #download links
#     i=0
#     for filename in files_to_dl:
#         print("  {:03}/{:03}\r".format(i + 1, len(range(0, len(files_to_dl)))), end="")
        
#         if os.path.exists(out_loc+y+'/'+filename):
#             i+=1
#             continue
        
#         else:
#             wget.download(url=base_url+y+'/'+filename,
#                   out=out_loc+y+'/')
#             i+=1

#------------------------------------------------------------------------------
##Parallel version of the above...not faster due to bandwidth limits on ARE

# import multiprocessing as mp
# from tqdm import tqdm

# def _parallel_dl(filename, y, base_url, out_loc):
#     wget.download(url=base_url+y+'/'+filename,
#                   out=out_loc+y+'/')

# #download each link
# def update(*a):
#     pbar.update()      

# for y in year:
#     print(y)
#     #where the data is listed for a given year
#     url = "https://www.ncei.noaa.gov/data/avhrr-land-normalized-difference-vegetation-index/access/"+y
    
#     #get all relevant links for that year
#     soup = BeautifulSoup(requests.get(url).content, "html.parser")
#     files_to_dl = []
#     for link in soup.select('a[href*=".nc"]'):
#         files_to_dl.append(link["href"])
    
#     if os.path.exists(out_loc+y):
#         pass
#     else:
#         os.mkdir(out_loc+y)
    
#     pbar = tqdm(total=len(files_to_dl))

#     with mp.Pool(ncpus) as pool:
#         for filename in files_to_dl:
#             pool.apply_async(
#                 _parallel_dl,
#                     [filename,
#                      y, 
#                      base_url,
#                      out_loc
#                     ],
#                 callback=update,
#                 )
#         pool.close()
#         pool.join()
#         pbar.close()

#---------------------------------------------------------------------------
## Process AVHRR into monthly mean NDVI

# base = '/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR/1982/'

# files = [base+i for i in os.listdir(base) if i.endswith('.nc')]
# files.sort()

# warnings.filterwarnings("ignore")
# ds = xr.open_mfdataset(files) #chunks=dict(latitude=1000, longitude=1000)

# ds = assign_crs(ds, ds.crs.attrs['epsg_code'])
# ds = ds.drop(['crs', 'lat_bnds', 'lon_bnds'])
# ds = ds.sel(longitude=slice(110,155), latitude=slice(-9,-45)) #aus extent

# ds = ds.compute()

# #QA masking
# flag_keys = ds.QA.flag_meanings.split()
# qa_dict = dict(zip(flag_keys, ds.QA.flag_masks))

# clouds = np.bitwise_and(ds['QA'], qa_dict['cloudy'])
# clouds = xr.where(clouds>0, 1, 0).astype(bool) #equals 1 where cloudy
# cloud_shadows = np.bitwise_and(ds['QA'], qa_dict['cloud_shadow'])
# cloud_shadows = xr.where(cloud_shadows>0, 1, 0).astype(bool)  #equals 1 where cloud_shadow

# mask = (clouds | cloud_shadows) #combine

# ds = ds.where(~mask)

# ds = ds.NDVI.resample(time='MS').mean()

# ds = assign_crs(ds, crs='epsg:4326')
# del ds.attrs['grid_mapping']

# ds.to_netcdf('/g/data/os22/chad_tmp/climate-carbon-interactions/data/AVHRR/AVHRR_cdr_NDVI_monthly_1982.nc')