# Batch run phenology analysis

Split the data into spatial tiles and submit each tile to the PBS queue with `qsub`.

In [None]:
%matplotlib inline

import os
import sys
import warnings
import numpy as np
import xarray as xr
import pandas as pd

import matplotlib.pyplot as plt
from odc.geo.xr import assign_crs


## Open data

In [None]:
# NDVI: open the file first, then select the 'NDVI' variable from it
ds = xr.open_dataset(
    '/g/data/os22/chad_tmp/Aus_phenology/data/NDVI/NDVI_smooth_AusENDVI-clim_MCD43A4.nc'
)
ds = ds['NDVI']

# Covariables: open and drop the 'wcf' variable in one step
covariables = xr.open_dataset(
    '/g/data/os22/chad_tmp/Aus_phenology/data/covars.nc'
).drop_vars('wcf')

# Optional spatial subsets for quick tests -- uncomment ONE pair so that
# `ds` and `covariables` are cut to the same window.
#
# larger window:
# ds = ds.isel(latitude=slice(200,352), longitude=slice(50,302))
# covariables = covariables.isel(latitude=slice(200,352), longitude=slice(50,302))
#
# smaller window:
# ds = ds.isel(latitude=slice(200,252), longitude=slice(50,202))
# covariables = covariables.isel(latitude=slice(200,252), longitude=slice(50,202))

## Split data into tiles

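Running the whole of Australia in one go takes too long: with >500,000 pixels × >14,000 time steps the dask graph is huge. The data are therefore split into spatial tiles that are processed separately.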

In [None]:
# Function to split into spatial tiles
def split_spatial_tiles(data_array, lat_dim='latitude', lon_dim='longitude', n_lat=2, n_lon=4):
    """Split a gridded object into an ``n_lat`` x ``n_lon`` grid of tiles.

    Tile edges are placed so that the tiles always cover the full extent of
    both dimensions with no gaps or overlaps. When a dimension's size is not
    an exact multiple of the tile count, the leftover pixels are spread over
    the tiles (tile sizes differ by at most one) instead of being dropped.
    When the sizes divide evenly the result is the same equal-sized tiling as
    a plain ``size // n`` split.

    Parameters
    ----------
    data_array
        Any object exposing ``.sizes[dim]`` and ``.isel({dim: slice})``,
        e.g. an xarray DataArray or Dataset.
    lat_dim, lon_dim : str
        Names of the two dimensions to tile along.
    n_lat, n_lon : int
        Number of tiles along ``lat_dim`` and ``lon_dim`` (each must be >= 1).

    Returns
    -------
    list
        ``n_lat * n_lon`` tiles in row-major order: the ``lat_dim`` index
        varies slowest, the ``lon_dim`` index fastest.

    Raises
    ------
    ValueError
        If ``n_lat`` or ``n_lon`` is smaller than 1.
    """
    if n_lat < 1 or n_lon < 1:
        raise ValueError(
            f"n_lat and n_lon must be >= 1, got n_lat={n_lat}, n_lon={n_lon}"
        )

    def _edges(size, n_parts):
        # n_parts + 1 monotonically increasing indices from 0 to size;
        # integer arithmetic keeps the last edge exactly at `size`.
        return [k * size // n_parts for k in range(n_parts + 1)]

    lat_edges = _edges(data_array.sizes[lat_dim], n_lat)
    lon_edges = _edges(data_array.sizes[lon_dim], n_lon)

    return [
        data_array.isel({
            lat_dim: slice(lat_start, lat_stop),
            lon_dim: slice(lon_start, lon_stop),
        })
        for lat_start, lat_stop in zip(lat_edges[:-1], lat_edges[1:])
        for lon_start, lon_stop in zip(lon_edges[:-1], lon_edges[1:])
    ]

# Cut both inputs into the same 2 (latitude) x 4 (longitude) tile grid
tiles, covars_tiles = (
    split_spatial_tiles(source, n_lat=2, n_lon=4)
    for source in (ds, covariables)
)

# Sanity check: stitching the NDVI tiles back together must reproduce every
# longitude and latitude coordinate of the input (no overlaps, no missing pixels).
assert np.sum(xr.combine_by_coords(tiles)['longitude'] == ds['longitude']) == len(ds['longitude'])
assert np.sum(xr.combine_by_coords(tiles)['latitude'] == ds['latitude']) == len(ds['latitude'])

# Build name -> tile dictionaries. The names are listed in the same order as
# the tiles: northern row first, west to east, then the southern row.
tile_names = [
    'NW', 'NNW', 'NNE', 'NE',
    'SW', 'SSW', 'SSE', 'SE',
]
tiles_dict = {name: tile for name, tile in zip(tile_names, tiles)}
covars_tiles_dict = {name: tile for name, tile in zip(tile_names, covars_tiles)}

# Visual check of the tiling: one panel per tile, showing the mean of the
# first 20 time steps, laid out on a 2 x 4 grid.
fig, axes = plt.subplots(2, 4, figsize=(10,8))
for tile, panel in zip(tiles, axes.ravel()):
    first_steps_mean = tile.isel(time=range(20)).mean('time')
    first_steps_mean.plot(ax=panel, add_colorbar=False, add_labels=False)
    panel.set_title(None)

## Export

In [None]:
# Write every NDVI tile to its own netCDF file, named after the tile
for tile_name, ndvi_tile in tiles_dict.items():
    print(tile_name)
    ndvi_tile.to_netcdf(
        f'/g/data/os22/chad_tmp/Aus_phenology/data/tiled_data/NDVI_{tile_name}.nc'
    )

In [None]:
# Write every covariables tile to its own netCDF file, named after the tile
for tile_name, covars_tile in covars_tiles_dict.items():
    print(tile_name)
    covars_tile.to_netcdf(
        f'/g/data/os22/chad_tmp/Aus_phenology/data/tiled_data/COVARS_{tile_name}.nc'
    )

## Submit tiles to PBS job queue

In [1]:
import os

In [2]:
# Submit one PBS job per tile. Each job runs src/run_single_tile.sh with the
# tile name passed through the TILENAME environment variable (qsub -v).
import subprocess

tiles = ['NW', 'NNW', 'NNE', 'NE', 'SW', 'SSW', 'SSE', 'SE']

# The job script path below is relative to the project root.
os.chdir('/g/data/os22/chad_tmp/Aus_phenology/')

failed_tiles = []
for t in tiles:
    print(t)
    # Pass an argument list (no shell): the tile name reaches qsub verbatim,
    # so no shell quoting needs to be embedded in the names themselves.
    submission = subprocess.run(
        ['qsub', '-v', f'TILENAME={t}', 'src/run_single_tile.sh']
    )
    # Keep submitting the remaining tiles, but remember any failure instead
    # of silently discarding the exit status.
    if submission.returncode != 0:
        failed_tiles.append(t)

if failed_tiles:
    print(f'qsub failed for tiles: {failed_tiles}')

'NNW'
127541427.gadi-pbs
'NNE'
127541428.gadi-pbs
'NE'
127541429.gadi-pbs
'SW'
127541430.gadi-pbs
'SSW'
127541431.gadi-pbs
'SSE'
127541432.gadi-pbs
'SE'
127541433.gadi-pbs


In [9]:
# Query the PBS queue for the status of current jobs (IPython shell escape).
# Uncomment the second line, with the relevant job id, to inspect a single job.
!qstat
# !qstat -xf 127454605.gadi-pbs

Job id                 Name             User              Time Use S Queue
---------------------  ---------------- ----------------  -------- - -----
127540824.gadi-pbs     sys-dashboard-s* cb3058            00:00:48 R normalsr-exec   
127541427.gadi-pbs     run_single_tile* cb3058            47:28:22 R normalsr-exec   
127541428.gadi-pbs     run_single_tile* cb3058            49:10:28 R normalsr-exec   
127541433.gadi-pbs     run_single_tile* cb3058            45:54:20 R normalsr-exec   


## Run interactively instead

Useful for testing, debugging, etc.

In [None]:
import sys
sys.path.append('/g/data/os22/chad_tmp/AusEFlux/src/')
from _utils import start_local_dask

sys.path.append('/g/data/os22/chad_tmp/Aus_phenology/src')
from batch_run_phenology_analysis import phenometrics_etal

In [None]:
## variables for script

# Input/output locations handed to phenometrics_etal
results_path = '/g/data/os22/chad_tmp/Aus_phenology/results/combined_tiles/'
template_path = '/g/data/os22/chad_tmp/Aus_phenology/data/templates/'

# Analysis settings handed to phenometrics_etal
regress_var = 'vPOS'
modelling_vars = ['co2', 'srad', 'rain', 'tavg', 'vpd']

# Local Dask cluster settings handed to start_local_dask
n_workers = 102
memory_limit = '450GiB'

In [None]:
# Start the local Dask cluster with the worker count and memory limit set
# above, using a single thread per worker.
start_local_dask(n_workers=n_workers, threads_per_worker=1, memory_limit=memory_limit)

In [None]:
# Process the tiles one after another in this session, with identical
# settings for every tile; only the tile name (`n`) changes.
tiles = ['NW', 'NNW', 'NNE', 'NE', 'SW', 'SSW', 'SSE', 'SE']

shared_settings = {
    'results_path': results_path,
    'template_path': template_path,
    'regress_var': regress_var,
    'modelling_vars': modelling_vars,
}

for tile_name in tiles:
    print(tile_name)
    phenometrics_etal(n=tile_name, **shared_settings)