# 0. Preparation

## (Optional) Mount Google Drive
- Make sure the access granted is to the user's own drive (files in shared folders are not accessible).

In [2]:
# True when the active IPython shell reports itself as a Google Colab kernel.
IN_COLLAB = 'google.colab' in str(get_ipython())

if not IN_COLLAB:
    # Non-Colab run: use the repository checkout path on this machine.
    MY_HOME_ABS_PATH = "/root/co2-flux-hourly-gpp-modeling/"
else:
    # TODO: CHANGE THIS BASED ON YOUR OWN LOCAL SETTINGS
    MY_HOME_ABS_PATH = "/content/drive/MyDrive/W210/co2-flux-hourly-gpp-modeling"
    # Mount Google Drive so the project folder above becomes reachable.
    from google.colab import drive
    drive.mount('/content/drive/')

## Import libraries

In [3]:
import os
# Work from the project root so the relative paths used in later cells resolve.
os.chdir(MY_HOME_ABS_PATH)
# Uncomment on a fresh environment to install the project requirements.
# !pip install -r ./code/requirements.txt

In [4]:
import os
import sys
import warnings
warnings.filterwarnings("ignore")
import copy
import json
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import pytorch_lightning as pl
import torch
import torch.nn as nn

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_forecasting import BaseModel, MAE
from pytorch_forecasting.metrics.point import RMSE
from pytorch_forecasting.data.encoders import NaNLabelEncoder

from sklearn.metrics import r2_score
from timeit import default_timer
from datetime import datetime
import gc
import pickle

# Load local custom modules
# The relative paths below are resolved against the project root, so make sure
# the working directory is the project root before touching sys.path.
os.chdir(MY_HOME_ABS_PATH)
if IN_COLLAB:
     # On Colab the tools directory is put first on the module search path.
     sys.path.insert(0,os.path.abspath("./code/src/tools"))
else:
    # Elsewhere the credential and tools directories are appended instead.
    # NOTE(review): the tools directory is added twice (relative and absolute form).
    sys.path.append('./.cred')
    sys.path.append('./code/src/tools')
    sys.path.append(os.path.abspath("./code/src/tools"))

# Project-local imports; these only resolve after the sys.path changes above.
from CloudIO.AzStorageClient import AzStorageClient
# NOTE(review): the wildcard imports hide which module provides helpers such as
# get_raw_datasets / setup_tsdataset / get_eval_metrics - consider explicit imports.
from data_pipeline_lib import *
from model_pipeline_lib import *

# Display every column and format floats with five decimals.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)
# Fix the random seed; as the last expression, its return value is the cell output.
pl.seed_everything(42)

2023-03-22 03:02:16.313102: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-22 03:02:16.476710: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-03-22 03:02:16.476743: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-22 03:02:17.356112: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directo

42

# Load data from Azure blob

In [5]:
# Project directory layout, all derived from the project home path.
root_dir = MY_HOME_ABS_PATH
tmp_dir = os.sep.join([root_dir, '.tmp'])
model_dir = os.sep.join([root_dir, 'data', 'models'])
cred_dir = os.sep.join([root_dir, '.cred'])
az_cred_file = os.sep.join([cred_dir, 'azblobcred.json'])

# Azure container / blob names and a path for that blob under the temp directory.
container, blob_name = "all-sites-data", "full_2010_2015_v_mvp_raw.parquet"
local_file = os.sep.join([tmp_dir, blob_name])

In [6]:
# Download full data.
# `container` and `blob_name` (like root_dir / tmp_dir / model_dir / local_file)
# are defined once in the configuration cell above. They are intentionally not
# redefined here: a second copy would silently override edits made in that cell.
data_df = get_raw_datasets(container, blob_name)

Data size: (4862712, 51)
Data Columns: Index(['GPP_NT_VUT_REF', 'site_id', 'timestep_idx_local',
       'timestep_idx_global', 'datetime', 'date', 'year', 'month', 'day',
       'hour', 'TA_ERA', 'SW_IN_ERA', 'LW_IN_ERA', 'VPD_ERA', 'P_ERA',
       'PA_ERA', 'EVI', 'NDVI', 'NIRv', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6',
       'b7', 'IGBP', 'lat', 'long', 'koppen_sub', 'koppen_main', 'c3c4',
       'c4_percent', 'BESS-PAR', 'BESS-PARdiff', 'BESS-RSDN', 'CSIF-SIFdaily',
       'PET', 'Ts', 'ESACCI-sm', 'MODIS_LC', 'NDWI', 'Percent_Snow', 'Fpar',
       'Lai', 'LST_Day', 'LST_Night', 'MODIS_IGBP', 'MODIS_PFT',
       'gap_flag_hour', 'gap_flag_month'],
      dtype='object')
NA count: 0


# Eval Model

## Default Evaluation

In [7]:
# Resolve the experiment directory from `model_dir` rather than a machine-specific
# absolute path, so this cell follows MY_HOME_ABS_PATH (e.g. when run on Colab).
# normpath collapses the doubled separator produced when the home path ends in "/".
# TODO: Replace the experiment and checkpoint names with those of your own run.
exp_model_dir = os.path.normpath(os.path.join(model_dir, "tft_model_1yrtrain_tuning_230318_1906"))
best_model_path = os.path.join(exp_model_dir, "trial_0", "epoch=12.ckpt")
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
print(f"Quantiles: {best_tft.loss.quantiles}") # [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]

# Experiment constants
VAL_INDEX = 3   # passed to get_splited_datasets to select the validation split
TEST_INDEX = 4  # passed to get_splited_datasets to select the test split
SUBSET_LEN = 24*365 # 1 year
ENCODER_LEN =  24*7 # 7 days of hourly steps
print(f"training timestemp length= {SUBSET_LEN}")

# Setup dataset
train_df, val_df, test_df = get_splited_datasets(data_df, VAL_INDEX, TEST_INDEX)
train_df, val_df, test_df = subset_data(train_df, val_df, test_df, SUBSET_LEN)
(training, validation, testing) = setup_tsdataset(train_df, val_df, test_df, ENCODER_LEN)

# Get small test data: only the first ENCODER_LEN+5 local timesteps are kept.
subtest_df = test_df.loc[test_df['timestep_idx_local'] < ENCODER_LEN+5, ].copy()
_, _, subtesting = setup_tsdataset_mvp_mistake(train_df, val_df, subtest_df, ENCODER_LEN)
print(f"Subset num subtest timesteps: {len(subtest_df)}")

# create dataloaders for model
# ref: https://pytorch-lightning.readthedocs.io/en/stable/guides/speed.html#dataloaders
batch_size = 128  # set this between 32 to 128
# os.cpu_count() may return None when the count cannot be determined; fall back
# to 0 (load in the main process) so the value is always a valid num_workers.
cpu_count = os.cpu_count() or 0

Quantiles: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
training timestemp length= 8760
Subest length: 8760 timesteps for each sites
Subset num train timesteps: 683280
Subset num val timesteps: 227760
Subset num test timesteps: 219000
Subset num subtest timesteps: 4325


In [8]:
# Eval on Validation set
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=batch_size,
    num_workers=cpu_count,
)

# Print Model Eval on Validation Set
# The timer covers both collecting the targets and the quantile prediction pass.
start = default_timer()
print("Start eval on validation.")
val_actuals = torch.cat([targets[0] for _, targets in val_dataloader])
val_q_predictions = best_tft.predict(
    val_dataloader,
    mode="quantiles",
    show_progress_bar=True,
    return_x=False,
)
eval_time = default_timer() - start
print(f"Val eval time: {eval_time}")

# save eval results - also we can resume eval at a later point in time
for file_name, payload in (
    ("val_actuals.pkl", val_actuals),
    ("val_q_predictions.pkl", val_q_predictions),
):
    with open(os.path.join(exp_model_dir, file_name), "wb") as fout:
        pickle.dump(payload, fout)

Start eval on validation.


Predict: 100%|██████████| 1746/1746 [1:17:35<00:00,  2.67s/ batches]


Val eval time: 4719.448837316999


In [10]:
# Load eval results - lets us resume eval at a later point in time without
# repeating the slow prediction pass.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open(exp_model_dir + os.sep + "val_actuals.pkl", "rb") as fin:
    # Loaded under the same (correctly spelled) name the evaluation cell uses,
    # so there is a single variable holding the validation targets.
    val_actuals = pickle.load(fin)

with open(exp_model_dir + os.sep + "val_q_predictions.pkl", "rb") as fin:
    val_q_predictions = pickle.load(fin)

# Eval on Validation set
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=cpu_count)

# Quantile positions follow the model's quantile list
# [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]: index 3 is p50, index 5 is p90.
val_predictions = val_q_predictions[:, :, 3] # p50 == estimation
val_p90 = val_q_predictions[:, :,5] # p90
del val_q_predictions  # free the full quantile tensor once the slices are taken

# Unmasked evaluation
eval_metric = get_eval_metrics(val_actuals, val_predictions, None, val_p90)
print(f"Unmasked Val eval evaluation: {eval_metric}")

# Masked evaluation
start = default_timer()
# The mask is the logical NOT of the last categorical decoder feature
# (presumably a gap flag - TODO confirm against setup_tsdataset).
mask = torch.logical_not(torch.cat([x['decoder_cat'][:, :, -1].reshape(-1) for x, y in iter(val_dataloader)]))
masked_eval_metric = get_eval_metrics(val_actuals, val_predictions, mask, val_p90)
eval_time = default_timer() - start
print(f"Masked Val eval time: {eval_time}")
print(f"Masked Val eval evaluation: {masked_eval_metric}")

Unmasked Val eval evaluation: {'rmse': 2.364879846572876, 'mae': 1.1571272611618042, 'nse': 0.8897618055343628, 'r2': 0.8897618239747154, 'p50_loss': 0.2770015001296997, 'p90_loss': 0.14377298951148987}
Masked Val eval time: 63.85783086800075
Masked Val eval evaluation: {'rmse': 2.4099676609039307, 'mae': 1.1951128244400024, 'nse': 0.8896824717521667, 'r2': 0.8896824572402541, 'p50_loss': 0.2841099202632904, 'p90_loss': 0.14686322212219238}


In [41]:
# Peek at the first rows of the test split.
test_df.head()

Unnamed: 0,GPP_NT_VUT_REF,site_id,timestep_idx_local,timestep_idx_global,datetime,date,year,month,day,hour,TA_ERA,SW_IN_ERA,LW_IN_ERA,VPD_ERA,P_ERA,PA_ERA,EVI,NDVI,NIRv,b1,b2,b3,b4,b5,b6,b7,IGBP,lat,long,koppen_sub,koppen_main,c3c4,c4_percent,BESS-PAR,BESS-PARdiff,BESS-RSDN,CSIF-SIFdaily,PET,Ts,ESACCI-sm,MODIS_LC,NDWI,Percent_Snow,Fpar,Lai,LST_Day,LST_Night,MODIS_IGBP,MODIS_PFT,gap_flag_hour,gap_flag_month
375216,0.15215,AU-Stp,0,350640,2010-01-01 00:00:00,2010-01-01,2010,1,1,0,26.741,0.0,424.841,7.978,0.0,97.839,0.12933,0.18494,0.05326,0.1981,0.288,0.0985,0.1557,0.3802,0.33684,0.2942,GRA,-17.1507,133.3502,6,2,C4,98.51,133.0,59.0,275.0,0.11301,-0.00981,303.28998,0.24542,10,-0.11392,0.0,0.32,0.7,317.36,296.04,GRA,GRA,0.0,0.0
375217,0.03356,AU-Stp,1,350641,2010-01-01 01:00:00,2010-01-01,2010,1,1,1,26.091,0.0,419.347,6.341,0.0,97.817,0.12933,0.18494,0.05326,0.1981,0.288,0.0985,0.1557,0.3802,0.35038,0.2942,GRA,-17.1507,133.3502,6,2,C4,98.51,133.0,59.0,275.0,0.11301,-0.00981,303.28998,0.24542,10,-0.11392,0.0,0.32,0.7,317.36,296.04,GRA,GRA,0.0,0.0
375218,0.01452,AU-Stp,2,350642,2010-01-01 02:00:00,2010-01-01,2010,1,1,2,25.701,0.0,419.347,5.679,0.0,97.779,0.12933,0.18494,0.05326,0.1981,0.288,0.0985,0.1557,0.3802,0.3805,0.2942,GRA,-17.1507,133.3502,6,2,C4,98.51,133.0,59.0,275.0,0.11301,-0.00981,303.28998,0.24542,10,-0.11392,0.0,0.32,0.7,317.36,296.04,GRA,GRA,0.0,0.0
375219,-0.02441,AU-Stp,3,350643,2010-01-01 03:00:00,2010-01-01,2010,1,1,3,25.31,0.0,419.347,5.017,0.0,97.741,0.12933,0.18494,0.05326,0.1981,0.288,0.0985,0.1557,0.3802,0.36598,0.2942,GRA,-17.1507,133.3502,6,2,C4,98.51,133.0,59.0,275.0,0.11301,-0.00981,303.28998,0.24542,10,-0.11392,0.0,0.32,0.7,317.36,296.04,GRA,GRA,0.0,0.0
375220,-0.01513,AU-Stp,4,350644,2010-01-01 04:00:00,2010-01-01,2010,1,1,4,25.05,0.0,427.48,4.592,0.0,97.736,0.12933,0.18494,0.05326,0.1981,0.288,0.0985,0.1557,0.3802,0.23086,0.2942,GRA,-17.1507,133.3502,6,2,C4,98.51,133.0,59.0,275.0,0.11301,-0.00981,303.28998,0.24542,10,-0.11392,0.0,0.32,0.7,317.36,296.04,GRA,GRA,0.0,0.0


In [42]:
# Build a loader over the small test subset, then fetch the raw model output
# together with the network inputs (`x`) for inspection.
subtest_dataloader = subtesting.to_dataloader(
    train=False,
    batch_size=batch_size,
    num_workers=cpu_count,
)
raw_predictions, x = best_tft.predict(
    subtest_dataloader,
    mode="raw",
    show_progress_bar=True,
    return_x=True,
)
print(raw_predictions.prediction[:5])

Predict: 100%|██████████| 1/1 [00:03<00:00,  3.14s/ batches]

tensor([[[-1.3710, -0.3597, -0.0812,  0.1400,  0.3113,  0.6013,  1.5250]],

        [[-1.0719, -0.2458, -0.0329,  0.1484,  0.2788,  0.4999,  1.2424]],

        [[-0.8736, -0.1617,  0.0038,  0.1463,  0.2451,  0.4181,  1.0627]],

        [[-0.8030, -0.1575, -0.0087,  0.1278,  0.2187,  0.3770,  0.9721]],

        [[-1.8909, -0.9424, -0.5524, -0.1261,  0.2219,  0.6450,  1.5386]]])





In [40]:
# List the available input entries, then show the first five rows of a few of them.
print(x.keys())
for key in ('decoder_target', 'decoder_cat', 'groups', 'decoder_time_idx'):
    print(x[key][:5])

dict_keys(['encoder_cat', 'encoder_cont', 'encoder_target', 'encoder_lengths', 'decoder_cat', 'decoder_cont', 'decoder_target', 'decoder_lengths', 'decoder_time_idx', 'groups', 'target_scale'])
tensor([[ 0.1402],
        [ 0.1259],
        [ 0.1268],
        [-0.8061],
        [-0.5396]])
tensor([[[ 6,  2, 10,  0,  0,  1,  1, 30,  0]],

        [[ 6,  2, 10,  0,  0,  1,  1, 30,  1]],

        [[ 6,  2, 10,  0,  0,  1,  1, 30, 12]],

        [[ 6,  2, 10,  0,  0,  1,  1, 30, 17]],

        [[ 6,  2, 10,  0,  0,  1,  1, 30, 18]]])
tensor([[78],
        [78],
        [78],
        [78],
        [78]])
tensor([[350808],
        [350809],
        [350810],
        [350811],
        [350812]])


In [26]:
# Quantile predictions on the small test subset; the last expression displays
# the first five entries as the cell output.
q_predictions = best_tft.predict(
    subtest_dataloader,
    mode="quantiles",
    show_progress_bar=True,
)
q_predictions[:5]

Predict: 100%|██████████| 1/1 [00:02<00:00,  2.91s/ batches]


tensor([[[-1.3710, -0.3597, -0.0812,  0.1400,  0.3113,  0.6013,  1.5250]],

        [[-1.0719, -0.2458, -0.0329,  0.1484,  0.2788,  0.4999,  1.2424]],

        [[-0.8736, -0.1617,  0.0038,  0.1463,  0.2451,  0.4181,  1.0627]],

        [[-0.8030, -0.1575, -0.0087,  0.1278,  0.2187,  0.3770,  0.9721]],

        [[-1.8909, -0.9424, -0.5524, -0.1261,  0.2219,  0.6450,  1.5386]]])

## Eval of 5 years of Validation data

In [8]:
# Resolve the experiment directory from `model_dir` rather than a machine-specific
# absolute path, so this cell follows MY_HOME_ABS_PATH (e.g. when run on Colab).
# normpath collapses the doubled separator produced when the home path ends in "/".
# TODO: Replace the experiment and checkpoint names with those of your own run.
exp_model_dir = os.path.normpath(os.path.join(model_dir, "tft_model_1yrtrain_tuning_230318_1906"))
best_model_path = os.path.join(exp_model_dir, "trial_0", "epoch=12.ckpt")
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

# split data
train_df, val_df, test_df = get_splited_datasets(data_df, VAL_INDEX, TEST_INDEX)
# Only the training frame is subset here; val_df keeps its full length.
train_df, _, _ = subset_data(train_df, val_df, None, SUBSET_LEN)
(training, validation, testing) = setup_tsdataset(train_df, val_df, test_df, ENCODER_LEN)
print(f"validation timesteps: {len(val_df)}")

# Release everything that is not needed for the validation pass.
del train_df
del test_df
del training
del testing
gc.collect()

# Eval on Validation set
# os.cpu_count() may return None when the count cannot be determined; fall back
# to 0 (load in the main process) so the value is always a valid num_workers.
val_dataloader = validation.to_dataloader(train=False, batch_size=128, num_workers=os.cpu_count() or 0)

# The timer covers both collecting the targets and the quantile prediction pass.
start = default_timer()
val_actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
val_q_predictions = best_tft.predict(val_dataloader, mode="quantiles", show_progress_bar=True)
print(f"Val eval time: {default_timer() - start}")

# save eval results - also we can resume eval at a later point in time
with open(exp_model_dir + os.sep + "val5yr_actuals.pkl", "wb") as fout:
    pickle.dump(val_actuals, fout)

with open(exp_model_dir + os.sep + "val5yr_q_predictions.pkl", "wb") as fout:
    pickle.dump(val_q_predictions, fout)

Subest length: 8760 timesteps for each sites
Subset num train timesteps: 683280
Subset num val timesteps: 227760
validation timesteps: 1056072


Predict: 100%|██████████| 8217/8217 [5:34:00<00:00,  2.44s/ batches]  


Val eval time: 20337.633919409003


NameError: name 'val_acutals' is not defined

In [15]:
# load eval results - also we can resume eval at a later point in time
with open(exp_model_dir + os.sep + "val5yr_actuals.pkl", "rb") as fin:
    val_acutals = pickle.load(fin)

with open(exp_model_dir + os.sep + "val5yr_q_predictions.pkl", "rb") as fin:
    val_q_predictions = pickle.load(fin)

val_predictions = val_q_predictions[:, :, 3] # p50 == estimation
val_p90 = val_q_predictions[:, :,5] # p90
del val_q_predictions
    
# Unmasked evaltaion
eval_metric = get_eval_metrics(val_actuals, val_predictions, None, val_p90)
print(f"Unmasked Val eval evaluation: {eval_metric}")

# Masked evlationa
start = default_timer()
mask = torch.logical_not(torch.cat([x['decoder_cat'][:, :, -1].reshape(-1) for x, y in iter(val_dataloader)]))
masked_eval_metric = get_eval_metrics(val_actuals, val_predictions, mask, val_p90)
eval_time = default_timer() - start
print(f"Masked Val eval time: {eval_time}")
print(f"Masked Val eval evaluation: {masked_eval_metric}")

Unmasked Val eval evaluation: {'rmse': 2.2228381633758545, 'mae': 1.0820996761322021, 'nse': 0.9026839733123779, 'r2': 0.9026839521951319, 'p50_loss': 0.2658967971801758, 'p90_loss': 0.13770847022533417}


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fbcaa6f5bd0>
Traceback (most recent call last):
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1449, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/envs/py310/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fbcaa6f5bd0>
Traceback (most recent call last):
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1449

Masked Val eval time: 304.4815681009932
Masked Val eval evaluation: {'rmse': 2.2806456089019775, 'mae': 1.1240664720535278, 'nse': 0.9025433659553528, 'r2': 0.902543396600835, 'p50_loss': 0.27481621503829956, 'p90_loss': 0.14068454504013062}
