# Example GP emulator code 
### Applied to Thomas Kavoo's PPE experiment
##### Linnia Hawkins 7/13/2023

In [13]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

import gpflow
from esem import gp_model

from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [14]:
# Annual-mean inputs (the "annual mean?" label is the original author's and is unconfirmed).
# All three tables are parsed with pandas' pure-Python CSV engine. The 'Ensemble'
# column is removed from the GPP and parameter tables; obs_gpp is kept as read.
gpp = pd.read_csv(
    "/glade/u/home/tkavoo/ppe_tools/machine_learning/gpp.csv", engine='python'
).drop(columns='Ensemble')
obs_gpp = pd.read_csv(
    "/glade/u/home/tkavoo/ppe_tools/machine_learning/obsgpp.csv", engine='python'
)
params = pd.read_csv(
    "/glade/u/home/tkavoo/ppe_tools/machine_learning/params.csv", engine='python'
).drop(columns='Ensemble')

In [3]:
# JJA inputs. The first column of the GPP and parameter tables is discarded
# positionally (presumably an index/ensemble column -- TODO confirm against the CSVs).
# NOTE(review): this cell assigns the same names (gpp, obs_gpp, params) as the
# annual-mean cell, so whichever of the two runs last decides what is emulated.
# Its recorded execution count is out of order with the surrounding cells; on a
# top-to-bottom run the JJA data wins.
gpp = pd.read_csv(
    "/glade/u/home/tkavoo/ppe_tools/machine_learning/data/ornl_gpp_jja.csv", engine='python'
).iloc[:, 1:]
obs_gpp = pd.read_csv(
    "/glade/u/home/tkavoo/ppe_tools/machine_learning/data/obsgpp_jja.csv", engine='python'
)
params = pd.read_csv(
    "/glade/u/home/tkavoo/ppe_tools/machine_learning/data/ornl_params_jja.csv", engine='python'
).iloc[:, 1:]

In [15]:
# Min-max normalize the emulator inputs.
# The scaler is fitted on the full "params" dataframe and kept in `norm_scaler`
# so the same transform can be reused later.
norm_scaler = MinMaxScaler()

# LRH: these need to be normalized by column (MinMaxScaler scales each column separately)
ppe_params = norm_scaler.fit_transform(params)

In [16]:
# Standardize the emulator targets: fit a StandardScaler on the "gpp" dataframe and
# wrap the result back into a DataFrame that keeps the original column labels.
# LRH: I think GP performs better on standardized outputs (rather than normalized)
std_scaler = StandardScaler()
gpp_scaled = pd.DataFrame(
    data=std_scaler.fit_transform(gpp),
    columns=gpp.columns,
)

In [17]:
# Number of input parameters = number of columns in the scaled parameter matrix.
num_params = ppe_params.shape[1]
num_params

30

### Train emulator

In [11]:
# define kernel
# Each component kernel acts on every input dimension. The RBF and Matern-3/2 kernels
# get one lengthscale per parameter, initialised to 1.0. All sizes are derived from
# num_params (the Matern32 lengthscales used to be hard-coded to length 30, which only
# works when there are exactly 30 parameters).
kernel_linear = gpflow.kernels.Linear(active_dims=range(num_params), variance=1)
kernel_RBF = gpflow.kernels.RBF(
    active_dims=range(num_params),
    lengthscales=np.ones(num_params),
)
kernel_matern32 = gpflow.kernels.Matern32(
    active_dims=range(num_params),
    variance=1,
    lengthscales=np.ones(num_params),
)

# Composite kernel: sum of the three components.
kernel = kernel_linear + kernel_matern32 + kernel_RBF

2023-07-13 11:39:03.301524: E tensorflow/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-07-13 11:39:03.301578: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (crhtc39): /proc/driver/nvidia/version does not exist
2023-07-13 11:39:03.307813: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [12]:
# split training and testing data
# The first `ntest` rows are held out for validation; every remaining row is used for training.
ntest = 55
Y = gpp_scaled

X_test = ppe_params[:ntest]
X_train = ppe_params[ntest:]
y_test = Y[:ntest]
y_train = Y[ntest:]

# use ESEm package to train (note: no hyperparameter tuning is done here; ESEm uses gradient descent)
emulator = gp_model(np.array(X_train), np.array(y_train), kernel=kernel)

# NOTE(review): the recorded run of this cell logged a TensorFlow warning
# "Cholesky decomposition was not successful ... Filling lower-triangular output with NaNs"
# during training -- check that the fitted hyperparameters/predictions are finite.
emulator.train()

2023-07-13 11:39:17.113049: W tensorflow/core/kernels/linalg/cholesky_op.cc:56] Cholesky decomposition was not successful. Eigen::LLT failed with error code 1. Filling lower-triangular output with NaNs.


In [None]:
# plot validation: emulator prediction vs. held-out CLM output (both standardized)
plt.rcParams.update({'font.size': 10})

# Create the figure together with its axes so that `ax` is actually defined.
fig, ax = plt.subplots(figsize=[10,12])

# X_test is a row slice of the MinMaxScaler output, which is a NumPy array by default
# and therefore has no `.values`; np.asarray accepts an ndarray or a DataFrame.
y_pred, y_pred_var = emulator.predict(np.asarray(X_test))

# Flatten truth and prediction to 1-D so they are compared element by element.
# np.corrcoef treats each ROW of a 2-D input as a separate variable, which is not
# what is wanted for (n_samples, n_outputs) data.
# assumes y_pred has the same row/column layout as y_test -- TODO confirm
y_test_flat = np.ravel(y_test)
y_pred_flat = np.ravel(y_pred)

# r2 is the 2x2 correlation matrix; its off-diagonal entry is Pearson's r, so it is
# squared before being reported under the "R2" label.
r2 = np.corrcoef(y_test_flat, y_pred_flat)
r_squared = r2[0,1]**2

ax.plot([-3,3],[-3,3],c='k',linestyle='--',label='1:1 line')
ax.scatter(y_test_flat, y_pred_flat)
ax.text(-3,2,'R2 = '+str(np.round(r_squared,3)),fontsize=10)
ax.set_xlabel('CLM (standardized)',fontsize = 10)
ax.set_ylabel('Emulator (standardized)',fontsize = 10)
plt.tight_layout()