In [1]:
from os import chdir
# Make the project directory the working directory: the relative paths used
# further down ("tfmodels/...", "../data/...") are resolved against it.
# NOTE(review): absolute, machine-specific path -- must be edited to run elsewhere.
chdir("/l/hegdep1/onoffgp/pymodels/")

In [2]:
import numpy as np
import tensorflow as tf
import pandas as pd
import os
import logging
import time
import sys
from scipy.cluster.vq import kmeans
import pickle
import matplotlib.pyplot as plt
import matplotlib as mpl
from numpy.random import RandomState
# Seeded generator; it drives the random train/test mask built further down.
rng = RandomState(1231)
%matplotlib inline

# TensorFlow C++ log level (set through the environment).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Numerical settings: dtype used for placeholders/casts and a jitter constant.
jitter_level = 1e-5
float_type = tf.float64

# Output locations (relative to the working directory): the TensorBoard event
# log and the saved model checkpoint for this run.
tbPath    = "tfmodels/forestfires/svgp/log/run01"
modelPath = "tfmodels/forestfires/svgp/run01"

In [3]:
from onofftf.main import Param, DataSet, GaussKL, KernSE, GPConditional
from onofftf.utils import modelmanager
from gpflow import transforms

In [4]:
# Load the forest-fires table; the path is relative to the working directory.
raw_data = pd.read_csv("../data/forestfire/forestfires.csv")

In [5]:
# Columns used as model inputs; the names in the trailing comment are the
# remaining columns that are currently left out.
lfeatures = ['temp','RH','wind','rain'] #'X','Y','month','day','FFMC','DMC','DC','ISI'
# Column used as the regression target.
ltarget   = ['area']

In [6]:
# Split the raw table into its input columns and its target column.
raw_feat, raw_target = raw_data[lfeatures], raw_data[ltarget]

In [9]:
# Standardise every feature column with statistics of the full table:
# centre first, then scale by the column standard deviation.
norm_feat = raw_feat - raw_feat.mean()
norm_feat = norm_feat / raw_feat.std()

# Plain numpy views of the standardised inputs and the untouched targets.
Xraw, Yraw = norm_feat.values, raw_target.values

In [15]:
from sklearn.model_selection import KFold # import KFold

# Five contiguous, unshuffled folds. `random_state` is deliberately not passed:
# it only has an effect when shuffle=True, and recent scikit-learn releases
# raise a ValueError when it is set together with shuffle=False. The resulting
# folds are identical to the ones produced before.
kf = KFold(n_splits=5, shuffle=False)
for train_index, test_index in kf.split(Xraw):
    # NOTE: every pass rebinds these names, so only the last fold survives the loop.
    Xtrain, Xtest = Xraw[train_index], Xraw[test_index]
    Ytrain, Ytest = Yraw[train_index], Yraw[test_index]
    print(Xtrain.shape,Ytrain.shape)
    print(Xtest.shape,Ytest.shape)

(413, 4) (413, 1)
(104, 4) (104, 1)
(413, 4) (413, 1)
(104, 4) (104, 1)
(414, 4) (414, 1)
(103, 4) (103, 1)
(414, 4) (414, 1)
(103, 4) (103, 1)
(414, 4) (414, 1)
(103, 4) (103, 1)


In [None]:
# Reload the table and draw a boolean mask from the seeded generator: rows whose
# uniform draw falls below 0.6 go to the training frame, the rest to the test frame.
raw_data = pd.read_csv("../data/forestfire/forestfires.csv")
msk = rng.rand(len(raw_data)) < 0.6
traindf, testdf = raw_data[msk], raw_data[~msk]

In [None]:
# When True the model is trained on log(area + 1); otherwise on area + 1.
log_transform = False

lfeatures = ['temp','RH','wind','rain'] #'X','Y','month','day','FFMC','DMC','DC','ISI'
ltarget   = ['area']

# Inputs and targets of each split as plain numpy arrays.
Xtrain, Ytrain = traindf[lfeatures].values, traindf[ltarget].values
Xtest, Ytest = testdf[lfeatures].values, testdf[ltarget].values

# Standardise both splits with the per-column mean / standard deviation of the
# training inputs only.
norm_m = Xtrain.mean(axis=0)
norm_sd = np.sqrt(Xtrain.var(axis=0))
Xtrain, Xtest = (Xtrain - norm_m) / norm_sd, (Xtest - norm_m) / norm_sd

# Training targets are shifted by +1 in both modes; the log is optional.
train_data = DataSet(Xtrain, np.log(Ytrain+1) if log_transform else Ytrain+1)

In [None]:
def list_to_np(_list):
    """Return a list holding ``np.array(e)`` for every element ``e`` of ``_list``."""
    return [np.array(e) for e in _list]

# --- optimisation / problem sizes -------------------------------------------
num_iter = 50000
num_inducing_f = 40
num_minibatch = 100
num_data = Xtrain.shape[0]
num_features = Xtrain.shape[1]

# --- initial kernel and likelihood hyper-parameters -------------------------
init_fkell = np.ones(num_features)*5   # one lengthscale per input feature
init_fkvar = 10.

init_noisevar = 0.001

# --- variational posterior options ------------------------------------------
q_diag = True          # selects the (num_inducing_f, 1) initial value below
include_f_mu = True    # adds a scalar offset parameter to the predictive mean

if include_f_mu:
    init_f_mu = 0.

# Inducing inputs start at the k-means centroids of the training inputs.
# NOTE(review): scipy's kmeans is randomly initialised and is not seeded here,
# so the centroids can still differ between runs.
init_Zf = kmeans(Xtrain,num_inducing_f)[0]
# Guard against kmeans handing back a different number of centroids than requested.
assert init_Zf.shape[0] == num_inducing_f, "kmeans did not return num_inducing_f centroids"

# Small random initial inducing means. Drawn from the notebook's seeded `rng`
# (not the unseeded global numpy generator) so the initialisation is reproducible.
init_u_fm = rng.randn(num_inducing_f,1)*0.01

if q_diag:
    # column vector of ones, shape (num_inducing_f, 1)
    init_u_fs_sqrt = np.ones(num_inducing_f).reshape(1,-1).T
else:
    # identity matrix, shape (num_inducing_f, num_inducing_f)
    init_u_fs_sqrt = np.diag(np.ones(num_inducing_f))

# --- per-group learning rates -----------------------------------------------
kern_param_learning_rate = 1e-3
indp_param_learning_rate = 1e-3

In [None]:
# ****************************************************************
# define tensorflow variables and placeholders
# ****************************************************************
# Feed points for a minibatch of inputs / targets; no static shape is declared.
X = tf.placeholder(dtype = float_type)
Y = tf.placeholder(dtype = float_type)

# Kernel hyper-parameters, both wrapped in the Log1pe transform and trained
# with the kernel learning rate.
with tf.name_scope("f_kern"):
    fkell = Param(init_fkell,transform=transforms.Log1pe(),
                   name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
    fkvar = Param(init_fkvar,transform=transforms.Log1pe(),
                   name="variance",learning_rate = kern_param_learning_rate,summ=True)

# Kernel object built from the lengthscale and variance parameters.
fkern = KernSE(fkell,fkvar)

# Observation-noise variance; it shares the kernel learning rate.
with tf.name_scope("likelihood"):
    noisevar = Param(init_noisevar,transform=transforms.Log1pe(),
                     name="variance",learning_rate = kern_param_learning_rate,summ=True)

# Inducing-point parameters, trained with the inducing-point learning rate.
with tf.name_scope("f_ind"):
    Zf = Param(init_Zf,name="z",learning_rate = indp_param_learning_rate)
    u_fm = Param(init_u_fm,name="value",learning_rate = indp_param_learning_rate)
    
    # Optional scalar offset; it is only defined when include_f_mu is True.
    if include_f_mu:
        f_mu = Param(init_f_mu,name="fmu",learning_rate = indp_param_learning_rate,summ=True)
    
    # The transform is passed positionally here (second argument), unlike the
    # keyword form used above. The transform choice follows q_diag.
    if q_diag:
        u_fs_sqrt = Param(init_u_fs_sqrt,transforms.positive,name="variance",learning_rate = indp_param_learning_rate)
    else:
        u_fs_sqrt = Param(init_u_fs_sqrt,transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),name="variance",learning_rate = indp_param_learning_rate)
    

**define prior kl divergence, variational expectations and predict functions**

In [None]:
def build_prior_kl(u_fm,u_fs_sqrt,fkern,Zf):
    """Build the prior KL term from the inducing-point parameters.

    Args:
        u_fm: parameter object whose ``get_tfv()`` gives the inducing mean.
        u_fs_sqrt: parameter object whose ``get_tfv()`` gives the inducing
            scale (second argument of ``GaussKL``).
        fkern: kernel object; ``fkern.K`` is evaluated at the inducing inputs.
        Zf: parameter object whose ``get_tfv()`` gives the inducing inputs.

    Returns:
        Whatever ``GaussKL`` returns for (mean, scale, kernel matrix).
    """
    inducing_mean = u_fm.get_tfv()
    inducing_scale = u_fs_sqrt.get_tfv()
    prior_cov = fkern.K(Zf.get_tfv())
    return GaussKL(inducing_mean, inducing_scale, prior_cov)

def variational_expectations(Y,fmu,fvar,noisevar):
    """Element-wise expected Gaussian log-density of ``Y``.

    Computes ``-0.5*log(2*pi) - 0.5*log(noisevar)
    - 0.5*((Y - fmu)^2 + fvar) / noisevar``.

    Args:
        Y: observed targets.
        fmu: predictive mean.
        fvar: predictive variance.
        noisevar: observation-noise variance.

    Returns:
        The expression above, evaluated element-wise (no reduction is applied).
    """
    # constant and noise-dependent normalisation terms
    log_normaliser = -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar)
    # squared residual plus predictive variance
    expected_sq_residual = tf.square(Y - fmu) + fvar
    return log_normaliser - 0.5 * expected_sq_residual / noisevar

def build_predict(Xnew,u_fm,u_fs_sqrt,fkern,Zf,f_mu=None):
    """Predictive mean and variance at ``Xnew``.

    Delegates to ``GPConditional`` with ``full_cov=False`` and ``whiten=False``
    and, when ``f_mu`` is given, adds ``f_mu.get_tfv()`` to the mean.

    Args:
        Xnew: inputs at which to predict.
        u_fm: parameter object providing the inducing mean via ``get_tfv()``.
        u_fs_sqrt: parameter object passed on as ``q_sqrt`` via ``get_tfv()``.
        fkern: kernel object handed to ``GPConditional``.
        Zf: parameter object providing the inducing inputs via ``get_tfv()``.
        f_mu: optional parameter object holding a mean offset.

    Returns:
        Tuple ``(mean, variance)``.
    """
    inducing_inputs = Zf.get_tfv()
    inducing_mean = u_fm.get_tfv()
    inducing_scale = u_fs_sqrt.get_tfv()
    cond_mean, cond_var = GPConditional(Xnew, inducing_inputs, fkern, inducing_mean,
                                        full_cov=False, q_sqrt=inducing_scale, whiten=False)

    # no offset requested: return the conditional as is
    if f_mu is None:
        return cond_mean, cond_var

    return cond_mean + f_mu.get_tfv(), cond_var

**build model and define lower bound**

In [None]:
# KL term of the bound
with tf.name_scope("kl"):
    kl = build_prior_kl(u_fm,u_fs_sqrt,fkern,Zf)
    tf.summary.scalar('kl', kl)

# predictive mean / variance at the fed inputs; the offset parameter is only
# handed over when include_f_mu is set (build_predict defaults it to None)
with tf.name_scope("model_build"):
    fmean, fvar = build_predict(X, u_fm, u_fs_sqrt, fkern, Zf,
                                f_mu if include_f_mu else None)
    tf.summary.histogram('fmean',fmean)
    tf.summary.histogram('fvar',fvar)

# expected log-likelihood of the fed minibatch
with tf.name_scope("var_exp"):
    var_exp = tf.reduce_sum(
        variational_expectations(Y, fmean, fvar, noisevar.get_tfv())
    )
    tf.summary.scalar('var_exp', var_exp)

    # rescale the minibatch sum by num_data / num_minibatch
    scale = tf.cast(num_data, float_type) / tf.cast(num_minibatch, float_type)
    var_exp_scaled = var_exp * scale
    tf.summary.scalar('var_exp_scaled', var_exp_scaled)

# objective to minimise: negative of (scaled expected log-likelihood - KL)
with tf.name_scope("cost"):
    cost = -(var_exp_scaled - kl)
    tf.summary.scalar('cost',cost)


**define optimizer op**

In [None]:
# Every trainable variable together with the learning rate attached to it.
all_var_list = tf.trainable_variables()
all_lr_list = [var._learning_rate for var in all_var_list]

train_opt_group = []

# One Adam optimiser per distinct learning rate.
for group_learning_rate in set(all_lr_list):
    # variables carrying this learning rate, in their original order
    group_var_list = [
        v for v, lr in zip(all_var_list, all_lr_list) if lr == group_learning_rate
    ]

    group_tf_optimizer = tf.train.AdamOptimizer(learning_rate = group_learning_rate)
    group_grad_list = tf.gradients(cost, group_var_list)
    group_grads_and_vars = list(zip(group_grad_list, group_var_list))
    group_train_op = group_tf_optimizer.apply_gradients(group_grads_and_vars)

    # Summarize all gradients
    for grad, var in group_grads_and_vars:
        tf.summary.histogram(var.name + '/gradient', grad)

    # bookkeeping record for this group
    train_opt_group.append(dict(
        names=[var.name for var in group_var_list],
        vars=group_var_list,
        learning_rate=group_learning_rate,
        grads=group_grad_list,
        train_op=group_train_op,
    ))

# Single op that runs every group's update.
train_op = tf.group(*[entry['train_op'] for entry in train_opt_group])


In [None]:
# Interactive session: installs itself as the default session, which the
# later `.eval()` calls rely on.
sess = tf.InteractiveSession()

# model saver
saver = tf.train.Saver()

# tensorboard summary
# merge_all collects every summary registered so far into a single op
summ_merged = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(tbPath,
                                        graph=sess.graph)

    
# initialise all variables before training starts
sess.run(tf.global_variables_initializer())

In [None]:
print('{:>16s}'.format("iteration"),'{:>16s}'.format("objective"),'{:>16s}'.format("var_exp"),'{:>16s}'.format("kl"))

log_every = 100        # print progress every `log_every` iterations
summary_after = 200    # TensorBoard summaries are only written past this iteration

# Train until num_iter is reached or the user interrupts. The guard wraps the
# whole loop so that an interrupt raised while fetching a batch is handled the
# same way as one raised during a session run.
try:
    for i in range(num_iter):
        batch = train_data.next_batch(num_minibatch)
        feed = {X : batch[0],Y : batch[1]}

        is_log_step = (i % log_every == 0)
        write_summary = is_log_step and i > summary_after

        if write_summary:
            # fetch the merged summaries only on steps where they are written,
            # instead of evaluating every histogram on every iteration
            summary,_ = sess.run([summ_merged,train_op],feed_dict=feed)
        else:
            sess.run(train_op,feed_dict=feed)

        if is_log_step:
            # one session run for all three diagnostics (post-update values)
            _cost,_var_exp,_kl = sess.run([cost,var_exp,kl],feed_dict=feed)
            print('{:>16d}'.format(i),'{:>16.3f}'.format(_cost),'{:>16.3f}'.format(_var_exp),'{:>16.3f}'.format(_kl))

            if write_summary:
                summary_writer.add_summary(summary,i)
                summary_writer.flush()

except KeyboardInterrupt:
    print("Stopping training")

# Persist the (possibly partially trained) model and close the event file.
modelmngr = modelmanager(saver, sess, modelPath)
modelmngr.save()
summary_writer.close()

**model checking**

In [None]:
# get test and training predictions
def predict_onoff(Xtrain,Xtest):
    """Predict the target on the original scale for the train and test inputs.

    The model is trained on shifted targets (``Ytrain + 1``, or
    ``log(Ytrain + 1)`` when ``log_transform`` is set), so the predictive mean
    is mapped back before it is returned: ``exp(f) - 1`` in log mode and
    ``f - 1`` otherwise. Results are clipped at 0. Previously the non-log path
    never removed the +1 shift, which biased every prediction by one unit.

    Args:
        Xtrain: standardised training inputs fed to the ``X`` placeholder.
        Xtest: standardised test inputs fed to the ``X`` placeholder.

    Returns:
        Tuple ``(pred_train, pred_test)`` of non-negative predictions.
    """
    def _predict(inputs):
        # predictive mean on the (shifted / log-shifted) training scale
        latent = fmean.eval(feed_dict = {X:inputs})
        # undo the target transform used when building the training set
        if log_transform:
            restored = np.exp(latent) - 1
        else:
            restored = latent - 1
        # clip negative predictions at zero
        return np.maximum(restored, 0)

    return _predict(Xtrain), _predict(Xtest)

In [None]:
# (label, parameter) pairs to report; the mean offset is only listed when it exists.
_fitted = [("Noise variance   = ", noisevar),
           ("Kf lengthscales  = ", fkell),
           ("Kf variance      = ", fkvar)]
if include_f_mu:
    _fitted.append(("f mean           = ", f_mu))

# Evaluate each parameter in the default session and print it.
for _label, _param in _fitted:
    print(_label + str(_param.get_tfv().eval()))

In [None]:
# Predictions for the standardised train and test inputs.
pred_train, pred_test = predict_onoff(Xtrain,Xtest)

In [None]:
# Largest training prediction next to the largest observed targets.
# Note that "max pred" is taken over the training predictions only.
print("max pred :",pred_train.max())
print("max train:",Ytrain.max())
print("max test :",Ytest.max())

In [None]:
# Training error. The first value is the square root of the mean squared
# error, so it is labelled RMSE (it was previously mislabelled "mse").
print("train rmse:",np.sqrt(np.mean((pred_train - Ytrain)**2)))
print("train mae:",np.mean(np.abs(pred_train - Ytrain)))

In [None]:
# Test error. The first value is the square root of the mean squared error,
# so it is labelled RMSE (it was previously mislabelled "mse").
print("test rmse:",np.sqrt(np.mean((pred_test - Ytest)**2)))
print("test mae:",np.mean(np.abs(pred_test - Ytest)))

In [None]:
# Histogram of the absolute training errors, restricted to the range [0, 5].
abs_error_svgp_train = np.abs(pred_train - Ytrain)
# NOTE: this changes the default figure size for every later figure as well.
mpl.rcParams['figure.figsize'] = (7,6)
plt.hist(abs_error_svgp_train,range=(0,5.),bins=50,alpha=0.5,color = "#4C997F",log=False,label="svgp")
plt.legend()
plt.xlabel("error")
plt.ylabel("frequency")
# title spelling corrected (was "abosulte")
plt.title("absolute error distributions")
plt.tight_layout()
plt.show()