In [1]:
import dill as pickle
import numpy as np
import pandas as pd
from datetime import datetime
from generate_sample_U import *
import geopandas as gpd
import os

In [2]:
def save_obj(filename, obj, obj_name):
    """Pickle `obj` to ``krause-output/{filename}_{obj_name}.sav``.

    The ``krause-output/`` directory (relative to the current working
    directory) is created if it does not exist yet.

    Args:
        filename: Prefix of the output file name (typically the model id).
        obj: Any picklable object.
        obj_name: Suffix identifying which object is stored in the file.
    """
    path = "krause-output/"

    # exist_ok=True avoids the check-then-create race of
    # os.path.exists() followed by os.makedirs().
    os.makedirs(path, exist_ok=True)

    filepath = f"{path}{filename}_{obj_name}.sav"
    # The context manager guarantees the handle is flushed and closed,
    # even if pickling raises half-way through.
    with open(filepath, "wb") as fh:
        pickle.dump(obj, fh)

In [3]:
def load_obj(filename, obj_name):
    """Load the object stored at ``krause-output/{filename}_{obj_name}.sav``.

    Counterpart of `save_obj`: the path is built from the same two name parts.

    Args:
        filename: Prefix of the file name (typically the model id).
        obj_name: Suffix identifying which object to load.

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    path = f"krause-output/{filename}_{obj_name}.sav"
    # NOTE: unpickling can execute arbitrary code; only load files that
    # were produced by this project.
    with open(path, 'rb') as fh:
        return pickle.load(fh)

In [4]:
def load_model(filename):
    """Load a saved model together with its data frames and scalers.

    All artefacts are read from
    ``../gp-models/GPR-models/ST-SVGP/{filename}/{filename}_<suffix>.sav``
    where ``<suffix>`` is one of ``model``, ``df``, ``train_df``, ``test_df``,
    ``val_df`` and ``scalers``.

    Args:
        filename: Model identifier, used both as directory name and file prefix.

    Returns:
        Tuple ``(model, df, train_df, test_df, val_df, scalers)``.

    Raises:
        FileNotFoundError: If any of the six files is missing.
    """
    path = f"../gp-models/GPR-models/ST-SVGP/{filename}/{filename}"

    def _load(suffix):
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were produced by this project. The context manager closes each
        # handle as soon as its object has been read.
        with open(f"{path}_{suffix}.sav", 'rb') as fh:
            return pickle.load(fh)

    model = _load("model")
    df = _load("df")
    train_df = _load("train_df")
    test_df = _load("test_df")
    val_df = _load("val_df")
    scalers = _load("scalers")

    return model, df, train_df, test_df, val_df, scalers

In [5]:
def call_predict(model, df, codes, scalers=None):
    """Call ``model.predict`` at the scaled coordinates of the given sites.

    Args:
        model: Object exposing ``predict(X=..., R=...)``.
        df: DataFrame with ``code``, ``latitude`` and ``longitude`` columns.
        codes: Iterable of site codes to look up in ``df['code']``.
        scalers: Optional mapping with ``'latitude'`` and ``'longitude'``
            entries, each exposing ``transform``. When omitted, the
            module-level ``scalers`` variable is used, which is how this
            helper behaved before the parameter existed.

    Returns:
        Whatever ``model.predict`` returns (the notebook unpacks it as
        mean and variance).

    Raises:
        IndexError: If a code has no rows in ``df``.
        NameError: If ``scalers`` is neither passed nor defined globally.
    """
    if scalers is None:
        # Backward compatibility: fall back to the notebook-global scalers.
        try:
            scalers = globals()["scalers"]
        except KeyError:
            raise NameError("name 'scalers' is not defined") from None

    # Single time input; the site grid below is tiled once per row of it.
    t_plot = np.array([[0.]])

    scaled_coords = []
    for code in codes:
        # Filter once per site instead of once per coordinate.
        site_rows = df.loc[df['code'] == code]
        if site_rows.empty:
            raise IndexError(f"no rows in df for site code {code!r}")
        site_lat = site_rows['latitude'].unique()[0]
        site_lon = site_rows['longitude'].unique()[0]
        scaled_coords.append([
            scalers['latitude'].transform(np.array([[site_lat]])).item(),
            scalers['longitude'].transform(np.array([[site_lon]])).item(),
        ])

    # (n_sites, 2) -> (n_times, n_sites, 2); reshape keeps the column
    # count at 2 even when `codes` is empty.
    site_grid = np.array(scaled_coords).reshape(-1, 2)
    R_plot = np.tile(site_grid, [t_plot.shape[0], 1, 1])

    return model.predict(X=t_plot, R=R_plot)

In [6]:
def call_cov(model, df, codes, scalers=None):
    """Call ``model.cov`` at the scaled coordinates of the given sites.

    Args:
        model: Object exposing ``cov(X=..., R=...)``.
        df: DataFrame with ``code``, ``latitude`` and ``longitude`` columns.
        codes: Iterable of site codes to look up in ``df['code']``.
        scalers: Optional mapping with ``'latitude'`` and ``'longitude'``
            entries, each exposing ``transform``. When omitted, the
            module-level ``scalers`` variable is used, which is how this
            helper behaved before the parameter existed.

    Returns:
        Whatever ``model.cov`` returns (the notebook unpacks it as mean
        and covariance).

    Raises:
        IndexError: If a code has no rows in ``df``.
        NameError: If ``scalers`` is neither passed nor defined globally.
    """
    if scalers is None:
        # Backward compatibility: fall back to the notebook-global scalers.
        try:
            scalers = globals()["scalers"]
        except KeyError:
            raise NameError("name 'scalers' is not defined") from None

    # Single time input; the site grid below is tiled once per row of it.
    t_plot = np.array([[0.]])

    scaled_coords = []
    for code in codes:
        # Filter once per site instead of once per coordinate.
        site_rows = df.loc[df['code'] == code]
        if site_rows.empty:
            raise IndexError(f"no rows in df for site code {code!r}")
        site_lat = site_rows['latitude'].unique()[0]
        site_lon = site_rows['longitude'].unique()[0]
        scaled_coords.append([
            scalers['latitude'].transform(np.array([[site_lat]])).item(),
            scalers['longitude'].transform(np.array([[site_lon]])).item(),
        ])

    # (n_sites, 2) -> (n_times, n_sites, 2); reshape keeps the column
    # count at 2 even when `codes` is empty.
    site_grid = np.array(scaled_coords).reshape(-1, 2)
    R_plot = np.tile(site_grid, [t_plot.shape[0], 1, 1])

    return model.cov(X=t_plot, R=R_plot)

In [7]:
def call_predict_y(model, df, codes, scalers=None):
    """Call ``model.predict_y`` at the scaled coordinates of the given sites.

    Args:
        model: Object exposing ``predict_y(X=..., R=...)``.
        df: DataFrame with ``code``, ``latitude`` and ``longitude`` columns.
        codes: Iterable of site codes to look up in ``df['code']``.
        scalers: Optional mapping with ``'latitude'`` and ``'longitude'``
            entries, each exposing ``transform``. When omitted, the
            module-level ``scalers`` variable is used, which is how this
            helper behaved before the parameter existed.

    Returns:
        Whatever ``model.predict_y`` returns (the notebook unpacks it as
        mean and variance).

    Raises:
        IndexError: If a code has no rows in ``df``.
        NameError: If ``scalers`` is neither passed nor defined globally.
    """
    if scalers is None:
        # Backward compatibility: fall back to the notebook-global scalers.
        try:
            scalers = globals()["scalers"]
        except KeyError:
            raise NameError("name 'scalers' is not defined") from None

    # Single time input; the site grid below is tiled once per row of it.
    t_plot = np.array([[0.]])

    scaled_coords = []
    for code in codes:
        # Filter once per site instead of once per coordinate.
        site_rows = df.loc[df['code'] == code]
        if site_rows.empty:
            raise IndexError(f"no rows in df for site code {code!r}")
        site_lat = site_rows['latitude'].unique()[0]
        site_lon = site_rows['longitude'].unique()[0]
        scaled_coords.append([
            scalers['latitude'].transform(np.array([[site_lat]])).item(),
            scalers['longitude'].transform(np.array([[site_lon]])).item(),
        ])

    # (n_sites, 2) -> (n_times, n_sites, 2); reshape keeps the column
    # count at 2 even when `codes` is empty.
    site_grid = np.array(scaled_coords).reshape(-1, 2)
    R_plot = np.tile(site_grid, [t_plot.shape[0], 1, 1])

    return model.predict_y(X=t_plot, R=R_plot)

In [8]:
def call_likelihood_cov(model, df, codes, scalers=None):
    """Call ``model.likelihood_cov`` at the scaled coordinates of the given sites.

    Args:
        model: Object exposing ``likelihood_cov(X=..., R=...)``.
        df: DataFrame with ``code``, ``latitude`` and ``longitude`` columns.
        codes: Iterable of site codes to look up in ``df['code']``.
        scalers: Optional mapping with ``'latitude'`` and ``'longitude'``
            entries, each exposing ``transform``. When omitted, the
            module-level ``scalers`` variable is used, which is how this
            helper behaved before the parameter existed.

    Returns:
        Whatever ``model.likelihood_cov`` returns (the notebook unpacks it
        as mean and covariance).

    Raises:
        IndexError: If a code has no rows in ``df``.
        NameError: If ``scalers`` is neither passed nor defined globally.
    """
    if scalers is None:
        # Backward compatibility: fall back to the notebook-global scalers.
        try:
            scalers = globals()["scalers"]
        except KeyError:
            raise NameError("name 'scalers' is not defined") from None

    # Single time input; the site grid below is tiled once per row of it.
    t_plot = np.array([[0.]])

    scaled_coords = []
    for code in codes:
        # Filter once per site instead of once per coordinate.
        site_rows = df.loc[df['code'] == code]
        if site_rows.empty:
            raise IndexError(f"no rows in df for site code {code!r}")
        site_lat = site_rows['latitude'].unique()[0]
        site_lon = site_rows['longitude'].unique()[0]
        scaled_coords.append([
            scalers['latitude'].transform(np.array([[site_lat]])).item(),
            scalers['longitude'].transform(np.array([[site_lon]])).item(),
        ])

    # (n_sites, 2) -> (n_times, n_sites, 2); reshape keeps the column
    # count at 2 even when `codes` is empty.
    site_grid = np.array(scaled_coords).reshape(-1, 2)
    R_plot = np.tile(site_grid, [t_plot.shape[0], 1, 1])

    return model.likelihood_cov(X=t_plot, R=R_plot)

In [9]:
# Identifier of the saved model; load_model() uses it as both the directory
# name and the file-name prefix of the pickled artefacts.
filename = "STSVGP_MODEL_2936"

In [10]:
# Load the pickled model, the full/train/test/validation frames and the scalers.
# NOTE: the call_* helpers defined above read `scalers` from this global name,
# so this cell must run before any of them is called.
model, df, train_df, test_df, val_df, scalers = load_model(filename)

In [11]:
# model.predict output for three site codes, unpacked as mean and variance.
pred_mean, pred_var = call_predict(model, df, ["HK6", "KC2", "KC1"])



In [12]:
# Display the per-site values returned by model.predict.
pred_mean, pred_var

(DeviceArray([52.34946478, 52.2275877 , 45.69269291], dtype=float64),
 DeviceArray([2.94179014, 2.36502559, 5.03170062], dtype=float64))

In [13]:
# model.cov output for the same three site codes, unpacked as mean and covariance.
cov_mean, cov_var = call_cov(model, df, ["HK6", "KC2", "KC1"])

In [14]:
# In the saved output, cov_var is a 3x3 matrix whose diagonal equals the
# pred_var values shown for model.predict.
cov_mean, cov_var

(DeviceArray([52.34946478, 52.2275877 , 45.69269291], dtype=float64),
 DeviceArray([[ 2.94179014, -0.39691312, -0.28498296],
              [-0.39691312,  2.36502559,  1.17338958],
              [-0.28498296,  1.17338958,  5.03170062]], dtype=float64))

In [15]:
# model.likelihood_cov output for the same three site codes.
# NOTE(review): the text printed under this cell is not produced by any code
# in this notebook — presumably debug prints inside likelihood_cov; confirm.
likelihood_mean, likelihood_var = call_likelihood_cov(model, df, ["HK6", "KC2", "KC1"])

[ 9.29094885  8.7141843  11.38085933]
(3,)
(3,)


In [16]:
# In the saved output, the off-diagonal entries equal those of cov_var;
# only the diagonal is larger.
likelihood_mean, likelihood_var

(DeviceArray([52.34946478, 52.2275877 , 45.69269291], dtype=float64),
 DeviceArray([[ 9.29094885, -0.39691312, -0.28498296],
              [-0.39691312,  8.7141843 ,  1.17338958],
              [-0.28498296,  1.17338958, 11.38085933]], dtype=float64))

In [17]:
# model.predict_y output for the same three site codes, unpacked as mean and variance.
pred_y_mean, pred_y_var = call_predict_y(model, df, ["HK6", "KC2", "KC1"])

In [18]:
# In the saved output, pred_y_var equals the diagonal of likelihood_var.
pred_y_mean, pred_y_var

(DeviceArray([52.34946478, 52.2275877 , 45.69269291], dtype=float64),
 DeviceArray([ 9.29094885,  8.7141843 , 11.38085933], dtype=float64))

In [None]:
# Element-wise difference between the two covariance matrices.
# NOTE(review): the saved output of this cell looks stale. Its diagonal equals
# likelihood_var's diagonal, whereas the values displayed in the cells above
# give a difference of about 6.349 on every diagonal entry. The cell also has
# no execution count — re-run it after a kernel restart.
likelihood_var-cov_var

DeviceArray([[ 9.29094885,  0.        ,  0.        ],
             [ 0.        ,  8.7141843 ,  0.        ],
             [ 0.        ,  0.        , 11.38085933]], dtype=float64)