In [1]:
import random
import warnings

import geopandas as gpd
import gpytorch
import matplotlib
import movingpandas as mpd
import numpy as np
import pandas as pd
import skmob
import skmob.preprocessing.clustering
import skmob.preprocessing.compression
import skmob.preprocessing.detection
import skmob.preprocessing.filtering
import torch
from gpytorch.kernels import RQKernel as RQ, RBFKernel as SE, \
PeriodicKernel as PER, ScaleKernel, LinearKernel as LIN, MaternKernel as MAT, \
SpectralMixtureKernel as SMK, PiecewisePolynomialKernel as PPK, CylindricalKernel as CYL
from kneed import KneeLocator
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler
#import math
#from sklearn.model_selection import train_test_split
#from sklearn.metrics import silhouette_score

import mobileDataToolkit.preprocessing_v2 as preprocessing
import mobileDataToolkit.analysis as analysis
import mobileDataToolkit.methods as methods
import mobileDataToolkit.metrics as metrics
import utils.GP as GP
import utils.helper_func as helper_func
from utils.helper_func import dec_floor
warnings.filterwarnings("ignore")

In [None]:
# All input CSVs live in one folder; build the three dataset paths from that prefix.
DATA_DIR = "C:/Users/ekino/OneDrive - UW/GPR/Data"

in_path = DATA_DIR + "/newAllTrips_withmetrics.csv"    # read into `df` further down
c_path = DATA_DIR + "/newCompressedTrips.csv"          # read into `m_df` further down
all_path = DATA_DIR + "/10_users_all_obs_raw.csv"

In [None]:
# Mobility metrics dataset preprocessing: load the per-trip table and drop incomplete rows
m_df = pd.read_csv(c_path, header=0).dropna()

# Plausibility masks; a trip is kept only when every one of them holds
realistic_speed = m_df['vel_avg'] < 80             # slower than 80 m/s (as the crow flies)
below_max_duration = m_df['time_total'] < 7200*4   # under 28,800, i.e. 8 hours if time_total is in seconds
above_min_duration = m_df['time_total'] >= 3600    # at least 1 hour
enough_points = m_df['npoints'] > 4                # at least 5 points for modeling
same_day = m_df['StartDay'] == m_df['EndDay']      # trip starts and ends on the same day

m_df = m_df[realistic_speed & below_max_duration & above_min_duration & enough_points & same_day]

# Drop one specific trip ID by hand
m_df = m_df[m_df['Id_perc'] != 2141084034]

In [None]:
feats = m_df[['vel_avg', 'distanceTotal', 'time_total', 'hcr', 'vcr', 'npoints', 'sr']]


def mob_clust(feats = feats, max_k = 10):
    """Cluster trips with k-means, choosing k at the elbow of the SSE curve.

    K-means is fitted for every k in 1..max_k, the inertia (SSE) of each fit is
    drawn on the current matplotlib figure, and kneed's ``KneeLocator`` picks
    the elbow of that convex, decreasing curve. A final k-means with the elbow
    k is then fitted on the same features.

    Parameters
    ----------
    feats : array-like
        Feature table passed unchanged to ``KMeans.fit``. Defaults to the
        module-level ``feats`` as it existed when this function was defined.
    max_k : int, default 10
        Largest number of clusters tried when building the SSE curve.

    Returns
    -------
    centers : the fitted ``cluster_centers_``
    labels : the fitted ``labels_`` (one per row of ``feats``)
    kmeans : the fitted ``KMeans`` estimator

    Raises
    ------
    ValueError
        If no elbow can be located on the SSE curve.
    """
    # One shared configuration for the sweep and for the final fit
    kmeans_kwargs = {
        "init": "random",
        "n_init": 10,
        "max_iter": 300,
        "random_state": 42,
    }
    k_values = range(1, max_k + 1)

    # SSE (inertia) for each candidate k
    sse = []
    for k in k_values:
        kmeans = KMeans(n_clusters=k, **kmeans_kwargs)
        kmeans.fit(feats)
        sse.append(kmeans.inertia_)

    # Note: this changes the global matplotlib style for the rest of the session
    plt.style.use("fivethirtyeight")
    plt.plot(k_values, sse)
    plt.xticks(k_values)
    plt.xlabel("Number of Clusters")
    plt.ylabel("SSE")
    #plt.show()

    kl = KneeLocator(
        k_values, sse, curve="convex", direction="decreasing"
    )
    # KneeLocator reports None when the curve has no detectable elbow; fail with a
    # clear message instead of handing None to KMeans.
    if kl.elbow is None:
        raise ValueError(
            "No elbow found in the SSE curve for k = 1..{}; cannot choose n_clusters.".format(max_k)
        )

    kmeans = KMeans(n_clusters=int(kl.elbow), **kmeans_kwargs)
    kmeans.fit(feats)

    centers = kmeans.cluster_centers_
    labels = kmeans.labels_

    return centers, labels, kmeans

# Seed Python's and NumPy's global RNGs. The gap-simulation loops below draw their
# target occupancy with np.random.uniform, which random.seed alone does not control.
random.seed(10)
np.random.seed(10)

centers, labels, kmeans = mob_clust(feats)
m_df['labels'] = labels
# Renumber rows 0..n-1 (old index kept as an 'index' column) so the loops below can
# look trips up with m_df[col][i] for i in range(len(m_df)).
m_df = m_df.reset_index()
print(m_df.labels.value_counts())

# Cluster 0 is the fastest average speed, and has the lowest sample size
# Cluster 1 is the slowest average speed, and has the highest sample size
# Cluster 2 is the middle speed, and has the middle amount of samples

# Average speeds are inflated due to distances being "as the crow flies"--real transportation networks are more convoluted


In [None]:
# Point-level observations for all trips, plus a calendar-date column derived from the timestamp
df = pd.read_csv(in_path, header=0)
df['date'] = pd.to_datetime(df['Date_Time']).dt.date

# Column layouts shared by the three per-cluster containers
train_columns = ['index', 'time', 'day', 'week', 'train_lat', 'train_long', 'trip_ID']
result_columns = ['trip_ID', 'time', 'day', 'test_lat', 'pred_lat', 'test_long',
                  'pred_long', 'dist', 'temp_ocp', 'prec_train', 'prec_test', 'lengthscale',
                  'var_lat', 'var_long', 'noise', 'loss', 'rmse_lat', 'rmse_long']

# One empty training frame and one empty results frame per cluster (1, 2, 3)
trains1, trains2, trains3 = (pd.DataFrame(columns=train_columns) for _ in range(3))
tot1, tot2, tot3 = (pd.DataFrame(columns=result_columns) for _ in range(3))


In [None]:
bin_len_ls = [600, 900, 1200, 1800, 3600] # Bin lengths to test
m_threshold = 200  # Meter threshold for determining similar trips (i.e., if origin and destination are <= 200 m apart)
min_n = 2 # minimum number of points for a similar trip to be kept
max_speed_kmh = 400 # only referenced by the commented-out skmob filtering step below
spatial_radius_km = 0.1 # only referenced by the commented-out skmob compression step below

# Main loop over trips; this cell only processes trips assigned to cluster 0
for i in range(0, len(m_df)):
    if m_df['labels'][i] != 0:
        continue

    trip1 = df[df['trip_ID'] == m_df['Id_perc'][i]].drop_duplicates(subset=['unix_start_t'], keep='first')

    # Try every bin length. A failure for one bin length is reported and the next one
    # is tried, instead of being silently discarded.
    for j in bin_len_ls:
        try:
            upper_bound = dec_floor(analysis.tempOcp(trip1, 'unix_start_t', bin_len=j))
            # Draw the target occupancy uniformly between 0.1 and the upper bound
            target_ocp = dec_floor(np.random.uniform(0.1, upper_bound))
            # Simulate gaps in the user's data to match the target level
            gapped_user_data, train_index = analysis.simulate_gaps(trip1, target_ocp, unix_col='unix_start_t', bin_len=j)

            # Find all trips associated with trip 1's user
            trips = df[df['user_ID'] == trip1['user_ID'].iloc[0]]

            similar_trips = helper_func.loc_based_filter(trips, trip1, m_threshold=m_threshold)

            # Keep only similar trips that have at least min_n points
            similar_trips = similar_trips.groupby('trip_ID').filter(lambda x: len(x) >= min_n)

            # Also include trips that are one trip ID away from the trip of interest.
            # pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
            adjacent_trips = trips[trips['trip_ID'].isin(trip1['trip_ID'] + 1) | trips['trip_ID'].isin(trip1['trip_ID'] - 1)]
            similar_trips = pd.concat([similar_trips, adjacent_trips])

            #tdf = skmob.TrajDataFrame(similar_trips, latitude='orig_lat', longitude='orig_long', datetime='Date_Time')
            #f_tdf = skmob.preprocessing.filtering.filter(tdf, max_speed_kmh=max_speed_kmh, include_loops=False)
            #c_tdf = skmob.preprocessing.compression.compress(f_tdf, spatial_radius_km=spatial_radius_km)

            tr_df = preprocessing.dp_MultiTrip(data=similar_trips)
            tr_df.Multi_Trip_Preprocess(lat='orig_lat', long='orig_long', datetime='Date_Time')

            # Move 'unix_start_t' to column position 21 (intended to sit just before 'SaM')
            cols = list(tr_df.data.columns)
            cols.insert(21, cols.pop(cols.index('unix_start_t')))
            tr_df.data = tr_df.data.loc[:, cols]

            # Split BEFORE normalizing: the scaling below reads tr_df.X_train / X_test /
            # y_train / y_test, and the step-by-step cells later in this notebook likewise
            # call the split first. (Previously the split came last, after those reads.)
            tr_df.Multi_Trip_TrainTestSplit(trip1.iloc[0].Date_Time, trip1.iloc[-1].Date_Time,
                                            training_index = set(gapped_user_data['unix_start_t']), lat='orig_lat',
                                            long='orig_long', datetime='Date_Time', unix='unix_start_t', inputstart='unix_start_t', inputend=tr_df.data.columns[-1])

            scaler1 = MinMaxScaler(feature_range=(0, 100))  # first input column
            scaler3 = MinMaxScaler(feature_range=(0, 100))  # targets

            # Fit the scalers on the training split only, then apply them to the test split
            unix_train = torch.tensor(np.float64(scaler1.fit_transform(tr_df.X_train[:,0].reshape(-1,1))))
            unix_test = torch.tensor(np.float64(scaler1.transform(tr_df.X_test[:,0].reshape(-1,1))))

            # Scaled first column followed by the remaining input columns, unscaled
            X_train = torch.cat([unix_train, tr_df.X_train[:, 1::]], -1)
            X_test = torch.cat([unix_test, tr_df.X_test[:, 1::]], -1)

            y_train = torch.tensor(np.float64(scaler3.fit_transform(tr_df.y_train)))
            y_test = torch.tensor(np.float64(scaler3.transform(tr_df.y_test)))

            n_dims = tr_df.X_train.shape[1]

        except Exception as exc:
            # warnings are globally silenced in this notebook, so report with print
            print("Row {} (bin_len={}) skipped - {}: {}".format(i, j, type(exc).__name__, exc))

In [None]:
bin_len_ls = [600, 900, 1200, 1800, 3600] # Bin lengths to test
m_threshold = 200  # Meter threshold for determining similar trips (i.e., if origin and destination are <= 200 m apart)
min_n = 2 # minimum number of points for a similar trip to be kept
max_speed_kmh = 400 # speed cap passed to the skmob filter
spatial_radius_km = 0.1 # spatial radius passed to the skmob compression

# Main loop over trips, with one branch per cluster label.
# NOTE(review): this cell looks trips up via m_df['trip_ID'], while the other cells use
# m_df['Id_perc'] - confirm which column holds the trip identifier.
for i in range(0, len(m_df)):
    try:
        if m_df['labels'][i] == 0:
            trip1 = df[df['trip_ID'] == m_df['trip_ID'][i]].drop_duplicates(subset=['unix_start_t'], keep='first')

            # Test each bin length (an error here abandons the remaining bin lengths of this trip)
            for j in bin_len_ls:
                upper_bound = dec_floor(analysis.tempOcp(trip1, 'unix_start_t', bin_len=j))
                # Draw the target occupancy uniformly between 0.1 and the upper bound
                target_ocp = dec_floor(np.random.uniform(0.1, upper_bound))
                # Simulate gaps in the user's data to match the target level
                gapped_user_data, train_index = analysis.simulate_gaps(trip1, target_ocp, unix_col='unix_start_t', bin_len=j)

                # Find all trips associated with trip 1's user
                trips = df[df['user_ID'] == trip1['user_ID'].iloc[0]]

                similar_trips = helper_func.loc_based_filter(trips, trip1, m_threshold=m_threshold)

                # Keep only similar trips that have at least min_n points
                similar_trips = similar_trips.groupby('trip_ID').filter(lambda x: len(x) >= min_n)

                # Also include trips that are one trip ID away from the trip of interest.
                # pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
                adjacent_trips = trips[trips['trip_ID'].isin(trip1['trip_ID'] + 1) | trips['trip_ID'].isin(trip1['trip_ID'] - 1)]
                similar_trips = pd.concat([similar_trips, adjacent_trips])

                tdf = skmob.TrajDataFrame(similar_trips, latitude='orig_lat', longitude='orig_long', datetime='Date_Time')
                f_tdf = skmob.preprocessing.filtering.filter(tdf, max_speed_kmh=max_speed_kmh, include_loops=False)
                fc_tdf = skmob.preprocessing.compression.compress(f_tdf, spatial_radius_km=spatial_radius_km)

                # NOTE(review): the helper is fed `similar_trips` but told to use the
                # 'lat'/'lng'/'datetime' column names used for tdf/fc_tdf elsewhere in this
                # notebook - confirm whether data=fc_tdf was intended.
                tr_df = preprocessing.dp_MultiTrip(data=similar_trips)
                tr_df.Multi_Trip_Preprocess(lat='lat', long='lng', datetime='datetime')

                # Move 'unix_start_t' to column position 21 (intended to sit just before 'SaM')
                cols = list(tr_df.data.columns)
                cols.insert(21, cols.pop(cols.index('unix_start_t')))
                tr_df.data = tr_df.data.loc[:, cols]

        elif m_df['labels'][i] == 1:
            trip2 = df[df['trip_ID'] == m_df['trip_ID'][i]].drop_duplicates(subset=['unix_start_t'], keep='first')

            # Test each bin length; a failing bin length is reported and skipped
            for j in bin_len_ls:
                try:
                    upper_bound = dec_floor(analysis.tempOcp(trip2, 'unix_start_t', bin_len=j))
                    target_ocp = dec_floor(np.random.uniform(0.1, upper_bound))
                    gapped_user_data, train_index = analysis.simulate_gaps(trip2, target_ocp, unix_col='unix_start_t', bin_len=j)
                except Exception as exc:
                    print("Row {} (bin_len={}) skipped - {}: {}".format(i, j, type(exc).__name__, exc))

        elif m_df['labels'][i] == 2:
            trip3 = df[df['trip_ID'] == m_df['trip_ID'][i]].drop_duplicates(subset=['unix_start_t'], keep='first')

            # Test each bin length; a failing bin length is reported and skipped
            for j in bin_len_ls:
                # Occupancy of THIS trip (was computed from trip1, a copy-paste slip)
                temp_ocp3 = analysis.tempOcp(trip3, 'unix_start_t', bin_len=j)
                # NOTE(review): this compares a bin length with an occupancy value -
                # confirm the intended condition.
                if j <= temp_ocp3:
                    try:
                        upper_bound = dec_floor(analysis.tempOcp(trip3, 'unix_start_t', bin_len=j))
                        target_ocp = dec_floor(np.random.uniform(0.1, upper_bound))
                        gapped_user_data, train_index = analysis.simulate_gaps(trip3, target_ocp, unix_col='unix_start_t', bin_len=j)
                    except Exception as exc:
                        print("Row {} (bin_len={}) skipped - {}: {}".format(i, j, type(exc).__name__, exc))
    except Exception as exc:
        # Report instead of silently discarding; warnings are globally silenced here, so print
        print("Row {} skipped - {}: {}".format(i, type(exc).__name__, exc))

In [None]:
# Work on a single trip: the one listed at row 12 of the metrics table
trip_of_interest_id = m_df['Id_perc'][12]
trip1 = df[df['trip_ID'] == trip_of_interest_id].drop_duplicates(subset=['unix_start_t'], keep='first')

# Floored temporal occupancy of that trip with 60-unit bins
upper_bound = dec_floor(
    analysis.tempOcp(trip1, 'unix_start_t', bin_len=60)
)

In [None]:
# Temporal occupancy the gapped version of the trip should end up with
target_temp_ocp = 0.5

# Thin the trip out until it matches that occupancy level (bin length 600)
gapped_user_data, train_index = analysis.simulate_gaps(
    trip1,
    target_temp_ocp,
    unix_col='unix_start_t',
    bin_len=600,
)

In [None]:
# Observations whose timestamp is in train_index form the training set; the rest are held out
is_train_row = trip1['unix_start_t'].isin(train_index)
train = trip1[is_train_row]
test = trip1[~is_train_row]

print(len(train))
len(test)

## Stack the local training set with the longitudinal training data

### Out of filtered/compressed data, only retain trips whose start/end locations are within 200m of the start/end location of the testing trip

In [None]:
# Find all trips associated with trip 1's user
trips = df[df['user_ID'] == trip1['user_ID'].iloc[0]]

similar_trips = helper_func.loc_based_filter(trips, trip1, m_threshold=200)

# Drop similar trips that have three or fewer points (only trips with more than 3 points are kept)
similar_trips = similar_trips.groupby('trip_ID').filter(lambda x: len(x) > 3)

# Also include trips that are one trip ID away from the trip of interest.
# pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
adjacent_trips = trips[trips['trip_ID'].isin(trip1['trip_ID'] + 1) | trips['trip_ID'].isin(trip1['trip_ID'] - 1)]
similar_trips = pd.concat([similar_trips, adjacent_trips])

In [None]:
# Render the figures of the following cells as static inline images.
%matplotlib inline

In [None]:
# Side-by-side scatter: every point of the user's trips vs. the OD-filtered subset
plt.rcParams.update({'font.size': 8})  # smaller font for the panel titles
f, axs = plt.subplots(1, 2, figsize=(10, 5))
left_ax, right_ax = axs

left_ax.set_title('Original Data, n = {}'.format(len(trips)))
right_ax.set_title('OD filtered data, n = {}'.format(len(similar_trips)))

trips.plot(kind='scatter', x='orig_long', y='orig_lat', ax=left_ax, color='red', alpha=0.5, s=0.5)
similar_trips.plot(kind='scatter', x='orig_long', y='orig_lat', ax=right_ax, color='blue', alpha=0.5, s=0.5)

In [None]:
%%time
tdf = skmob.TrajDataFrame(similar_trips, latitude='orig_lat', longitude='orig_long', datetime='Date_Time')
f_tdf = skmob.preprocessing.filtering.filter(tdf, max_speed_kmh=400, include_loops=True)
fc_tdf = skmob.preprocessing.compression.compress(f_tdf, spatial_radius_km=0.1)
#fcs_tdf = skmob.preprocessing.detection.stay_locations(fc_tdf)

n_deleted_points = len(tdf) - len(f_tdf) # number of deleted points during filtering
print("The number of deleted points during filtering is: {}".format(n_deleted_points))

n_deleted_points = len(f_tdf) - len(fc_tdf) # number of deleted points during compression
print("The ratio of deleted points during compression to the number of original points is: {}".format(n_deleted_points / len(similar_trips)))


In [None]:
# Side-by-side scatter: trajectory points before vs. after skmob filtering + compression
plt.rcParams.update({'font.size': 8})  # smaller font for the panel titles
f, axs = plt.subplots(1, 2, figsize=(10, 5))
before_ax, after_ax = axs

before_ax.set_title('Original Data, n = {}'.format(len(similar_trips)))
after_ax.set_title('Filtered and Compressed Data, n = {}'.format(len(fc_tdf)))

tdf.plot(kind='scatter', x='lng', y='lat', ax=before_ax, color='red', alpha=0.5, s=1)
fc_tdf.plot(kind='scatter', x='lng', y='lat', ax=after_ax, color='blue', alpha=0.5, s=1)

In [None]:
# Wrap the OD-filtered trips in the project's multi-trip preprocessing helper. The
# skmob-filtered/compressed frame (fc_tdf) is the commented-out alternative input.
#tr_df = preprocessing.dp_MultiTrip(data=fc_tdf)
tr_df = preprocessing.dp_MultiTrip(data=similar_trips)
tr_df.Multi_Trip_Preprocess(lat='orig_lat', long='orig_long', datetime='Date_Time')
# Show the resulting column names; a later cell moves 'unix_start_t' by column position.
tr_df.data.columns

In [None]:
# IDs present in the preprocessed data other than the ID(s) of the trip under test
target_trip_ids = trip1['trip_ID'].unique()
np.setdiff1d(tr_df.data['trip_ID'].unique(), target_trip_ids)

In [None]:
# Palette for the similar trips; it is cycled, so any number of trips can be drawn
colors = ['green', 'cyan']

plt.rcParams.update({'font.size': 8})
f, axs = plt.subplots(1, 1, figsize=(10, 5))
axs.set_title('Similar Trips')

# Trip under test, in red
trip1_id = trip1['trip_ID'].iloc[0]
trip1_points = tr_df.data[tr_df.data['trip_ID'] == trip1_id]
axs.plot(trip1_points['lng'], trip1_points['lat'], color='red', alpha=0.5, label='Trip 1')

# Every other trip in the preprocessed data, on the same axes
other_trip_ids = np.setdiff1d(tr_df.data['trip_ID'].unique(), trip1['trip_ID'].unique())
for i, j in enumerate(other_trip_ids):
    trip_points = tr_df.data[tr_df.data['trip_ID'] == j]
    # i % len(colors): previously colors[i] raised IndexError from the third similar trip on
    axs.plot(trip_points['lng'], trip_points['lat'], alpha=0.5, color=colors[i % len(colors)], label='Trip {}'.format(j))

axs.legend()

In [None]:
# Relocate 'unix_start_t' to column position 22 (intended to sit just before 'SaM';
# NOTE(review): the loop cells use position 21 - confirm which is right)
cols = list(tr_df.data.columns)
cols.remove('unix_start_t')
cols.insert(22, 'unix_start_t')
tr_df.data = tr_df.data.loc[:, cols]

# Show the last column name, which the split cell passes as `inputend`
tr_df.data.columns[-1]

In [None]:
# Split the stacked trips. The first and last timestamps of trip1 are passed as the two
# positional arguments, and the timestamps that survived the gap simulation as the training index.
trip_start = trip1.iloc[0].Date_Time
trip_end = trip1.iloc[-1].Date_Time
kept_timestamps = set(gapped_user_data['unix_start_t'])

tr_df.Multi_Trip_TrainTestSplit(
    trip_start,
    trip_end,
    training_index=kept_timestamps,
    lat='orig_lat',
    long='orig_long',
    datetime='Date_Time',
    unix='unix_start_t',
    inputstart='SaM',
    inputend=tr_df.data.columns[-1],
)

#### Let's confirm that there is no test data in the training set, and vice versa

In [None]:
# For each held-out point: does its timestamp also appear in the new training table?
test_in_new_train = test['unix_start_t'].isin(tr_df.train['unix_min'])

print("The length of the testing set is " + str(len(test)))
# Count the False entries explicitly. value_counts().item() failed when both True and
# False occurred, and reported the wrong number when every entry was True.
print("The number of testing points that are NOT in the training set is " + str(int((~test_in_new_train).sum())))
print("The length of the training set is " + str(len(train)))

train['unix_start_t'].isin(tr_df.train['unix_min']).value_counts()
# There was indeed one point that was in the original training set that is not in the new training set (likely due to the filtering)

### Normalize data

In [None]:
# Use float64 as the default floating dtype (set_default_tensor_type is deprecated
# in recent PyTorch; set_default_dtype is its replacement for this purpose).
torch.set_default_dtype(torch.float64)

scaler1 = MinMaxScaler(feature_range=(0, 100))  # first input column
#scaler2 = MinMaxScaler(feature_range=(0, 10))
scaler3 = MinMaxScaler(feature_range=(0, 100))  # targets

# Fit the input scaler on the training split only, then apply it to the test split
unix_train = torch.tensor(np.float64(scaler1.fit_transform(tr_df.X_train[:,0].reshape(-1,1))))
unix_test = torch.tensor(np.float64(scaler1.transform(tr_df.X_test[:,0].reshape(-1,1))))

# Scaled first column followed by the remaining input columns, unscaled
X_train = torch.cat([unix_train, tr_df.X_train[:, 1:]], -1)
X_test = torch.cat([unix_test, tr_df.X_test[:, 1:]], -1)

# Targets: same fit-on-train / transform-test pattern
y_train = torch.tensor(np.float64(scaler3.fit_transform(tr_df.y_train)))
y_test = torch.tensor(np.float64(scaler3.transform(tr_df.y_test)))

# Number of input columns; used to size the ARD kernel in the model cell
n_dims = tr_df.X_train.shape[1]
print(n_dims)

In [None]:
from gpytorch.likelihoods.multitask_gaussian_likelihood import _MultitaskGaussianLikelihoodBase
from gpytorch.likelihoods.noise_models import FixedGaussianNoise
from gpytorch.lazy import ConstantDiagLazyTensor, KroneckerProductLazyTensor

class FixedTaskNoiseMultitaskLikelihood(_MultitaskGaussianLikelihoodBase):
    """Multitask Gaussian likelihood whose noise is supplied by the caller.

    The given `noise` is wrapped in a `FixedGaussianNoise` and handed to the base
    class as `noise_covar`. Global and task noise are both switched off.
    """

    def __init__(self, noise, *args, **kwargs):
        """Build the likelihood around a `FixedGaussianNoise(noise=noise)`.

        Remaining positional and keyword arguments are forwarded to the base class.
        """
        noise_covar = FixedGaussianNoise(noise=noise)
        super().__init__(noise_covar=noise_covar, *args, **kwargs)
        # Set after the base constructor has run, so these values are the ones in effect
        self.has_global_noise = False
        self.has_task_noise = False
        
    def _shaped_noise_covar(self, shape, add_noise=True, *params, **kwargs):
        """Return the noise covariance as a Kronecker product of data noise and task noise.

        `add_noise` is accepted but not used in this body.
        Raises NotImplementedError when `has_task_noise` is true.
        """
        if not self.has_task_noise:
            # NOTE(review): shape[:-2] is wrapped in a 1-tuple before torch.Size is applied -
            # confirm this is the shape the noise model expects.
            data_noise = self.noise_covar(*params, shape=torch.Size((shape[:-2],)), **kwargs)
            # A single 1.0 on the device/dtype of the data noise: the constant diagonal value
            eye = torch.ones(1, device=data_noise.device, dtype=data_noise.dtype)
            # TODO: add in a shape for batched models
            task_noise = ConstantDiagLazyTensor(
                eye, diag_shape=torch.Size((self.num_tasks,))
            )
            return KroneckerProductLazyTensor(data_noise, task_noise)
        else:
            # TODO: copy over pieces from MultitaskGaussianLikelihood
            raise NotImplementedError("Task noises not supported yet.")

In [None]:
# One random value per training row: exp of a uniform [0, 1) draw
train_y_var = torch.exp(torch.rand(y_train.shape[0]))

In [None]:
# Two-task Gaussian likelihood (not passed to the model constructor in this cell)
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)

# Scaled RQ kernel on input column 0, plus a scaled ARD RQ kernel on the remaining columns
first_col_kernel = ScaleKernel(RQ(active_dims = [0]))
other_cols_kernel = ScaleKernel(RQ(ard_num_dims = n_dims - 1, active_dims=list(range(1, n_dims))))

model = GP.MTGPRegressor(X_train, y_train, first_col_kernel + other_cols_kernel)

In [None]:
# Histogram of the differences between consecutive 'unix_start_t' values within each trip
# of the preprocessed data; the x-axis is cut at 200 so the bulk of the distribution is visible
within_trip_gaps = tr_df.data.groupby('trip_ID')['unix_start_t'].apply(lambda stamps: stamps.diff())
within_trip_gaps.plot.hist(bins=200, figsize=(10,5), xlim=(0,200))
plt.show()

In [None]:
# Initial lengthscale guess: half of the mean (over trips) of each trip's mean timestamp gap
mean_gap_per_trip = similar_trips.groupby('trip_ID')['unix_start_t'].apply(lambda stamps: stamps.diff().mean())
init_lengthscale = mean_gap_per_trip.mean() / 2
# NOTE(review): this guess is in raw 'unix_start_t' units while input column 0 was
# min-max scaled to [0, 100] - confirm whether it should be rescaled as well.

# Start every remaining input dimension at lengthscale 1
categorical_inits = np.ones(n_dims - 1)
init_params = np.insert(categorical_inits, 0, init_lengthscale)

data_kernels = model.covar_module.data_covar_module.kernels

# Lengthscales: kernel 0 acts on column 0, kernel 1 on the remaining columns
data_kernels[0].base_kernel.lengthscale = init_lengthscale
data_kernels[1].base_kernel.lengthscale = torch.tensor(categorical_inits).float()

# Output scales: 0.2 for kernel 1, 0.8 for kernel 0
data_kernels[1].outputscale = torch.tensor(0.2)
data_kernels[0].outputscale = torch.tensor(0.8)

In [None]:
# Fit the model with the project's training helper (learning rate 0.3). It returns two
# values, bound to `ls` and `mll` - presumably the loss history and the marginal
# log-likelihood object (TODO confirm against utils.GP.training).
ls, mll = GP.training(model, X_train, y_train, lr=0.3)

### Check model parameters

In [None]:
# Print the fitted hyperparameters as stored on the model (no back-conversion is applied here)
data_kernels = model.covar_module.data_covar_module.kernels

# Lengthscales of both data kernels first ...
for kernel_idx in (0, 1):
    print(data_kernels[kernel_idx].base_kernel.lengthscale)

# ... then their output scales, then the likelihood noise
for kernel_idx in (0, 1):
    print(data_kernels[kernel_idx].outputscale)

print(model.likelihood.noise)

In [None]:
# "Loss" for GPs - the marginal log likelihood (rebinds `mll` from the training cell)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model)

# Number of training rows
N = X_train.shape[0]

# Evaluate without tracking gradients and scale the result by N.
# NOTE(review): confirm that multiplying by N undoes the normalization applied by
# ExactMarginalLogLikelihood for a two-task model.
with torch.no_grad():
    log_ll = mll(model(X_train), y_train) * N

# Total number of scalar hyperparameters
m = sum(param.numel() for param in model.hyperparameters())

# Bayesian information criterion: -2 log L + m log N
bic = -2 * log_ll + m * np.log(N)

In [None]:
# Predict at the held-out inputs; `mean` is what the plotting and metrics cells below use.
predictions, mean = model.predict(X_test)

In [None]:
# Display the training table stored on tr_df.
tr_df.train

In [None]:
# Switch to the interactive 'widget' backend for the prediction plot in the next cell.
%matplotlib widget

In [None]:
# Plot the predicted mean with the model's own plotting helper, passing the train/test
# dates stored on tr_df and the scaled train/test targets.
model.plot_preds(mean, tr_df.date_train, 
                 tr_df.date_test, y_train, y_test)

In [None]:
# Back to static inline figures for the remaining plots.
%matplotlib inline

In [None]:
# Scatter the predicted means (red) and the scaled test targets (blue) on one set of axes,
# with column 1 on the x-axis and column 0 on the y-axis
plt.rcParams.update({'font.size': 8})  # smaller font
f, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.set_title('Predictions')

predicted_frame = pd.DataFrame(mean.detach().numpy())
observed_frame = pd.DataFrame(y_test.detach().numpy())

predicted_frame.plot(kind='scatter', x=1, y=0, ax=ax, color='red', alpha=0.5, s=1)
observed_frame.plot(kind='scatter', x=1, y=0, ax=ax, color='blue', alpha=0.5, s=1)

In [None]:
# Score the predictions: both target columns of the test set, then both columns of the
# predicted mean, each passed as a pandas Series
observed_col0, observed_col1 = pd.Series(y_test[:,0]), pd.Series(y_test[:,1])
predicted_col0, predicted_col1 = pd.Series(mean[:,0]), pd.Series(mean[:,1])
metrics.average_eval(observed_col0, observed_col1, predicted_col0, predicted_col1)