In [1]:
import numpy as np
import pandas as pd
from datetime import  datetime

import pyproj
import os
import sys
import torch
import gpytorch
import zipfile

sys.path.append("../src/")
import affine_operator_win_bounds as affine_ops 

# GP for Carthe DATA

### For each velocity component
* for comparing drifters $i$ and $j$ at times $k$ and $l$.

$$
k(u_{i,k}, u_{j,l}) = \sigma_1^2 \exp\left\{-\frac{1}{2}\left[
    \frac{\|x_{i,k} - x_{j,k}\|^2}{r_{s1}^2}  +
    \frac{(t_{i,k} - t_{j,k})^2}{r_{t1}^2} 
\right] \right\} + 
\sigma_2^2 \exp\left\{-\frac{1}{2}\left[
    \frac{\|x_{i,k} - x_{j,k}\|^2}{r_{s2}^2}  +
    \frac{(t_{i,k} - t_{j,k})^2}{r_{t2}^2} 
\right] \right\} + \mathbb{I}[(i,l)=(j,k)]\sigma_N^2
$$

* Here $\sigma_1^2$ and $\sigma_2^2$ are signal variances,
$r_{s1}$ and $r_{s2}$ are spatial length scales and 
$r_{t1}$ and $r_{t2}$ are temporal length scales.
* This constrains the length scales to be the same for both lattitude and longitude.

In [2]:
# data from https://data.gulfresearchinitiative.org/data/R1.x134.073:0004
data_dir = "../data/oceans/GLAD_15min_filtered"
if not os.path.isdir(data_dir):
    with zipfile.ZipFile(data_dir +".zip", 'r') as zip_ref:
        zip_ref.extractall(data_dir)
fn = data_dir + "/GLAD_15min_filtered.dat"
df_full = pd.read_csv(fn, delim_whitespace=True, header=None, skiprows=5)

df_full = df_full.rename(columns={0:"drifter", 1:"date", 2:"time", 3:"Latitude", 4:"Longitude",
                  5:"Pos Error", 6:"U", 7:"V", 8:"Vel Error"})

# Add columns corresponding to time in hours and position in Kilometers

In [3]:
# First add time in hours
fmt  = "%Y-%m-%d %H:%M:%S.%f"
baseline_str = df_full.iloc[0].date  + " "+ df_full.iloc[0].time
baseline = datetime.strptime(baseline_str, fmt)
print("baseline: ", baseline)
def time_from_baseline(baseline, date, time):
    dt_str = date + " " + time
    dt = datetime.strptime(dt_str, fmt)
    return (dt - baseline).total_seconds()

# add time in hours attribute
all_dates, all_times = df_full.date, df_full.time
all_times_seconds = []
for date, time in zip(all_dates, all_times):
    all_times_seconds.append(time_from_baseline(baseline, date, time))
all_times_hours = np.array(all_times_seconds)/60/60
print("len(all_times_hours):", len(all_times_hours))

df_full['hour'] = all_times_hours


# Next add location in kilometers, taken from John Lodise's code
################################Set up Coordinate system #################################
lat0 = 28.2
lon0 = -88.35
NAD83 = pyproj.Proj("+init=EPSG:3453", preserve_units = False)#False = meters #Louisiana South (ftUS)
x_ori, y_ori = NAD83(lon0,lat0) #define x,y origin using lat0,lon0

# Set limits in space (both in lat/lon and Km from origin) to be used for later filtering.
lat_min, lat_max = 26., 29.5
lon_min, lon_max = -89.5, -85.
(x_min, y_min), (x_max, y_max) = NAD83(lon_min, lat_min), NAD83(lon_max, lat_max)
x_min, x_max, y_min, y_max = (x_min-x_ori)/1000, (x_max-x_ori)/1000, (y_min-y_ori)/1000, (y_max-y_ori)/1000

lons, lats = df_full.Longitude, df_full.Latitude
x, y = NAD83(lons, lats)
# Center around origin and scale to Kilometers
x, y = (x - x_ori)/1000, (y - y_ori)/1000
df_full['x'], df_full['y'] = x, y

baseline:  2012-07-20 01:15:00.143960
len(all_times_hours): 1602883


  projstring = _prepare_from_string(projparams)


## Subset the data to more manageable size

In [4]:
def subset_data(df_full, t_min, t_max, lat_min, lat_max, lon_min, lon_max, downsample_freq, n_drifters):
    df = df_full.copy()
    
    # filter to fewer hours 
    df = df[df.hour >=t_min]
    df = df[df.hour <=t_max]

    # filter to smaller spatial area
    df = df[df.Latitude >= lat_min]
    df = df[df.Latitude <= lat_max]
    df = df[df.Longitude >= lon_min]
    df = df[df.Longitude <= lon_max]

    # filter to first 10 random drifters
    drifter_ids = df.drifter.unique()
    np.random.seed(42)
    drifter_set = np.random.choice(drifter_ids, replace=False, size=n_drifters)
    df = df[df.drifter.isin(drifter_set)]

    # downsample time 
    if not "index_2" in df.columns:
        df['index_2'] = np.array(range(len(df)))
        df = df[df.index_2%downsample_freq == 0 ]

    print("number of rows: ", len(df))
    return df

# Pull out a subset of the data on which to select kernel parameters

In [5]:
# start at t = 424 hours,  for evaluation data we will start at 400
t_min, t_max  = 424, 900 
downsample_freq = 10
n_drifters = 20

df = subset_data(df_full, t_min=t_min, t_max=t_max, lat_min=lat_min, lat_max=lat_max, lon_min=lon_min,
                lon_max=lon_max, downsample_freq=downsample_freq, n_drifters=n_drifters)

# pull out covariates (time, lat, long) and responses (U, V)
X, Y = df[['hour', 'x','y']].to_numpy(), df['U'].to_numpy()
X = torch.tensor(X, dtype=torch.float)
Y = torch.tensor(Y, dtype=torch.float)

number of rows:  3430


# Define GPyTorch Gaussian Process Models

In [6]:
# We will use the simplest form of GP model, exact inference
class TwoScaleGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, cut_long_ls=False, cut_short_ls=False):
        """train_x is shape [N, 3], each x[0] is [t, lat, lon]
        """
        super(TwoScaleGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(active_dims=[0])*
            gpytorch.kernels.RBFKernel(active_dims=[1,2])
        ) + gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(active_dims=[0])*
            gpytorch.kernels.RBFKernel(active_dims=[1,2])
        )
        self.heuristic_init()
        #k1 (scale: 0.06, ls: 5.2, 17.5)     k2 (scale: 0.21, ls: 206.5, 71.7) noise: 0.001297

        
    def heuristic_init(self):
        ### set initial HPs
        # first for kernel 1 (small variance component)
        k = self.covar_module.kernels[0]
        k.outputscale = 0.06 # signal var
        k.base_kernel.kernels[0].lengthscale =  5.2 # time in hours
        k.base_kernel.kernels[1].lengthscale = 17.5 # length in km
        
        # second for kernel 2
        k = self.covar_module.kernels[1]
        k.outputscale = 0.21
        k.base_kernel.kernels[0].lengthscale = 206.5 # time in hours
        k.base_kernel.kernels[1].lengthscale =  71.7 # length in Kilometers
            
    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [7]:
class OneScaleGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, full_GP):
        """train_x is shape [N, 3], each x[0] is [t, lat, lon]
        """
        
        # define likelihood with sum of variances
        likelihood = gpytorch.likelihoods.GaussianLikelihood()
        likelihood.noise_covar.raw_noise_constraint.lower_bound = torch.tensor(1e-6)
        likelihood.noise = full_GP.likelihood.noise + full_GP.covar_module.kernels[0].outputscale
        super(OneScaleGPModel, self).__init__(train_x, train_y, likelihood)

        self.mean_module = gpytorch.means.ConstantMean()
        
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(active_dims=[0])*
            gpytorch.kernels.RBFKernel(active_dims=[1,2])
        )
        
        # first for kernel 1 (small variance component)
        full_gp_k0 = full_GP.covar_module.kernels[1]
        k = self.covar_module
        k.outputscale = full_gp_k0.outputscale
        # time in hours
        k.base_kernel.kernels[0].lengthscale = full_gp_k0.base_kernel.kernels[0].lengthscale
        # length in km
        k.base_kernel.kernels[1].lengthscale = full_gp_k0.base_kernel.kernels[1].lengthscale 
       
    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# Fit parameters of a Two-scale GP model

In [8]:
# initialize model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
likelihood.noise_covar.raw_noise_constraint.lower_bound = torch.tensor(1e-6)
likelihood.noise = 0.001297 # initialize noise to value found in l
two_scale_gp = TwoScaleGPModel(X, Y, likelihood)

# set to training mode
likelihood.train()
two_scale_gp.train()

# Use the adam optimizer
optimizer = torch.optim.LBFGS([
    {'params': two_scale_gp.parameters()},  # Includes GaussianLikelihood parameters
], lr=0.1)

# "Loss" for GPs - the marginal log likelihood
with gpytorch.settings.fast_computations(log_prob=False, solves=False, covar_root_decomposition=False):
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, two_scale_gp)

    I = 25
    for i in range(I):
        optimizer.zero_grad() # Zero gradients from previous iteration
        
        output = two_scale_gp(X) # Output from model
        
        # Calc loss and backprop gradients
        loss = -mll(output, Y)
        loss.backward()
        print('Iter %03d/%03d - Loss: %.3f   k1 (scale: %.2f, ls: %.1f, %.1f) \
    k2 (scale: %.2f, ls: %.1f, %.1f) noise: %.6f' % (
            i + 1, I, loss.item(),
            two_scale_gp.covar_module.kernels[0].outputscale,
            two_scale_gp.covar_module.kernels[0].base_kernel.kernels[0].lengthscale.item(),
            two_scale_gp.covar_module.kernels[0].base_kernel.kernels[1].lengthscale.item(),
            two_scale_gp.covar_module.kernels[1].outputscale,
            two_scale_gp.covar_module.kernels[1].base_kernel.kernels[0].lengthscale.item(),
            two_scale_gp.covar_module.kernels[1].base_kernel.kernels[1].lengthscale.item(),
            two_scale_gp.likelihood.noise.item()
        ))
        def closure():
            optimizer.zero_grad()
            output = two_scale_gp(X)
            loss = -mll(output, Y)
            loss.backward()
            return loss
        optimizer.step(closure)

Iter 001/025 - Loss: -0.924   k1 (scale: 0.06, ls: 5.2, 17.5)     k2 (scale: 0.21, ls: 206.5, 71.7) noise: 0.001297
Iter 002/025 - Loss: -0.944   k1 (scale: 0.08, ls: 5.0, 17.4)     k2 (scale: 0.21, ls: 206.5, 71.7) noise: 0.001281
Iter 003/025 - Loss: -0.945   k1 (scale: 0.09, ls: 4.9, 17.1)     k2 (scale: 0.20, ls: 206.5, 71.7) noise: 0.001266
Iter 004/025 - Loss: -0.945   k1 (scale: 0.08, ls: 4.9, 15.9)     k2 (scale: 0.21, ls: 206.5, 71.7) noise: 0.001204
Iter 005/025 - Loss: -0.945   k1 (scale: 0.08, ls: 4.9, 15.7)     k2 (scale: 0.21, ls: 206.5, 71.7) noise: 0.001198
Iter 006/025 - Loss: -0.946   k1 (scale: 0.08, ls: 4.9, 15.8)     k2 (scale: 0.21, ls: 206.5, 71.7) noise: 0.001198
Iter 007/025 - Loss: -0.945   k1 (scale: 0.08, ls: 4.9, 15.8)     k2 (scale: 0.21, ls: 206.5, 71.7) noise: 0.001198
Iter 008/025 - Loss: -0.945   k1 (scale: 0.08, ls: 4.9, 15.8)     k2 (scale: 0.21, ls: 206.5, 71.7) noise: 0.001198
Iter 009/025 - Loss: -0.945   k1 (scale: 0.08, ls: 4.9, 15.8)     k2 (sc

In [9]:
# Initialize one-scale GP model
one_scale_gp = OneScaleGPModel(X, Y, two_scale_gp)

# Formulate estimates as affine transformations and compute a c-value

In [10]:
def get_A_C_and_Sigma(one_scale_gp, two_scale_gp, X):
    # get likelihood and prior cov for two scale model
    N = X.shape[0]
    K2 = two_scale_gp.covar_module(X).detach().numpy()
    Sigma = two_scale_gp.likelihood.noise.detach().numpy()*np.eye(N)
    
    # get prior cov_for one scale model
    K1 = one_scale_gp.covar_module(X).detach().numpy()
    print("K1 diag, K2 diag", np.diag(K1)[:3], np.diag(K2)[:3])
    K1 += (one_scale_gp.likelihood.noise.detach().numpy() - 
           two_scale_gp.likelihood.noise.detach().numpy())*np.eye(N)
    print("K1 diag, K2 diag", np.diag(K1)[:3], np.diag(K2)[:3])
    
    A = np.linalg.inv(np.eye(N) + Sigma@np.linalg.inv(K1))
    C = np.linalg.inv(np.eye(N) + Sigma@np.linalg.inv(K2))
    return A, C, Sigma

In [11]:
# Load a smaller subset of the data for computing a c-value as in the paper
np.random.seed(42)
df_more_drifters = subset_data(
    df_full, lat_min=lat_min, lat_max=lat_max, lon_min=lon_min, lon_max=lon_max, 
    t_min=400, t_max=424,
    downsample_freq=12,
    n_drifters=50)

### Pull out covariates 
X_small = df_more_drifters[['hour', 'x','y']].to_numpy()
X_small = torch.tensor(X_small, dtype=torch.float)

### compute c_values for latitudinal and longitudinal components separately
Y_small_lat = df_more_drifters['U'].to_numpy()
Y_small_lon = df_more_drifters['V'].to_numpy()

A, C, Sigma = get_A_C_and_Sigma(one_scale_gp, two_scale_gp, X_small)
N = A.shape[0]
print("N : ", N)
c_val_lat = affine_ops.c_value(Y_small_lat, A, np.zeros(N), C, np.zeros(N), Sigma)
print("c_val latitudinal \t: ", c_val_lat)

# Additionally compute a c-value for an estimate of the longitudinal velocity component
# We model this second component independently, as in prior work.
c_val_lon = affine_ops.c_value(Y_small_lon, A, np.zeros(N), C, np.zeros(N), Sigma)
print("c_val longitudinal \t: ", c_val_lon)

number of rows:  400
K1 diag, K2 diag [0.21256207 0.21256207 0.21256207] [0.29136112 0.29136112 0.29136112]
K1 diag, K2 diag [0.29136112 0.29136112 0.29136112] [0.29136112 0.29136112 0.29136112]
N :  400
c_val latitudinal 	:  0.9998049269934458
c_val longitudinal 	:  0.9987361748360667


  r = _zeros._bisect(f, a, b, xtol, rtol, maxiter, args, full_output, disp)
