In [1]:
%matplotlib inline
import edward as ed
from edward.models import Normal
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# our functions
import setup
import data
import visualizations
import basis_functions
pd.set_option('float_format', '{:f}'.format)
setup.set_random_seeds(42)
plt.style.use("seaborn-talk")
sns.set_context("talk")

In [2]:
manhattan = data.get_borough_data("data/preprocessed.csv", "Manhattan")
ues_to_msh = data.get_neighborhood_to_neighborhood("Morningside Heights", "Upper East Side-Carnegie Hill", manhattan)

In [3]:
indicator_cols = ["pickup_latitude", "pickup_longitude", "dropoff_latitude", "dropoff_longitude", "manhattan_distance", "pickup_hour", "pickup_timestamp"]
y_cols = ["trip_duration"]

x_train_raw, y_train_raw, x_test_raw, y_test_raw = data.train_test_split(ues_to_msh, 0.1, indicator_cols, y_cols)
x_train = data.standardize_cols(x_train_raw)
x_test = data.standardize_cols(x_test_raw)


# Box's Loop - Iteration 3

## Model: Gaussian Process

Gaussian processes (GPs) are a supervised machine learning algorithm that measures the similarity between input data points, using a predefined kernel function, to predict the value of an unseen data point. We like to think about GPs as: defining an infinite distribution on functions over a continuous sapce where we observe some data points and want to assign probabilities to all the ways a line could be drawn through those points. The idea is that we hope these lines (or functions) we draw are similar with high probability to that true function which we will never know. 

Drawing a parallel to our polynomial basis GLM example: instead of choosing a polynomial basis function of degree four, with GPs using a kernel function we'd like to infer the true function from our data. Hence, GPs allow us retain the flexibility of capturing non-linearities in our data but accounting for "infinite" numbers of basis functions. 

### Interpretation of Kernel Function

GPs are parameterized by a pre-determined *kernel function* which is positive-semidefinite covariance matrix that calculated distanced between every pair of $N$ observed points. The Kernel function must be a square matrix and allows use to explore *smoothness* and *periodicity* in our observed data. 

In our analysis we will explore a few kernel functions but note there are many popular kernel functions, some more applicable than others and exploring them all was impossible. 

### Model Overview

A GP can be specified entirely by it's Kernel Function and mean (often assumed to 0): 

Where,

**Prior:** $p(f) = GP(\ 0,\ K(x,x',\theta)\ )$ where $\theta$ = *length_scale* $l_f$, sigma $\sigma_f$

**Likelihood:**  $p(y\ |\ f,\ X,\ \theta) = GP(\ f,\ K(x,x', \theta)\ )$

In [46]:
n_test, d_test = x_test.shape
y_n, y_d = y_train_raw.shape
N, D = x_train.shape

x_test_gp = tf.placeholder(tf.float32, [n_test, d_test])
x_gp = tf.placeholder(tf.float32, [N, D])
y_gp = tf.placeholder(tf.float32, [y_n, y_d])

mu, var = tf.nn.moments(y_gp, 1)

v_i = tf.eye(N) * var

k = ed.rbf(x_gp, lengthscale=0.9996, variance=1.0) + v_i

k_star = ed.rbf(x_test_gp, x_gp)

k_star_star = ed.rbf(x_test_gp)

k_inv = tf.matrix_inverse(k)

mu_star = tf.matmul(tf.matmul(k_star, k_inv), y_gp)

tmp1 = tf.matmul(k_star, k_inv)

tmp2 = tf.matmul(tmp1, k_star, transpose_b=True)

v_star = k_star_star - tmp2

L = tf.cholesky(tf.abs(v_star))

# p_y = ed.models.Normal(loc=mu_star, scale=v_star)
m = mu_star.eval(session=tf.Session(), feed_dict={
    x_gp: x_train.as_matrix(),
    y_gp: y_train_raw.as_matrix(),
    x_test_gp: x_test.as_matrix()
})

cov = v_star.eval(session=tf.Session(), feed_dict={
    x_gp: x_train.as_matrix(),
    y_gp: y_train_raw.as_matrix(),
    x_test_gp: x_test.as_matrix()})

expected = np.random.multivariate_normal(m.reshape((m.shape[0],)), cov)
# p_y = ed.models.Normal(loc=tf.reshape(m, (m.shape[0],)), scale=cov)

# expected = p_y.sample().eval(session=tf.Session(), feed_dict={
#     x_gp: x_train.as_matrix(),
#     y_gp: y_train_raw.as_matrix(),
#     x_test_gp: x_test.as_matrix()
# })

print("expected.shape", expected.shape)
# expected = mu_star.eval(session=tf.Session(), feed_dict={
#     x_gp: x_train.as_matrix(),
#     y_gp: y_train_raw.as_matrix(),
#     x_test_gp: x_test.as_matrix()
# })

print("prediction")
print(expected)
print("actual")
print(y_test_raw)
print("error")
err = y_test_raw.as_matrix() - expected
print(np.abs(err))
print("mean abs error")
print(np.abs(err).mean())

expected.shape (70,)
prediction
[ 11.34889621  18.54625534  35.93692691  25.19478301  10.20763848
  21.62984954  16.90604639  34.07454377   7.83611066  19.95123486
   9.29506565  13.27852904   9.21523767  11.10381997   4.96393114
  24.74958625  10.96811069  20.2788163   13.1529822   15.61556318
  22.19526823  17.21266034  10.09846134  13.09524108  15.74238393
   9.95723267  15.16143206  15.44127301  28.45845798  19.06734572
  12.40959524  19.36835764  28.54778205   4.70653486   7.66773061
   4.93242844  12.94008577  12.54136809  12.3250377   11.55846327
   2.30321391  16.60690634  20.80901859  13.74256706  21.74172237
   9.42484636   9.82305667  21.17685515  11.89634161   7.77866968
  -0.50854894   9.70219824  12.65670345  10.71287245   8.78996084
  10.58268715  20.51633212  10.55296479  22.16884611   4.43730056
  10.33461223  11.85048421  16.07845154  18.23911833  19.89156371
  15.5184552   29.3720064   19.88120754   9.20595701  15.00647261]
actual
     trip_duration
495      14.46666



In [None]:
def my_rbf(X, X2=None, lengthscale=1.0, variance=1.0): 
    X = tf.convert_to_tensor(X)
    X = X / lengthscale
    Xs = tf.reduce_sum(tf.square(X), 1)
    if X2 is None:
        X2 = X
        X2s = Xs
    else:
        X2 = tf.convert_to_tensor(X2)
        X2 = X2 / lengthscale
        X2s = tf.reduce_sum(tf.square(X2), 1)

    square = tf.reshape(Xs, [-1, 1]) + tf.reshape(X2s, [1, -1]) - 2 * tf.matmul(X, X2, transpose_b=True)
    output = variance * tf.exp(-square / 2)
    return output
my_rbf()

In [5]:
gp = ed.models.MultivariateNormalTriL(scale=tf.zeros([N]), scale_tril=tf.cho)

TypeError: __init__() missing 1 required positional argument: 'scale'

In [8]:
"""
ed.evaluate("mean_absolute_error",
            data={inf_gp_x: x_test_standardized_harlem_to_bat_park.as_matrix(),
                  inf_gp_qf: np.reshape(y_test_harlem_to_bat_park.as_matrix(), (y_test_harlem_to_bat_park.shape[0]))},
             output_key=inf_gp_qf)
"""

TypeError: Key-value pair in data does not have same shape: (32,), (7,)