# Libraries

In [4]:
import GPflow
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import tensorflow as tf
%matplotlib inline

import time

### Import CSV

In [5]:
data = np.genfromtxt(fname = "data/for_composititional_analysis.csv", 
                     delimiter = ',',
                     usecols = (1,2,3,4,5),
                     skip_header = 1,
                     dtype=None)

In [6]:
# Transformation of array to matrix
def array_to_matrix(x):
    X = []
    for i in range(len(x)):
        X.append([float(x[i])])
    X = np.array(X)
    return X

# Gaussian Processes
Docs:
- [GP Regression](http://gpflow.readthedocs.io/en/latest/notebooks/regression.html)

In [7]:
def compute(X, Y, kernel, name):
    model = GPflow.gpr.GPR(X, Y, kern=kernel)
    
    try:
        model.optimize()
    except:
        print('Exception caught computing', name)
        
    return model

In [8]:
def lml(model):
    """Log marginal likelihood of a GP"""
    
    try:
        return model.compute_log_likelihood()
    except:
        print('Exception caught in lml')
        return -999999999

In [9]:
def compute_gps(X, Y):
    # Kernel with optimized parameters
    l = GPflow.kernels.Linear(1)
    p = GPflow.kernels.PeriodicKernel(1)
    r = GPflow.kernels.RBF(1)

    gps = {}

    gps['l'] = compute(X, Y, l, 'l')
    gps['p'] = compute(X, Y, p, 'p')
    gps['r'] = compute(X, Y, r, 'r')

    gps['l+r'] = compute(X, Y, l+r, 'l+r')
    gps['l+p'] = compute(X, Y, l+p, 'l+p')
    gps['p+r'] = compute(X, Y, p+r, 'p+r')

    gps['l*r'] = compute(X, Y, l*r, 'l*r')
    gps['l*p'] = compute(X, Y, l*p, 'l*p')
    gps['p*r'] = compute(X, Y, p*r, 'p*r')

    gps['l+r+p'] = compute(X, Y, l+r+p, 'l+r+p')
    gps['l+r*p'] = compute(X, Y, l+r*p, 'l+r*p')
    gps['l*r+p'] = compute(X, Y, l*r+p, 'l*r+p')
    gps['l*p+r'] = compute(X, Y, l*p+r, 'l*p+r')
    gps['l*r*p'] = compute(X, Y, l*r*p, 'l*r*p')
    
    return gps

In [10]:
def compute_lmls(models):
    lmls = {}
    for key in models.keys():
        lmls[key] = lml(models[key])
        
    return lmls

In [11]:
def compute_gps_for_dataset(dataset):
    
    ids = np.unique(dataset['f0'])
    
    gpss = {}
    lmls = {}
    maxs = {}
    
    for i in ids:
        print(i)
        # Filter the relevant data
        filtered_data = dataset[dataset['f0'] == i]
        
        # Get X and Y
        X = array_to_matrix(filtered_data['f3'])
        Y = array_to_matrix(filtered_data['f4'])
        
        # Compute GPs and Best
        gps = compute_gps(X, Y)
        print('Compute OK')
        
        likelihoods = compute_lmls(gps)
        print('LMLs OK')
        best = dict_max(likelihoods)
        
        # Save
        #gpss[i] = gps
        #lmls[i] = likelihoods
        maxs[i] = best
        
    return {'gpss': gpss, 'lmls': lmls, 'maxs': maxs}

In [12]:
def dict_max(d):
    maxval = max(d.values())
    keys = [k for k,v in d.items() if v==maxval]
    return keys, maxval

In [36]:
def save_results(res_dictionary, filename):
    f = open('output/' + filename + '.csv', 'w')

    for key in res_temperature['maxs'].keys():
        f.write(str(key))
        f.write(',')
        f.write(str(res_dictionary['maxs'][key][0]))
        f.write(',')
        f.write(str(res_dictionary['maxs'][key][1]))
        f.write('\n')

    f.close()

# Compute Gaussian Processes

In [13]:
temperature = data[data['f2'] == b'"Temperature"']
rain = data[data['f2'] == b'"Rain"']
sales = data[data['f2'] == b'"Sales"']
salary = data[data['f2'] == b'"Salary"']
gym = data[data['f2'] == b'"Gym members"']
fb = data[data['f2'] == b'"FB Friends"']

In [14]:
t0 = time.time()
res_temperature = compute_gps_for_dataset(temperature)
t1 = time.time()
(t1-t0)/60

1
Compute OK
LMLs OK
7
Exception caught computing l*r
Compute OK
LMLs OK
13
Compute OK
LMLs OK
19
Compute OK
Exception caught in lml
Exception caught in lml
LMLs OK
25
Exception caught computing p*r
Exception caught computing l+r*p
Compute OK
LMLs OK
31
Compute OK
LMLs OK
37
Exception caught computing p
Exception caught computing l*r
Compute OK
LMLs OK
43
Compute OK
LMLs OK
49
Compute OK
LMLs OK
55
Compute OK
LMLs OK
61
Exception caught computing p+r
Exception caught computing l*p
Exception caught computing l+r+p
Exception caught computing l*r+p
Exception caught computing l*p+r
Exception caught computing l*r*p
Compute OK
LMLs OK
67
Exception caught computing p+r
Compute OK
Exception caught in lml
LMLs OK
73
Compute OK
LMLs OK
79
Compute OK
LMLs OK
85
Exception caught computing p
Compute OK
LMLs OK
91
Exception caught computing l*r
Exception caught computing l*r*p
Compute OK
LMLs OK
97
Compute OK
LMLs OK
103
Compute OK
LMLs OK
109
Exception caught computing p
Exception caught computing 

72.690969034036

In [37]:
save_results(res_temperature, 'res_temperature')

# All the rest

In [None]:
t0 = time.time()

# Rain
res_rain = compute_gps_for_dataset(rain)
save_results(res_rain, 'res_rain')

# Sales
res_sales = compute_gps_for_dataset(sales)
save_results(res_sales, 'res_sales')

# Salary
res_salary = compute_gps_for_dataset(salary)
save_results(res_salary, 'res_salary')

# Gym
res_gym = compute_gps_for_dataset(gym)
save_results(res_gym, 'res_gym')

# Facebook
res_fb = compute_gps_for_dataset(fb)
save_results(res_fb, 'res_fb')


t1 = time.time()
(t1-t0)/60

2
Compute OK
LMLs OK
8
Exception caught computing l+p
Exception caught computing l*r
Exception caught computing l*r*p
Compute OK
LMLs OK
14
Compute OK
LMLs OK
20
Exception caught computing l*r
Compute OK
LMLs OK
26
Compute OK
LMLs OK
32
Exception caught computing l+r+p
Compute OK
LMLs OK
38
Exception caught computing l*r
Exception caught computing l*r+p
Compute OK
LMLs OK
44
Compute OK
LMLs OK
50
Exception caught computing p+r
Compute OK
LMLs OK
56
Exception caught computing l*p
Compute OK
LMLs OK
62
Exception caught computing p+r
Exception caught computing l*r
Exception caught computing l*p+r
Compute OK
LMLs OK
68
Exception caught computing l*r
Exception caught computing l+r*p
Compute OK
LMLs OK
74
Compute OK
LMLs OK
80
Exception caught computing l*r
Exception caught computing l*r*p
Compute OK
LMLs OK
86
Exception caught computing l*r
Exception caught computing l+r*p
Exception caught computing l*r+p
Exception caught computing l*r*p
Compute OK
LMLs OK
92
Exception caught computing p+r


# STOP

In [51]:
def filter(dataset):
    return dataset[dataset['f0'] < 30]

In [52]:
temperature = filter(temperature)
rain = filter(rain)
sales = filter(sales)
salary = filter(salary)
gym = filter(gym)
fb = filter(fb)

In [None]:
t0 = time.time()
res_temperature = compute_gps_for_dataset(temperature)
res_rain = compute_gps_for_dataset(rain)
res_sales = compute_gps_for_dataset(sales)
res_salary = compute_gps_for_dataset(salary)
res_gym = compute_gps_for_dataset(gym)
res_fb = compute_gps_for_dataset(fb)
t1 = time.time()
(t1-t0)/60

1
Compute OK
LMLs OK
7
Exception caught computing l*r
Compute OK
LMLs OK
13
Compute OK
LMLs OK
19
Compute OK
Exception caught in lml
Exception caught in lml
LMLs OK
25
Exception caught computing p*r
Exception caught computing l+r*p
Compute OK
LMLs OK
31
Compute OK
LMLs OK
37
Exception caught computing p
Exception caught computing l*r
Compute OK
LMLs OK
43
Compute OK
LMLs OK
49
Compute OK
LMLs OK
55
Compute OK
LMLs OK
61
Exception caught computing p+r
Exception caught computing l*p
Exception caught computing l+r+p
Exception caught computing l*r+p
Exception caught computing l*p+r
Exception caught computing l*r*p
Compute OK
LMLs OK
67
Exception caught computing p+r
Compute OK
Exception caught in lml
LMLs OK
73
Compute OK
LMLs OK
79
Compute OK
LMLs OK
85
Exception caught computing p
Compute OK
LMLs OK
91
Exception caught computing l*r
Exception caught computing l*r*p
Compute OK
LMLs OK
97
Compute OK
LMLs OK
103
Compute OK
LMLs OK
109
Exception caught computing p
Exception caught computing 

In [1]:
res_temperature['maxs']

NameError: name 'res_temperature' is not defined

In [None]:
res_temperature['maxs']
res_rain['maxs']
res_sales['maxs']

In [53]:
t0 = time.time()
res_temperature = compute_gps_for_dataset(temperature)
t1 = time.time()
(t1-t0)/60

1
Compute OK
LMLs OK
7
Exception caught computing l*r
Compute OK
LMLs OK
13
Compute OK
LMLs OK
19
Compute OK
Exception caught in lml
Exception caught in lml
LMLs OK
25
Exception caught computing p*r
Exception caught computing l+r*p
Compute OK
LMLs OK


2.8640705863634746

# DEBUG

In [None]:
temperature = data[data['f0'] == 19]
results = compute_gps_for_dataset(temperature)
results