In [76]:
from IPython.display import IFrame
import pylab as plt
%matplotlib inline
import numpy as np

# Training inputs and targets are read as integers; the first row of each
# CSV is skipped as a header, and only column 1 of y_train.csv is kept.
X_train = np.loadtxt("X_train.csv", dtype="int", delimiter=',', skiprows=1)
y_train = np.loadtxt("y_train.csv", dtype="int", delimiter=',', usecols=1, skiprows=1)

# Targets as a column vector with one row per training sample.
t = y_train[:, np.newaxis]

def get_mse(actual, prediction):
    """Return the root of the mean squared difference between two arrays.

    Despite the name, the returned value is the square root of the mean
    squared error (an RMSE), not the raw MSE.

    Parameters
    ----------
    actual : np.ndarray
        Observed values; the length of its first axis is the divisor.
    prediction : np.ndarray
        Predicted values, subtracted element-wise from ``actual``.

    Returns
    -------
    float
        sqrt(sum((actual - prediction)^2) / actual.shape[0]).
    """
    sample_count = actual.shape[0]
    squared_residuals = np.square(actual - prediction)
    return np.sqrt(squared_residuals.sum() / sample_count)

def get_priors(weight_count):
    """Build the Gaussian prior over the regression weights.

    Parameters
    ----------
    weight_count : int
        Number of weights (columns of the design matrix).

    Returns
    -------
    tuple of np.ndarray
        ``(mean, cov)`` where ``mean`` is a ``(weight_count, 1)`` column of
        zeros and ``cov`` is 10 times the ``weight_count``-sized identity.
    """
    zero_mean = np.zeros((weight_count, 1))
    isotropic_cov = np.eye(weight_count) * 10
    return zero_mean, isotropic_cov

def get_posteriors(X, t, prior_cov, sigma2):
    """Compute the Gaussian posterior over the weights of a linear model.

    The prior mean is taken to be zero: no prior-mean term enters the
    posterior mean below.

    Parameters
    ----------
    X : np.ndarray
        Design matrix with one row per sample and one column per weight.
    t : np.ndarray
        Targets, one row per sample.
    prior_cov : np.ndarray
        Prior covariance of the weights (square, invertible).
    sigma2 : float
        Fixed noise variance.

    Returns
    -------
    tuple of np.ndarray
        ``(posterior_mu, posterior_cov)``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``prior_cov`` or the posterior precision matrix is singular.
    """
    # Posterior precision = data precision + prior precision.
    posterior_cov = np.linalg.inv((1.0 / sigma2) * np.dot(X.T, X) + np.linalg.inv(prior_cov))
    # Use the `t` argument; the previous code referenced an undefined name.
    posterior_mu = (1.0 / sigma2) * np.dot(posterior_cov, np.dot(X.T, t))
    return posterior_mu, posterior_cov

def get_sampled_weights(mu, covariance, count=10):
    """Draw weight vectors from a multivariate normal distribution.

    Parameters
    ----------
    mu : array-like
        Mean of the distribution; flattened to 1-D before sampling, so a
        column vector is accepted.
    covariance : array-like
        Covariance matrix of the distribution.
    count : int, optional
        Number of samples to draw (default 10).

    Returns
    -------
    np.ndarray
        Array of shape ``(count, len(mu))``, one sampled weight vector per row.
    """
    # Sample from the distribution passed in, not from notebook globals.
    return np.random.multivariate_normal(np.ravel(mu), covariance, count)

def pick_best_weights(X, t, weights):
    """Return the candidate weight vector with the lowest error on (X, t).

    Each row of ``weights`` is scored with ``get_mse`` on the predictions
    ``np.dot(X, w)``; the first row achieving the smallest score wins.

    Parameters
    ----------
    X : np.ndarray
        Design matrix with one row per sample.
    t : np.ndarray
        Targets; flattened to 1-D before comparison with the predictions.
    weights : sequence of np.ndarray
        Candidate weight vectors, one per row.

    Returns
    -------
    np.ndarray
        The best-scoring row of ``weights``. If no candidate yields a
        comparable (non-NaN) score, the first row is returned.

    Raises
    ------
    IndexError
        If ``weights`` is empty.
    """
    targets = t.ravel()
    best_w = weights[0]
    # Start from infinity so any finite error is accepted, however large.
    lowest_mse = np.inf

    # Iterate over the candidates passed in, not over a notebook global.
    for w in weights:
        mse = get_mse(targets, np.dot(X, w))
        if mse < lowest_mse:
            best_w, lowest_mse = w, mse

    return best_w
    

# Split the first six columns of the training matrix into (N, 1) column
# vectors, named after the columns they are assumed to hold.
MYCT, MMIN, MMAX, CACH, CHMIN, CHMAX = (
    X_train[:, col][:, np.newaxis] for col in range(6)
)
# Training targets as an (N, 1) column vector.
PRP = y_train[:, np.newaxis]

# Design matrix, using the feature set ported from linear regression.
# NOTE(review): the columns come from arrays loaded with an integer dtype, so
# CACH ** CHMIN and the triple product are integer operations that NumPy does
# not overflow-check -- confirm the data range keeps them representable.
X = np.concatenate(
    [
        np.ones_like(MYCT),        # bias column
        np.log(MYCT),
        np.log(MMIN),
        np.log(MMAX),
        MMAX,
        MMIN,
        np.power(CACH, CHMIN),
        CHMAX * MMAX * CACH,
    ],
    axis=1,
)

# Assume noise is fixed
sigma2 = 0.5
weight_count = X.shape[1]

# Seed NumPy's global generator so the sampled weights (and therefore the
# selected `w`) are the same on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Priors
prior_mean, prior_cov = get_priors(weight_count)

# Posteriors
posterior_mu, posterior_cov = get_posteriors(X, t, prior_cov, sigma2)

# Generate weight samples
weight_samples = get_sampled_weights(posterior_mu, posterior_cov)

# Keep the sampled weight vector with the lowest training error
w = pick_best_weights(X, t, weight_samples)

In [69]:
# Test inputs are read as integers; the first row of the CSV is skipped as a header.
X_test = np.loadtxt("X_test.csv", dtype="int", delimiter=',', skiprows=1)

# Split the first six columns into (N, 1) column vectors. These names are
# rebound here, replacing the training-set columns of the same names.
MYCT, MMIN, MMAX, CACH, CHMIN, CHMAX = (
    X_test[:, col][:, np.newaxis] for col in range(6)
)
# Number of test rows.
N = len(X_test)

# Model: the same eight feature columns as the training design matrix,
# now built from the test-set columns (this rebinds X).
# NOTE(review): the columns come from arrays loaded with an integer dtype, so
# CACH ** CHMIN and the triple product are integer operations that NumPy does
# not overflow-check -- confirm the data range keeps them representable.
X = np.concatenate(
    [
        np.ones_like(MYCT),        # bias column
        np.log(MYCT),
        np.log(MMIN),
        np.log(MMAX),
        MMAX,
        MMIN,
        np.power(CACH, CHMIN),
        CHMAX * MMAX * CACH,
    ],
    axis=1,
)

# Predict with the weight vector `w` selected in the training cell.
predictions = (X @ w).ravel()

# Two-column table: row index as the Id, prediction as PRP.
pred = np.column_stack((np.arange(N), predictions))

# NOTE(review): fmt='%d' writes each value through integer formatting, so
# fractional predictions are written without their fractional part -- confirm
# truncation rather than rounding is intended.
np.savetxt(
    'predictions_bayesian.csv',
    pred,
    fmt='%d',
    delimiter=",",
    header="Id,PRP",
    comments="",
)
print("Predictions saved")

Predictions saved
