In [7]:
from IPython.display import IFrame
import pylab as plt
%matplotlib inline
import numpy as np
import copy

# Load data
# Every file is comma-separated with one header row, which is skipped.
# All values are parsed as integers, so later arithmetic on these arrays is
# integer arithmetic until a float operation (division, log, ...) is applied.
X_train = np.loadtxt("X_train.csv", delimiter=',', dtype="int", skiprows=1)
# Only column 1 of y_train.csv is read (column 0 is presumably a row id -- TODO confirm).
y_train = np.loadtxt("y_train.csv", delimiter=',', dtype="int", skiprows=1, usecols=1)
# Targets with a trailing axis added, i.e. as a column vector.
actual = y_train[:,None]
# Number of training rows.
N = X_train.shape[0]
X_test = np.loadtxt("X_test.csv", delimiter=',', dtype="int", skiprows=1)

def plot_feature(X, y, log=True):
    """Scatter ``y`` against ``X`` as green circles in a fresh figure.

    Parameters
    ----------
    X, y : array-like
        Passed straight through to ``plt.plot``.
    log : bool, optional
        When truthy (the default) both axes are put on a log scale before
        the points are drawn.
    """
    plt.figure()
    if log:
        # Same scale on both axes.
        for set_scale in (plt.xscale, plt.yscale):
            set_scale('log')
    plt.plot(X, y, 'go')

def get_weights(X, t):
    """Ordinary least-squares weights for the linear model ``t ~ X w``.

    Solves the normal equations ``(X^T X) w = X^T t``.

    Parameters
    ----------
    X : ndarray, shape (N, D)
        Design matrix, one row per sample.
    t : ndarray, shape (N,) or (N, K)
        Targets.

    Returns
    -------
    ndarray, shape (D,) or (D, K)
        The least-squares weights, matching the trailing shape of ``t``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``X^T X`` is singular.
    """
    gram = np.dot(X.T, X)
    moment = np.dot(X.T, t)
    # Solve the linear system directly instead of forming the explicit
    # inverse: it is cheaper and numerically more accurate, and it matches
    # what get_regularised_weights does.
    return np.linalg.solve(gram, moment)

def get_regularised_weights(X, t, lamb=0):
    """Ridge-regression weights: solve ``(X^T X + lamb * I) w = X^T t``.

    Parameters
    ----------
    X : ndarray, shape (N, D)
        Design matrix.
    t : array-like, shape (N,) or (N, K)
        Targets.
    lamb : float, optional
        Weight placed on the identity added to ``X^T X``; the default of 0
        gives the unregularised normal equations.

    Returns
    -------
    ndarray
        Solution of the linear system, shaped like ``X^T t``.
    """
    n_features = X.shape[1]
    penalised_gram = X.T.dot(X) + lamb * np.identity(n_features)
    rhs = X.T.dot(t)
    return np.linalg.solve(penalised_gram, rhs)

def get_mse(actual, prediction):
    """Root-mean-square error between targets and predictions.

    Despite the name, the returned value is the *square root* of the summed
    squared error divided by the number of rows in ``actual`` (i.e. an RMSE).

    Parameters
    ----------
    actual : array-like, shape (N,) or (N, K)
        Observed targets.
    prediction : array-like
        Predicted targets. If it holds the same number of elements as
        ``actual`` but in a different shape (for example ``(N,)`` against
        ``(N, 1)``) it is reshaped to match ``actual``.

    Returns
    -------
    float
        ``sqrt(sum((actual - prediction)**2) / N)`` with ``N = actual.shape[0]``.
    """
    actual = np.asarray(actual)
    prediction = np.asarray(prediction)
    # An (N, 1) column minus an (N,) vector would broadcast to an (N, N)
    # matrix and silently produce a wrong error. Align the shapes whenever the
    # element counts agree; genuinely broadcastable inputs (e.g. a scalar
    # prediction) are left untouched.
    if prediction.shape != actual.shape and prediction.size == actual.size:
        prediction = prediction.reshape(actual.shape)
    N = actual.shape[0]
    return np.sqrt(np.sum((actual - prediction)**2) / N)

def loocv(X, T, alpha=0):
    """Leave-one-out cross-validation error of the regularised linear model.

    Each row is held out in turn, the weights are fitted on the remaining
    rows with ``get_regularised_weights`` and the squared error on the
    held-out row is accumulated.

    Parameters
    ----------
    X : ndarray
        Design matrix, one row per sample.
    T : ndarray
        Targets, indexed along the first axis in step with ``X``.
    alpha : float, optional
        Regularisation strength forwarded to ``get_regularised_weights``.

    Returns
    -------
    The square root of the accumulated squared error divided by the number
    of rows of ``X``.
    """
    n_samples = X.shape[0]
    total_squared_error = 0

    for held_out in range(n_samples):
        # Training set with the held-out row removed.
        X_rest = np.delete(X, held_out, axis=0)
        T_rest = np.delete(T, held_out, axis=0)

        weights = get_regularised_weights(X_rest, T_rest, alpha)

        residual = np.dot(X[held_out], weights) - T[held_out]
        total_squared_error = total_squared_error + residual**2

    return np.sqrt(total_squared_error / n_samples)

# One column vector per raw input feature, taken from the first six columns
# of the training matrix in file order.
MYCT, MMIN, MMAX, CACH, CHMIN, CHMAX = (
    X_train[:, col, None] for col in range(6)
)
# Training targets as a column vector.
PRP = y_train[:, np.newaxis]



sig_sq = 0.05 # Noise variance -- we assume this is fixed

k = 8 # number of non-bias columns in the design matrix (k+1 weights in total)

# Gaussian prior over the k+1 weights: zero mean, covariance 100 * I.
# The builtin ``float`` is used instead of ``np.float``: that alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
mu0 = np.zeros((k+1,1), dtype=float)
sig0 = 100*np.identity(k+1, dtype=float)

# Construct the X object, and the test one
def _design_matrix(raw):
    """Map raw feature rows to the 9-column design matrix used by the model.

    ``raw`` is a 2-D array whose first six columns are, in order, MYCT, MMIN,
    MMAX, CACH, CHMIN and CHMAX. Building the training and the test matrix
    through this one function keeps their columns identical.
    """
    MYCT, MMIN, MMAX, CACH, CHMIN, CHMAX = (raw[:, col, None] for col in range(6))
    # NOTE(review): the raw arrays are loaded with an integer dtype, so
    # `CACH**CHMIN` and `CHMAX * MMAX * CACH` are evaluated in fixed-width
    # integer arithmetic, which NumPy lets wrap around silently on overflow.
    # The expressions are kept exactly as they were; confirm the value ranges
    # (or cast to float) before relying on these two columns.
    return np.hstack((
        np.ones_like(MYCT),
        MYCT/np.log(MYCT),
        np.log(MMIN),
        np.log(MMAX),
        MMAX,
        MMIN,
        MMAX*MYCT / MMIN,
        CACH**CHMIN,
        CHMAX * MMAX * CACH
    ))


X = _design_matrix(X_train)
testX = _design_matrix(X_test)

# Posterior over the weights. No prior-mean term appears in mu_w, which is
# consistent with the all-zero prior mean `mu0`.
sig_w = np.linalg.inv((1.0/sig_sq) * np.dot(X.T, X) + np.linalg.inv(sig0))
mu_w = (1.0/sig_sq)*np.dot(sig_w, np.dot(X.T, PRP))
print(sig_w)
print(mu_w)

# Predictive mean and variance on the test rows. The previous code multiplied
# the raw six-column X_test by the nine weights and referred to the undefined
# names `testX`, `x`, `t` and `testx`.
predmu = np.dot(testX, mu_w)
# Row-wise quadratic form testX[i] @ sig_w @ testX[i]; this equals the
# diagonal of testX @ sig_w @ testX.T without building the full square matrix.
predvar = sig_sq + np.sum(np.dot(testX, sig_w) * testX, axis=1)

# With six input features there is no single x-axis to plot against, so the
# predictions are drawn against the test-row index. As in the original call,
# the bar length is the predictive variance (pass np.sqrt(predvar) instead
# for standard-deviation bars).
test_rows = np.arange(testX.shape[0])
plt.figure()
plt.errorbar(test_rows, predmu.ravel(), predvar, fmt='ro')
plt.xlabel("test row")
plt.ylabel("predicted PRP")




#print("Training loss:", get_mse(actual, predictions))
#print("Validation loss:", loocv(X, PRP))

[[ 8.83957620e-02 -1.06541941e-04 -6.11761522e-04 -9.92647922e-03
   7.70514416e-07 -1.96760941e-07  4.52750958e-07  1.04984281e-21
  -1.65307430e-11]
 [-1.06541941e-04  5.24306477e-07 -6.25574153e-06  1.57294702e-05
  -4.16722554e-10  9.90748222e-10 -2.84576870e-09 -3.54005470e-24
   1.65569282e-15]
 [-6.11761522e-04 -6.25574153e-06  1.68000137e-03 -1.30516807e-03
   4.97684075e-08 -2.63260045e-07  1.76104585e-07  1.20585385e-22
  -3.06203916e-13]
 [-9.92647922e-03  1.57294702e-05 -1.30516807e-03  2.26394881e-03
  -1.41815524e-07  2.42068000e-07 -1.94445224e-07 -2.00677838e-22
   2.51958405e-12]
 [ 7.70514416e-07 -4.16722554e-10  4.97684075e-08 -1.41815524e-07
   1.89354170e-11 -2.68037167e-11  6.39557725e-12  1.40478228e-27
  -4.96722203e-16]
 [-1.96760941e-07  9.90748222e-10 -2.63260045e-07  2.42068000e-07
  -2.68037167e-11  9.78427175e-11 -2.18487142e-11 -1.13231649e-26
   4.31260780e-16]
 [ 4.52750958e-07 -2.84576870e-09  1.76104585e-07 -1.94445224e-07
   6.39557725e-12 -2.1848714

In [44]:
X_test = np.loadtxt("X_test.csv", delimiter=',', dtype="int", skiprows=1)
# One column vector per raw test feature. These rebind the notebook-level
# names MYCT ... CHMAX (and N, X below) that the training cell also assigns.
MYCT, MMIN, MMAX, CACH, CHMIN, CHMAX = (
    X_test[:, col, None] for col in range(6)
)
N = X_test.shape[0]

# Model
# NOTE(review): the second column here is MYCT/log(MYCT*0.000001), whereas the
# training cell builds MYCT/log(MYCT) -- confirm which form the weights `w`
# were fitted with.
X = np.hstack([
    np.ones_like(MYCT),
    MYCT/np.log(MYCT*0.000001),
    np.log(MMIN),
    np.log(MMAX),
    MMAX,
    MMIN,
    MMAX*MYCT / MMIN,
    CACH**CHMIN,
    CHMAX * MMAX * CACH
])

# NOTE(review): `w` is not assigned anywhere in the cells visible here; it
# presumably comes from another cell -- verify on a fresh kernel.
predictions = np.dot(X, w).flatten()

# Two-column table: row id, predicted PRP.
pred = np.ones((N, 2))
pred[:, 0], pred[:, 1] = range(N), predictions
# fmt='%d' writes both columns as integers.
np.savetxt(
    'predictions_regression.csv',
    pred,
    fmt='%d',
    delimiter=",",
    header="Id,PRP",
    comments="",
)