In [1]:
import numpy as np
import math

In [18]:
class LinReg:
    """Least-squares linear regression with an offset (bias) term.

    The model is h(x) = [1, x] . w; the weights are fitted with the
    pseudo-inverse of the augmented input matrix.
    """

    def __init__(self, dim):
        """Create an untrained model with all-zero weights.

        dim -- number of input features; values below 1 are raised to 1.
        """
        self.dim = max(1, dim)
        # Size the weights from the clamped self.dim rather than the raw
        # argument so the two attributes always agree (the extra row is the
        # offset weight). Using the raw value gave a (1, 1) vector for dim=0
        # and made np.zeros raise for negative dim.
        self.weights = np.zeros((1 + self.dim, 1))

    def X_reshape(self,X):
        """Return X with a leading column of ones (the constant input x0)."""
        num_examples = X.shape[0]
        real_X = np.c_[np.ones(num_examples), X]
        return real_X

    def predict(self,X):
        """Return the raw (real-valued) outputs of the model for the rows of X."""
        real_X = self.X_reshape(X)
        cur_h = np.matmul(real_X, self.weights)
        return cur_h

    def train(self,X,Y):
        """Fit the weights as pinv([1, X]) . Y.

        Inputs are assumed to be numpy ndarrays of matching length; no
        validation is done. The stored weights take their trailing shape from
        Y: an (n, 1) target gives a column vector, a 1-D target gives 1-D
        weights (and therefore 1-D predictions).
        """
        real_X = self.X_reshape(X)
        pinv_X = np.linalg.pinv(real_X)
        self.weights = np.dot(pinv_X,Y)
        
class LinRegNLT2(LinReg):
    """Linear regression on a fixed nonlinear transform of the inputs.

    Every example x is expanded to the feature row

        (1, x, x**2, prod(x), |x_0 - sum(x_1:)|, |sum(x)|)

    i.e. 2*dim + 3 features plus the constant term, and only columns 0..k of
    that row are used for fitting and prediction.
    """

    def __init__(self, dim, k, l_reg):
        """Create the model.

        dim   -- number of raw input features.
        k     -- highest transformed column (0-indexed) to use; negative
                 values are raised to 0.
        l_reg -- lambda, the weight-decay coefficient used by train_reg().
        """
        #want squares of both elements, both elts multiplied, then abs sub and add
        # = 2*dim + 3
        self.dim = (2*dim + 3)
        #adding the x0 bit
        # NOTE: the initial weights span the full transform, while X_reshape
        # keeps only k + 1 columns, so predict() is shape-consistent only after
        # training at the current k (or when k + 1 equals self.dim + 1).
        self.weights = np.zeros((self.dim + 1, 1))
        self.k = int(max(0,k)) # use columns up through k in nonlinear mapping (0-idx)
        self.l_reg = l_reg #lambda regularization term

    def set_lambda(self, l_reg):
        """Replace the regularization coefficient used by train_reg()."""
        self.l_reg = l_reg

    def set_k(self, k):
        """Select how many transformed columns (0..k) are used; k is clamped at 0."""
        self.k = int(max(0,k))

    def X_reshape(self,X):
        """Apply the nonlinear transform to X and keep columns 0..self.k."""
        num_ex = X.shape[0] #number of examples
        X_mult = np.prod(X, axis=1) # product of all raw features per example
        # First column kept, remaining columns negated, so that the row sum
        # below is x_0 - x_1 - ... (the "subtraction" feature).
        X_sub_mtx = np.c_[ X[:,0], np.multiply(-1, X[:,1:])]
        X_res = np.c_[np.ones(num_ex), X, np.square(X), X_mult, np.abs(np.sum(X_sub_mtx, axis=1)), np.abs(np.sum(X, axis=1))]
        return X_res[:,:(self.k + 1)]

    def calc_error(self, X,Y):
        """Return the fraction of examples whose predicted sign differs from sign(Y).

        Predictions and labels are flattened before comparison. Without that,
        an (n, 1) prediction compared against an (n,) label vector (or the
        reverse) broadcasts to an (n, n) matrix and the mismatch count is
        silently wrong.

        Raises ValueError if the number of predictions and labels differ.
        """
        num_ex = X.shape[0]
        predicted = np.sign(np.ravel(self.predict(X)))
        actual = np.sign(np.ravel(Y))
        if predicted.shape != actual.shape:
            raise ValueError("got %d predictions but %d labels" % (predicted.size, actual.size))
        num_incorrect = np.sum(np.not_equal(predicted, actual))
        prop_incorrect = float(num_incorrect)/float(num_ex)
        return prop_incorrect

    #training with regularization:
    # (ZT*Z + lambda*I)^-1 * ZT*y
    def train_reg(self, X,Y):
        """Fit the weights with weight decay: solve (Z^T Z + lambda I) w = Z^T y.

        The linear system is solved directly instead of forming the explicit
        inverse, which is cheaper and numerically more accurate. A singular
        system still raises numpy.linalg.LinAlgError.
        """
        X_res = self.X_reshape(X)
        xtx = np.dot(X_res.T, X_res)
        lm = np.multiply(self.l_reg, np.identity(xtx.shape[0])) #lambda*I
        self.weights = np.linalg.solve(np.add(xtx, lm), np.dot(X_res.T, Y))

In [19]:
class LFD_Data:
    """Holds a training set and a test set read from whitespace-separated text files.

    Each non-blank line of a file is one example: every field but the last is
    an input feature, and the last field is the target value.
    """

    def load_file(self, filename):
        """Read one data file and return (X, Y).

        X has shape (num_examples, num_features) and Y has shape
        (num_examples,), both float. As a side effect self.dim is set to the
        number of features found in this file.

        Blank lines (for instance a trailing newline at the end of the file)
        are skipped. Raises ValueError if the file holds no data rows, if a
        field is not a number, or if the rows differ in length.
        """
        rows = []
        with open(filename) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue # blank line: nothing to parse
                rows.append([float(x) for x in fields])
        if not rows:
            raise ValueError("no data rows found in %s" % filename)
        width = len(rows[0])
        for row_num, row in enumerate(rows):
            if len(row) != width:
                raise ValueError("data row %d of %s has %d fields, expected %d"
                                 % (row_num + 1, filename, len(row), width))
        # Build the array once from the collected rows; concatenating onto a
        # growing array for every line copies the whole array each time.
        data = np.array(rows, dtype=float).reshape((len(rows), width))
        ret_X = data[:, :-1].copy() #everything but last elt
        ret_Y = data[:, -1].copy() #last elt
        self.dim = width - 1
        return ret_X, ret_Y

    def __init__(self, trainfile, testfile):
        """Load both files into train_X/train_Y and test_X/test_Y.

        self.dim ends up as the feature count of the file loaded last (the
        test file), because load_file overwrites it on every call.
        """
        self.dim = 0
        self.train_X, self.train_Y = self.load_file(trainfile)
        self.test_X, self.test_Y = self.load_file(testfile)

In [22]:
# In-sample and out-of-sample data files.
rwd_train, rwd_test = "in.dta", "out.dta"
# lambda = 10^-3, carried over from the previous homework. It is handed to the
# model, but only train_reg() reads it, so plain train() ignores the value.
l_reg = math.pow(10.0, -3)

# Read both data files, then build the nonlinear-transform model starting at k = 7.
rwd_data = LFD_Data(trainfile=rwd_train, testfile=rwd_test)
rwd_algo = LinRegNLT2(dim=rwd_data.dim, k=7, l_reg=l_reg)

#editing from hw6 to make more flexible
def rwd_print_error(algo,valid_X, valid_Y, out_X, out_Y):
    """Print the validation error and the out-of-sample error of ``algo``.

    ``algo`` must provide ``calc_error(X, Y)``. It is called first on the
    validation pair and then on the out-of-sample pair; both results go onto
    a single formatted line. Nothing is returned.
    """
    datasets = ((valid_X, valid_Y), (out_X, out_Y))
    errors = tuple([algo.calc_error(inputs, labels) for inputs, labels in datasets])
    print("E_valid: %f, E_out: %f" % errors)
    
# Both experiments sweep the same k values (0-indexed: transformed columns 0..k
# are used) and train without regularization. They differ only in which rows of
# the training file are fitted and which are held out for validation.
my_k = np.arange(3,8)
head_rows, tail_rows = slice(None, 25), slice(25, None)
rwd_runs = (
    ("=== LinReg with NLT using the first 25 examples for training (last 10 for validation) ===", head_rows, tail_rows),
    ("=== LinReg with NLT using the last 10 examples for training (first 25 for validation) ===", tail_rows, head_rows),
)

for run_idx, (banner, fit_rows, val_rows) in enumerate(rwd_runs):
    if run_idx > 0:
        print("") # blank line separating the two reports
    print(banner)
    for k in my_k:
        print("k = %d :" % k)
        rwd_algo.set_k(k)
        rwd_algo.train(rwd_data.train_X[fit_rows,:], rwd_data.train_Y[fit_rows])
        rwd_print_error(rwd_algo, rwd_data.train_X[val_rows,:], rwd_data.train_Y[val_rows], rwd_data.test_X, rwd_data.test_Y)

=== LinReg with NLT using the first 25 examples for training (last 10 for validation) ===
k = 3 :
E_valid: 0.300000, E_out: 0.420000
k = 4 :
E_valid: 0.500000, E_out: 0.416000
k = 5 :
E_valid: 0.200000, E_out: 0.188000
k = 6 :
E_valid: 0.000000, E_out: 0.084000
k = 7 :
E_valid: 0.100000, E_out: 0.072000

=== LinReg with NLT using the last 10 examples for training (first 25 for validation) ===
k = 3 :
E_valid: 0.280000, E_out: 0.396000
k = 4 :
E_valid: 0.360000, E_out: 0.388000
k = 5 :
E_valid: 0.200000, E_out: 0.284000
k = 6 :
E_valid: 0.080000, E_out: 0.192000
k = 7 :
E_valid: 0.120000, E_out: 0.196000


In [25]:
# Monte Carlo estimate of E[min(e1, e2)] for two independent Uniform(0, 1)
# samples, printed next to the sample means of e1 and e2 themselves.
SEED = 0 # fixed seed so a fresh kernel reproduces the printed numbers
n = 10000
# A private generator keeps the draws reproducible without touching numpy's
# global random state.
rng = np.random.RandomState(SEED)
e1 = rng.uniform(0,1, n)
e2 = rng.uniform(0,1, n)
e = np.minimum(e1, e2) # element-wise minimum of the paired draws

ev_e1 = np.average(e1)
ev_e2 = np.average(e2)
ev_e = np.average(e)

print("Expected values: ")
print("e: %f" % ev_e)
print("e1: %f" % ev_e1)
print("e2: %f" % ev_e2)

Expected values: 
e: 0.332915
e1: 0.498990
e2: 0.500600
