In [1]:
from IPython.display import IFrame

In [2]:
IFrame("hw6.pdf", width=1000, height=1000)

## Problem 1

__[b]__

If $ \mathcal{H}' \subset \mathcal{H} $, then $\mathcal{H}'$ is a less expressive hypothesis set. Thus, if there is deterministic noise already when we try to fit $f$ from the larger hypothesis set $\mathcal{H}$, there will be even more noise when we use this subset.

In [5]:
import urllib
import numpy as np

In [16]:
import urllib.request

In [74]:
def in_dta():
    with urllib.request.urlopen("http://work.caltech.edu/data/in.dta") as fpin:
        lines = fpin.read().splitlines()
        for line in lines:
            print(line)
        values = [[float(value) for value in line.strip('\n').split('\r')[0].split()] for line in lines]
    return values

In [42]:
def out_dta():
    fpin = urllib.request.urlopen("http://work.caltech.edu/data/out.dta").read().decode('utf-8')
    for line in fpin:
        print(line)
    # print([float(value) for value in line.strip('\n').split('\r')[0].split()] for line in fpin)
    return [[float(value) for value in line.strip('\n').split('\r')[0].split()] for line in fpin]

In [6]:
def transform(point):
    """
    point is of form (x,y) with x in R2
    """
    return [1, point[0], point[1], point[0]**2, point[1]**2, point[0]*point[1], abs(point[0] - point[1]), abs(point[0] + point[1]), point[2]]

In [7]:
def transformPoints(points):
    transformedPoints = []
    for point in points:
        transformedPoints.append(transform(point))
    return transformedPoints

In [8]:
def linearRegression(samplePoints):
    X = []
    y = []
    y_location = len(samplePoints[0]) -1 # y's location is assumed to be the last element in the list
    
    # construct X, split y vals
    for point in samplePoints:
        X.append(np.array(point[:y_location]))
        y.append(point[y_location])
        
    # convert to np
    X = np.array(X)
    y = np.array(y)
    X_inv = np.linalg.pinv(X)
    
    # use w = X_inv * y one-shot learning
    return X_inv.dot(y)

In [9]:
def regularizedLinearRegression(samplePoints, l):
    """
    perform LR with regularization, where l is lambda
    """
    X = []
    y = []
    y_location = len(samplePoints[0]) -1 # y's location is assumed to be the last element in the list
    
    # construct X, split y vals
    for point in samplePoints:
        X.append(np.array(point[:y_location]))
        y.append(point[y_location])
        
    weights = linearRegression(samplePoints) # get weights to use for regularization
    X = np.array(X)
    
    # get the regularized form of the inverse, taking the pseudo-inv of X + lambda/N * wTw
    X_regInv = np.linalg.pinv(X + np.array(l / len(samplePoints) * weights.dot(weights)))
    
    return X_regInv.dot(y) # again using one-shot learning

In [10]:
def Ein(weights, samplePoints):
    """
    Returns E_in given weights, sample pts
    Assumes samplePoints is a list of lists, last elment in each list
    is the y value.
    """
    errorCount = 0
    y_loc = len(samplePoints[0]) - 1
    for point in samplePoints:
        if np.sign(np.dot(weights, point[:y_loc])) != point[y_loc]:
            errorCount += 1
            
    return errorCount / float(len(samplePoints)) # return as a percentage

In [12]:
def q2():
    trainPoints = in_dta()
    testPoints = out_dta()
    
    transformedTrain = transformPoints(trainPoints) # to train LR
    transformedTest = transformPoints(testPoints) # to test weights
    
    weights = linearRegression(transformedTrain)
    
    print("E_in: {}, E_out: {}".format(Ein(weights, transformedTrain), Ein(weights,  transformedTest)))

In [13]:
def q3(l):
    trainPoints = in_dta()
    testPoints = out_dta()
    
    transformedTrain = transformPoints(trainPoints) # to train LR
    transformedTest = transformPoints(testPoints) # to test weights
    
    weights = regularizedLinearRegression(transformedTrain, l) # this time w/ regularization
    
    print("E_in: {}, E_out: {}".format(Ein(weights, transformedTrain), Ein(weights,  transformedTest)))

In [14]:
def q5(low, high):
    trainPoints = in_dta()
    testPoints = out_dta()
    
    transformedTrain = transformPoints(trainPoints) # to train LR
    transformedTest = transformPoints(testPoints) # to test weights
    
    for i in range(low, high+1):
        e_out = Ein(regularizedLinearRegression(transformedTrain, 10**i), transformedTest)
        print("k = {}, E_out = {}".format(i, e_out))

## Question 2

In [75]:
q2()

b'  -7.7947021e-01   8.3822138e-01   1.0000000e+00'
b'   1.5563491e-01   8.9537743e-01   1.0000000e+00'
b'  -5.9907703e-02  -7.1777995e-01   1.0000000e+00'
b'   2.0759636e-01   7.5893338e-01   1.0000000e+00'
b'  -1.9598312e-01  -3.7548716e-01  -1.0000000e+00'
b'   5.8848947e-01  -8.4255381e-01   1.0000000e+00'
b'   7.1985874e-03  -5.4831650e-01  -1.0000000e+00'
b'   7.3883852e-01  -6.0339369e-01   1.0000000e+00'
b'   7.0464808e-01  -2.0420052e-02   1.0000000e+00'
b'   9.6992666e-01   6.4137120e-01  -1.0000000e+00'
b'   4.3543099e-01   7.4477254e-01  -1.0000000e+00'
b'  -8.4425822e-01   7.4235423e-01   1.0000000e+00'
b'   5.9142471e-01  -5.4602118e-01   1.0000000e+00'
b'  -6.9093124e-02   3.7659995e-02  -1.0000000e+00'
b'  -9.5154865e-01  -7.3305502e-01  -1.0000000e+00'
b'  -1.2988138e-01   7.5676096e-01   1.0000000e+00'
b'  -4.9534647e-01  -5.6627908e-01  -1.0000000e+00'
b'  -9.0399413e-01   5.0922150e-01   1.0000000e+00'
b'   2.9235128e-01   1.6089015e-01  -1.0000000e+00'
b'   6.47985

TypeError: a bytes-like object is required, not 'str'

In [50]:
line = '  -7.7947021e-01   8.3822138e-01   1.0000000e+00\r\n'

In [51]:
values = line.strip('\n').split('\r')[0].split()

In [52]:
values

['-7.7947021e-01', '8.3822138e-01', '1.0000000e+00']

In [53]:
for value in values:
    print(float(value))

-0.77947021
0.83822138
1.0


## Question 3

## Question 4

## Question 5

## Question 6

## Problem 7

__[c]__

Heuristically, what the constraint is saying is that for all weights $w_q$ corresponding to variables of a degree higher than $Q_0$, we want to set those weights to the value $C$.

We can see that if we take the intersection of the constraint that squashes all weights for degree $3$ or higher and that which zeros all weights corresponding to degree $4$ or higher, we just get the hypothesis space $\mathcal{H}_2$.

## Problem 8

We have 18 operations for the 1-0 layer and 4 operations for the 2-1 layer which adds up to $22$ for the backward pass.

## Problem 9

__[a]__

## Problem 10

For the max number of nodes, we want to connect every node to every node in the next layer. Also, with $36$ hidden units we have some leeway in how we construct our network--we can place them all in a single layer, or do something like create $6$ layers with $6$ neurons each.