In [1]:
import numpy as np
import matplotlib.pyplot as plt

## 1

In [27]:
X = np.array([[0,1,0,1], [3,3,1,1], [1,1,1,1]]).T
y = np.array([1,1,0,0])
w = np.array([-2,1,0])
lambd = .07

In [289]:
def s(X, w):
    
    return sigmoid(X,w) * (1 - sigmoid(X,w)) * np.eye(X.shape[0])

def sigmoid(X, w):
    
#     return - np.log(1 + np.exp(-X @ w))
    
    return 1 / (1 + np.exp(- X @ w))
    
    
def e(X, w, lambd):
    
    return np.linalg.inv(2 * lambd + X.T @ s(X, w) @ X) @ (X.T @ (sigmoid(X,w) - y) - 2 * lambd * w)

def update(X, w, lambd):
    
    w = e(X,w,lambd) + w
    return w

### 1.4

#### (a)

In [76]:
sigmoid(X, w)

array([0.95257413, 0.73105858, 0.73105858, 0.26894142])

#### (b)

In [77]:
w = update(X, w, lambd)
w

array([-2.50536624, -0.75635058,  4.08203851])

#### (c)

In [78]:
sigmoid(X, w)

array([0.85972246, 0.33350396, 0.96529962, 0.69430462])

#### (d)

In [79]:
w = update(X, w, lambd)
w

array([-2.52693901, -2.41320625,  7.70142503])

## 4

In [7]:
import scipy.io as sio

In [825]:
def split(data, size):
    from math import floor
    
    dat = data.copy()
    
    # for reproducibility
    np.random.seed(24)
    
    # shuffle copied data
    np.random.shuffle(dat)
    
    if type(size) == float:
        size = floor(len(dat) * size)

    # training_data, validation_data, training_label, validation_label
    return dat[size:, :-1], dat[:size, :-1], dat[size:, -1], dat[:size, -1]


def sigmoid(r):

    return 1 / (1 + np.exp(-r))
    
    
def loss(X, w, y, lambd, t):
    
    r = X @ w
    
    u1 = -np.log(1 + np.exp(-r))
    u2 = np.log(1 - 1 / (1 + np.exp(-r)))
    
    u1[r <= -t] = r[r <= -t]
    u2[r >= t] = 0
    
    ret = (y @ u1 + (1 - y) @ u2) / X.shape[0]

    return lambd * np.dot(w,w) - ret
    


def deriv(X, w, y, lambd):
    
    r = X @ w
    
    return 2 * lambd * w - (X.T @ (sigmoid(r) - y)) / X.shape[0]


def update(X, w, y, lambd, learning_rate):
    
    dw = deriv(X, w, y, lambd)

    return w - learning_rate * dw


def train(X, y, epochs, learning_rate, lambd, t):
    
    w = np.zeros(X.shape[1])
    loss_history = []
    
    for i in range(epochs):
        
        l = loss(X, w, y, lambd, t)

        w = update(X, w, y, lambd, learning_rate)
        
        if i % 1 == 0:
        
            print(f'Epoch : {i}, Loss : {l}')
            
        loss_history.append(l)
    
    return loss_history

In [824]:
lambd = 2
learning_rate = 1e-8

epochs = 100
threshold = 20


loss_history = train(X_train, y_train, epochs, learning_rate, lambd, threshold)

Epoch : 0, Loss : 0.6931471805599467
Epoch : 1, Loss : 0.6931701294058326
Epoch : 2, Loss : 0.6931930802908044
Epoch : 3, Loss : 0.6932160332150418
Epoch : 4, Loss : 0.6932389881787263
Epoch : 5, Loss : 0.693261945182039
Epoch : 6, Loss : 0.6932849042251612
Epoch : 7, Loss : 0.693307865308274
Epoch : 8, Loss : 0.693330828431559
Epoch : 9, Loss : 0.6933537935951969
Epoch : 10, Loss : 0.6933767607993694
Epoch : 11, Loss : 0.6933997300442578
Epoch : 12, Loss : 0.6934227013300427
Epoch : 13, Loss : 0.6934456746569067
Epoch : 14, Loss : 0.6934686500250299
Epoch : 15, Loss : 0.693491627434594
Epoch : 16, Loss : 0.6935146068857808
Epoch : 17, Loss : 0.6935375883787708
Epoch : 18, Loss : 0.693560571913746
Epoch : 19, Loss : 0.6935835574908874
Epoch : 20, Loss : 0.6936065451103769
Epoch : 21, Loss : 0.6936295347723952
Epoch : 22, Loss : 0.6936525264771237
Epoch : 23, Loss : 0.6936755202247444
Epoch : 24, Loss : 0.6936985160154384
Epoch : 25, Loss : 0.6937215138493866
Epoch : 26, Loss : 0.693744

In [None]:
np.log(1 - 1 / (1 + np.exp(-r)))

In [809]:
1 / (1 + np.exp(-r))

array([0.92067659, 0.9999503 , 0.99999987, ..., 0.95401182, 0.99990109,
       0.99996037])

In [760]:
hist

[array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 

In [747]:
sum((X_train @ w) > 0)

0

In [665]:
X_train @ wl[1]

array([-0.30030348, -1.19352349, -1.90513088, ..., -0.37113953,
       -1.10888829, -1.21816368])

In [581]:
dat = sio.loadmat('./data.mat')

In [582]:
train = dat['X']
labels = dat['y']

data = np.hstack((train, labels))

X_train, X_test, y_train, y_test = split(data, 600)

X_train, X_val, y_train, y_val = split(np.hstack((X_train, y_train.reshape(-1, 1))), 500)

In [583]:
X_train.shape, y_train.shape, X_val.shape, y_val.shape

((4900, 12), (4900,), (500, 12), (500,))

Testing loss before any update

In [584]:
w = np.zeros(X_train.shape[1])
loss(X_train, w, y_train, lambd)

3396.421184743739

Testing update

In [587]:
a = np.zeros(X_train.shape[1])
a = update(X_train, a, y_train, lambd, learning_rate)
a

array([-3.45864341e-02, -1.78853853e-03, -1.41068535e-03, -2.15074113e-02,
       -2.93009160e-04, -1.22380639e-01, -4.47388492e-01, -4.58637486e-03,
       -1.49372081e-02, -2.59413776e-03, -4.83081355e-02, -2.66141822e-03])

#### 1

In [218]:
lambd = np.arange(0, 50, 5)

In [623]:
lambd = 0
learning_rate = 1e-6
epochs = 1000
threshold = 30

loss_history = train(X_train, y_train, epochs, learning_rate, lambd)