In [35]:
import pandas as pd
import numpy as np
import torch

# Task 1

Loading the synthetic dataset.

In [36]:
# You may need to edit the path, depending on where you put the files.
data = pd.read_csv('./data/a4_synthetic.csv')

X = data.drop(columns='y').to_numpy()
Y = data.y.to_numpy()

Training a linear regression model for this synthetic dataset.

In [37]:
np.random.seed(1)

w_init = np.random.normal(size=(2, 1)) #loc=0.0:Mean (“centre”) of the distribution, scale=1.0:Standard deviation, size=(2, 1) ->matrix= [[0,0]] 2x1
b_init = np.random.normal(size=(1, 1))

# We just declare the parameter tensors. Do not use nn.Linear.
w = torch.tensor(w_init, requires_grad = True)# a tensor initialized as w_init This is the weight
b = torch.tensor(b_init, requires_grad = True)# a tensor initialized as b_init This is the bias

eta = 1e-2
opt = torch.optim.SGD(params=[w,b] ,lr=eta)# a SGD optimizer with a learning rate of eta

for i in range(10):
    
    sum_err = 0

    for row in range(X.shape[0]):
        opt.zero_grad()
        x = torch.tensor(X[[row], :])
                                     
        y = torch.tensor(Y[[row]]) 
        # Forward pass.
        y_pred = x @ w + b#compute predicted value for x
        err =  (y_pred-y)**2  

        err.backward()
        opt.step()
        # For statistics.
        sum_err += err.item()

    mse = sum_err / X.shape[0]
    print(f'Epoch {i+1}: MSE =', mse)

Epoch 1: MSE = 0.7999661130823178
Epoch 2: MSE = 0.017392390107906875
Epoch 3: MSE = 0.009377418010839892
Epoch 4: MSE = 0.009355326971438456
Epoch 5: MSE = 0.009365440968904256
Epoch 6: MSE = 0.009366989180952533
Epoch 7: MSE = 0.009367207398577986
Epoch 8: MSE = 0.009367238983974489
Epoch 9: MSE = 0.009367243704122532
Epoch 10: MSE = 0.009367244427185763


# Task 2

In [38]:
class Tensor:
    
    # Constructor. Just store the input values.
    def __init__(self, data, requires_grad=False, grad_fn=None):
        self.data = data
        self.shape = data.shape
        self.grad_fn = grad_fn
        self.requires_grad = requires_grad
        self.grad = None
        
    # So that we can print the object or show it in a notebook cell.
    def __repr__(self):
        dstr = repr(self.data)
        if self.requires_grad:
            gstr = ', requires_grad=True'
        elif self.grad_fn is not None:
            gstr = f', grad_fn={self.grad_fn}'
        else:
            gstr = ''
        return f'Tensor({dstr}{gstr})'
    
    # Extract one numerical value from this tensor.
    def item(self):
        return self.data.item()    
    
    
    # For Task 2:
    
    # Operator +
    def __add__(self, right):
        # Call the helper function defined below.
        return addition(self, right)

    # Operator -
    def __sub__(self, right):
        return subtraction(self, right)
                
    # Operator @
    def __matmul__(self, right):
        return matmul(self, right)

    # Operator *
    def __mul__(self, right):
        return mul(self, right)
    def __rmul__(self, right):
        return mul(self, right)        
    # Operator **
    def __pow__(self, right):
        # NOTE! We are assuming that right is an integer here, not a Tensor!
        if not isinstance(right, int):
            raise Exception('only integers allowed')
        if right < 2:
            raise Exception('power must be >= 2')
        return power(self, right)

    
    # Backward computations. Will be implemented in Task 4.
    def backward(self, grad_output=None):
        if self.grad_fn is not None:

            if grad_output is None:
                self.grad_fn.backward(grad_output=1)
            else:
                self.grad_fn.backward(grad_output)
        else:
            if self.requires_grad:
                if type(grad_output) == int:
                    self.grad = np.array(grad_output)
                else:
                    self.grad = grad_output
            else:
                return None

        
# A small utility where we simply create a Tensor object. We use this to 
# mimic torch.tensor.
def tensor(data, requires_grad=False):
    return Tensor(data, requires_grad)
        
# We define helper functions to implement the various arithmetic operations.

# This function takes two tensors as input, and returns a new tensor holding
# the result of an element-wise addition on the two input tensors.
def addition(left, right):
    new_data = left.data + right.data 
    grad_fn = AddNode(left, right)
    return Tensor(new_data, grad_fn=grad_fn)

def subtraction(left, right):
    new_data = left.data - right.data
    grad_fn = SubNode(left, right)
    return Tensor(new_data, grad_fn=grad_fn)

def matmul(left, right):
    if left.shape == [1,1] or right.shape == [1,1] or left.shape == () or right.shape == ():
        new_data = left.data @ right.data
    elif left.shape[-1] == right.shape[0]:
        new_data = left.data @ right.data
    else:
        raise Exception(f"Shape of matrises do not match: {left.shape} does not match {right.shape}")
    grad_fn = MatMulNode(left, right)
    return Tensor(new_data, grad_fn=grad_fn)

def mul(left, right):
    if isinstance(left,(int,float)):
        new_data = left * right.data
        grad_fn = right.grad_fn
        
    else:
        new_data = left.data * right  
        grad_fn = left.grad_fn
        
    return Tensor(new_data, grad_fn=grad_fn)    

def power(left, right):
    new_data = left.data ** right
    grad_fn = PowNode(left, right)
    return Tensor(new_data, grad_fn=grad_fn)

def tanh(x):
    new_data = np.tanh(x.data)
    grad_fn = TanhNode(x)
    return Tensor(new_data, grad_fn=grad_fn)

def sigmoid(x):
    new_data = 1/(1+np.exp((-1)*x.data))
    return new_data

def cross_entropy(x, y):
    new_data = (-1)*(y.data*np.log(sigmoid(x))-(1-y.data)*np.log(1-sigmoid(x)))
    grad_fn = CrossEntropyNode(x, y)
    return Tensor(new_data, grad_fn=grad_fn)

Some sanity checks.

In [39]:
# Two tensors holding row vectors.
x1 = tensor(np.array([[2.0, 3.0]]))
x2 = tensor(np.array([[1.0, 4.0]]))
# A tensors holding a column vector.
w = tensor(np.array([[-1.0], [1.2]]))

# Test the arithmetic operations.
test_plus = x1 + x2
test_minus = x1 - x2
test_power = x2 ** 2
test_matmul = x1 @ w

print(f'Test of addition: {x1.data} + {x2.data} = {test_plus.data}')
print(f'Test of subtraction: {x1.data} - {x2.data} = {test_minus.data}')
print(f'Test of power: {x2.data} ** 2 = {test_power.data}')
print(f'Test of matrix multiplication: {x1.data} @ {w.data} = {test_matmul.data}')

# Check that the results are as expected. Will crash if there is a miscalculation.
assert(np.allclose(test_plus.data, np.array([[3.0, 7.0]])))
assert(np.allclose(test_minus.data, np.array([[1.0, -1.0]])))
assert(np.allclose(test_power.data, np.array([[1.0, 16.0]])))
assert(np.allclose(test_matmul.data, np.array([[1.6]])))

Test of addition: [[2. 3.]] + [[1. 4.]] = [[3. 7.]]
Test of subtraction: [[2. 3.]] - [[1. 4.]] = [[ 1. -1.]]
Test of power: [[1. 4.]] ** 2 = [[ 1. 16.]]
Test of matrix multiplication: [[2. 3.]] @ [[-1. ]
 [ 1.2]] = [[1.6]]


# Tasks 3 and 4

In [40]:
class Node:
    def __init__(self):
        pass
    
    def backward(self, grad_output):
        return grad_output
        
    def __repr__(self):        
        return str(type(self))  

class AddNode(Node):
    def __init__(self, left, right):
        self.left = left
        self.right = right
        
    def backward(self, grad_output):
        self.left.backward(grad_output)
        self.right.backward(grad_output)

class SubNode(Node):
    def __init__(self, left, right):
        self.left = left
        self.right = right

    def backward(self, grad_output):        
        self.left.backward(grad_output)
        self.right.backward(grad_output)


class MatMulNode(Node):
    def __init__(self, left, right):
        self.left = left
        self.right = right

    def backward(self, grad_output):
        self.left.backward(grad_output @ self.right.data.T)
        self.right.backward(self.left.data.T @ grad_output)

class PowNode(Node):
    def __init__(self, left, right):
        self.left = left
        self.right = right

    def backward(self, grad_output):
        self.left.backward(grad_output*self.right*self.left.data)

class TanhNode(Node):
    def __init__(self, inp):
        self.inp = inp

    def backward(self, grad_output):
        self.inp.backward(grad_output*(1-np.tanh(self.inp.data)**2))

class CrossEntropyNode(Node):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def backward(self, grad_output):
        self.x.backward(grad_output*(-1*(self.y.data*sigmoid(-1*self.x) - (1 - self.y.data)*sigmoid(self.x))))
        

Sanity check for Task 3.

In [41]:
x = tensor(np.array([[2.0, 3.0]]))
w1 = tensor(np.array([[1.0, 4.0]]), requires_grad=True)
w2 = tensor(np.array([[3.0, -1.0]]), requires_grad=True)

test_graph = x + w1 + w2

print('Computational graph top node after x + w1 + w2:', test_graph.grad_fn)

assert(isinstance(test_graph.grad_fn, AddNode))
assert(test_graph.grad_fn.right is w2)
assert(test_graph.grad_fn.left.grad_fn.left is x)
assert(test_graph.grad_fn.left.grad_fn.right is w1)

Computational graph top node after x + w1 + w2: <class '__main__.AddNode'>


Sanity check for Task 4.

In [42]:
x = tensor(np.array([[2.0, 3.0]]))
w = tensor(np.array([[-1.0], [1.2]]), requires_grad=True)
y = tensor(np.array([[0.2]]))

# We could as well write simply loss = (x @ w - y)**2
# We break it down into steps here if you need to debug.

model_out = x @ w 
diff = model_out - y
loss = diff ** 2
test = tanh(loss)

loss.backward()

print('Gradient of loss w.r.t. w =\n', w.grad)

assert(np.allclose(w.grad, np.array([[5.6], [8.4]])))
assert(x.grad is None)
assert(y.grad is None)

Gradient of loss w.r.t. w =
 [[5.6]
 [8.4]]


In [43]:
pt_x = torch.tensor(np.array([[2.0, 3.0]]))
pt_w = torch.tensor(np.array([[-1.0], [1.2]]), requires_grad=True)
pt_y = torch.tensor(np.array([[0.2]]))

pt_model_out = pt_x @ pt_w 
pt_model_out.retain_grad() # Keep the gradient of intermediate nodes for debugging.

pt_diff = pt_model_out - pt_y
pt_diff.retain_grad()

pt_loss = pt_diff ** 2
pt_loss.retain_grad()

pt_loss.backward()
pt_w.grad

tensor([[5.6000],
        [8.4000]], dtype=torch.float64)

# Task 5

In [44]:
class Optimizer:
    def __init__(self, params):
        self.params = params
    
    def zero_grad(self):
        for p in self.params:
            p.grad = np.zeros_like(p.data)
        
    def step(self):
        for p in self.params:
            p.data = p.data - p.grad

class SGD(Optimizer):
    def __init__(self, params, lr, stepLimit = 100):
        super().__init__(params)
        self.lr = lr
        self.stepLimit = stepLimit
        
    def step(self):
        for p in self.params:
            p.data = p.data - self.lr*p.grad


In [45]:
np.random.seed(1)

w_init = np.random.normal(size=(2, 1)) 
b_init = np.random.normal(size=(1, 1))

# We just declare the parameter tensors. Do not use nn.Linear.
w = tensor(w_init, requires_grad = True)# a tensor initialized as w_init This is the weight
b = tensor(b_init, requires_grad = True)# a tensor initialized as b_init This is the bias

eta = 1e-2
opt = SGD(params=[w,b] ,lr=eta)# a SGD optimizer with a learning rate of eta

for i in range(10):
    
    sum_err = 0

    for row in range(X.shape[0]):
        opt.zero_grad()
        x = tensor(X[[row], :]) 
                                
        y = tensor(Y[[row]]) 
        # Forward pass.
        y_pred = x @ w + b#compute predicted value for x
        err =  (y_pred-y)**2  

        err.backward()
        opt.step()
        # For statistics.
        sum_err += err.item()

    mse = sum_err / X.shape[0]
    print(f'Epoch {i+1}: MSE =', mse)

Epoch 1: MSE = 0.7999661130823178
Epoch 2: MSE = 0.017392390107906875
Epoch 3: MSE = 0.009377418010839892
Epoch 4: MSE = 0.009355326971438456
Epoch 5: MSE = 0.009365440968904256
Epoch 6: MSE = 0.009366989180952533
Epoch 7: MSE = 0.009367207398577986
Epoch 8: MSE = 0.009367238983974489
Epoch 9: MSE = 0.009367243704122532
Epoch 10: MSE = 0.009367244427185763


# Task 6

In [46]:
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

a4data = pd.read_csv('data/raisins.csv')

X = scale(a4data.drop(columns='Class'))
Y = 1.0*(a4data.Class == 'Besni').to_numpy()

np.random.seed(0)
shuffle = np.random.permutation(len(Y))
X = X[shuffle]
Y = Y[shuffle]

Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, random_state=0, test_size=0.2)

In [47]:
np.random.seed(1)

w1_init = np.random.normal(size=(7, 3)) 
b1_init = np.random.normal(size=(1, 1))
w2_init = np.random.normal(size=(3, 1))
b2_init = np.random.normal(size=(1, 1))

w1 = tensor(w1_init, requires_grad = True)
b1 = tensor(b1_init, requires_grad = True)

w2 = tensor(w2_init, requires_grad = True)
b2 = tensor(b2_init, requires_grad = True)

opt = Optimizer(params = [w1,w2,b1,b2])
# l0         l1         l2      l3        l4                         

# x1---\
# x2----\              h()
# x3-----\           /    \
# x4 -----> w1 -----> h() -----> w2 -----> Y
# x5-----/           \    /
# x6---/              h()
# x7-/


#train 1 hidden layer node
for i in range(10):
    for row in range(Xtrain.shape[0]):
        opt.zero_grad()
        x = tensor(Xtrain[[row], :])
        y = tensor(Ytrain[row])
        
        l1 = tanh(x@w1 + b1)
        output = l1@w2 +b2

        loss = cross_entropy(output, y)
        loss.backward()
        opt.step()

correct_guesses = 0
wrong_guesses = 0

for row in range(Xtest.shape[0]):
    x = tensor(Xtest[[row], :])
    y = tensor(Ytest[row])
        
    l1 = tanh(x@w1 + b1)
    output = l1@w2 +b

    if (sigmoid(output) < 0.5) & (y.data == 0):
        correct_guesses += 1
    elif (sigmoid(output) > 0.5) & (y.data == 1):
        correct_guesses += 1
    else:
        wrong_guesses +=1

accuracy = correct_guesses/Xtest.shape[0]
print(accuracy)

0.8555555555555555
