# Chapter 3: Linear Neural Networks for Regression

### Dhuvi Karthikeyan

##### 1/11/2023

## 3.1 Linear Regression

Minimal notes just on the things I had forgotten/wish to review for retention.

Linear regression is an affine transformation: a linear transformation (a weighted sum of the input features) followed by a translation (the addition of a bias term).

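Conditional mean: linear regression models $E[Y|X=x]$; under the Gaussian noise assumption below, minimizing the squared error gives the maximum likelihood estimate (MLE) of the parameters.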

$$ y = \textbf{w}^Tx + b + \epsilon, \quad \epsilon \sim \mathcal{N}(0, \sigma^2)$$

$$ P(y | x) = \frac{1}{\sqrt{2\pi\sigma^2}}\exp{(-\frac{1}{2\sigma^2}(y - \textbf{w}^Tx -b)^2)}$$ 

$$ P(y | X) = \prod_{i=1}^n P(y^{(i)}|x^{(i)}) $$

$$ -\log P(y|X) = \sum_{i=1}^n \frac{1}{2}\log(2\pi\sigma^2) + \frac{1}{2\sigma^2}(y^{(i)} - \textbf{w}^Tx^{(i)} - b)^2$$

## 3.2 Object-Oriented Design for Implementation

### 3.2.1 Utilities

In [1]:
# Useful function for adding attributes to objects after instantiation

def add_to_class(Class):
    """Return a decorator that attaches the decorated object to ``Class``.

    The object is stored on ``Class`` under its own ``__name__``, so a
    function registered this way becomes available as a method on instances
    of ``Class``.

    Args:
        Class: The class that receives the new attribute.

    Returns:
        A decorator that takes the object to attach and returns that same
        object.
    """
    def wrapper(obj):
        setattr(Class, obj.__name__, obj)
        # Hand the object back so the decorated module-level name is not
        # rebound to None.
        return obj
    return wrapper

In [2]:
class C:
    """Minimal example class used to demonstrate attaching attributes later."""
    def __init__(self):
        # Flag recording that the constructor has run.
        self.instantiated = True
        
# Module-level instance used by the demonstration cells that follow.
c = C()

In [12]:
@add_to_class(C)  # registers the function below on class C only
def add_name(self, name):
    """Store ``name`` on the instance as its ``name`` attribute."""
    setattr(self, "name", name)
    
def add_directive(obj, directive):
    """Set ``directive`` as the ``directive`` attribute of ``obj``.

    This is a free function in global scope, so it can be applied to any
    object rather than to instances of one class (not best practice).
    """
    setattr(obj, "directive", directive)

In [10]:
# Call the method that was attached to class C through add_to_class.
c.add_name('ClassC')

In [13]:
# Display the attribute written by add_name.
c.name

'ClassC'

In [14]:
# Set an attribute on the instance through the free function instead of a method.
add_directive(c, "Exist")

In [15]:
# Display the attribute written by add_directive.
c.directive

'Exist'

## 3.4 Linear Regression from Scratch

This is the main problem from HW1 and is pulled from my implementation in HW1

In [22]:
# Define the DataLoader Class

class DataLoader:
    """
    Serve shuffled mini-batches of (inputs, labels) to a model.

    Inputs are flattened to an (n_examples, n_features) matrix so that matrix
    multiplication works. Batches are sampled without replacement; once fewer
    than ``batch_size`` unused examples remain, the record of used indices is
    reset and sampling starts over. Shuffling is always on.
    """
    def __init__(
        self,
        inputs,
        labels,
        batch_size
    ):
        """
        Args:
            inputs: Array-like whose first dimension indexes the examples.
            labels: Array-like with one entry per example.
            batch_size: Number of examples per batch; must lie between 1 and
                ``len(labels)`` inclusive.

        Raises:
            TypeError: If ``inputs`` is not iterable.
            ValueError: If ``batch_size`` is out of range, or the number of
                inputs differs from the number of labels.
        """
        # Convert up front so that index-array lookups work for list labels.
        labels = np.asarray(labels)
        n_examples = len(labels)
        if not 0 < batch_size <= n_examples:
            raise ValueError(
                f"batch_size must be between 1 and {n_examples}, "
                f"got {batch_size}"
            )
        # NOTE: kept as a plain int attribute for compatibility; it is not the
        # ``__len__`` protocol method, so ``len(loader)`` is not supported.
        self.__len__ = n_examples
        self.data = self.gen_tensor(inputs)
        self.labels = labels
        self.batch_size = batch_size
        self.called_idx = np.array([], dtype=int)
        # NOTE: this draw marks its indices as used; nothing in this class
        # reads ``prefetch`` afterwards.
        self.prefetch = self.__get_idx__()

    def reshape_input(self, inputs):
        '''Reshapes input of dims >= 1 to a matrix of n x n_features'''
        inputs = np.asarray(inputs)
        n_examples = inputs.shape[0]  # Assumes first dim is n_examples
        if n_examples != self.__len__:
            raise ValueError(
                f"inputs has {n_examples} examples but labels has "
                f"{self.__len__}"
            )
        # -1 lets NumPy infer the flattened feature count.
        return inputs.reshape(n_examples, -1)

    def gen_tensor(self, inputs):
        '''Checks for iterable inputs and runs reshape above'''
        try:
            iter(inputs)
        except TypeError as err:
            # Fail here instead of printing and failing later in the reshape.
            raise TypeError("Inputs is not an iterable.") from err
        return self.reshape_input(inputs)

    def __get_idx__(self):
        '''Sample batch_size unused indices, resetting the epoch if needed'''
        if self.__len__ - len(self.called_idx) < self.batch_size:
            # On epoch end reset the called indices
            self.called_idx = np.array([], dtype=int)
        remaining_idx = np.delete(np.arange(self.__len__), self.called_idx)
        idx = np.random.choice(remaining_idx, self.batch_size, replace=False)
        self.called_idx = np.append(self.called_idx, idx)
        return idx

    def _get_item_(self):
        '''Get a batch of the data when called with batch_size'''
        sampled_idx = self.__get_idx__()
        return (self.data[sampled_idx, :], self.labels[sampled_idx])

In [23]:
# Define the LinearRegression Model Class

class LinearRegression:
    """
    Implements a Linear Regression Model Class for performing linear regression
    without the closed form solution. Randomly initializes the weights and
    starts the bias at zero; the parameters are meant to be optimized with
    mini-batch SGD.

    Attributes:
        params: Dict holding the trainable parameters under the keys
            ``"weights"`` and ``"bias"``, in that order.
        weights: Property view of ``params["weights"]``.
        bias: Property view of ``params["bias"]``.
    """
    def __init__(
      self,
      input_dim
    ):
        # Parameters live in a single dict, ordered (weights, bias), so an
        # optimizer that walks ``model.params`` sees them in the same order
        # in which the gradient functions return their gradients.
        self.params = {
            "weights": np.random.randn(input_dim),
            "bias": 0,
        }

    @property
    def weights(self):
        """Weight vector of length ``input_dim``."""
        return self.params["weights"]

    @weights.setter
    def weights(self, value):
        self.params["weights"] = value

    @property
    def bias(self):
        """Scalar bias term."""
        return self.params["bias"]

    @bias.setter
    def bias(self, value):
        self.params["bias"] = value

    def forward(self, X):
        """Return the predictions ``X @ weights + bias`` for a batch ``X``."""
        # Vectorized forward call; the scalar bias is broadcast over the batch.
        return X @ self.weights + self.bias

In [1]:
# Define the Gradient Descent Class

class SGD:
    """
    Gradient-descent optimizer driven by a user-supplied gradient function.

    The model is expected to expose a ``params`` mapping, and the gradient
    function to return one gradient per parameter, indexed in the mapping's
    iteration order.
    """
    def __init__(
        self,
        model,
        learning_rate,
        grad_fxn
    ):
        self.model = model
        self.grad = grad_fxn
        self.eta = learning_rate

    def step(self, inputs, preds, labels):
        """Compute the gradients for one batch and apply them to the model."""
        self.update_params(self.grad(inputs, preds, labels))

    def update_params(self, grads):
        """Subtract ``eta * grads[k]`` from the k-th entry of ``model.params``."""
        store = self.model.params
        for position, key in enumerate(store.keys()):
            store[key] -= self.eta * grads[position]

In [None]:
# Define the Train_Iters Function
def train_iters(iterations, data_loader, model, lossfn, optimizer,
                log_every=1000):
    """Run mini-batch training and return the windowed average losses.

    Args:
        iterations: Number of optimizer steps to perform.
        data_loader: Object whose ``_get_item_()`` returns an ``(x, y)`` batch.
        model: Object whose ``forward(x)`` returns predictions.
        lossfn: Callable ``lossfn(preds, y)`` returning the batch loss.
        optimizer: Object whose ``step(x, preds, y)`` updates the model.
        log_every: Number of iterations averaged into each recorded loss
            value. Defaults to 1000.

    Returns:
        A list with one mean loss per completed window of ``log_every``
        iterations. A trailing partial window is not recorded.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")
    window_total = 0
    history = []
    for i in tqdm(range(iterations)):
        x, y = data_loader._get_item_()
        preds = model.forward(x)
        window_total += lossfn(preds, y)
        if (i + 1) % log_every == 0:
            history.append(window_total / log_every)
            window_total = 0
        # The loss above is measured before this step updates the parameters.
        optimizer.step(x, preds, y)
    return history

In [None]:
# Define the L2 Loss Function and Gradients

def l2_loss(preds, targets):
    """Return half the sum of squared residuals (summed, not averaged)."""
    residual = preds - targets
    squared = np.square(residual)
    return 0.5 * np.sum(squared)

def l2_grad(inputs, preds, targets):
    """Return ``(weights_grad, bias_grad)`` for the squared-error loss.

    Both gradients are divided by the number of predictions.
    """
    residual = preds - targets
    n_preds = len(preds)
    # Dotting with a vector of ones adds up the residuals for the bias term.
    ones = np.ones(len(targets))
    weights_grad = np.dot(inputs.T, residual) / n_preds
    bias_grad = np.dot(ones, residual) / n_preds
    return weights_grad, bias_grad

In [None]:
# Define the L1 Loss Function and Gradients

def l1_loss(preds, targets):
    """Return the sum of absolute residuals (summed, not averaged)."""
    residual = preds - targets
    return np.sum(np.absolute(residual))

def l1_grad(inputs, preds, targets):
    """Return ``(weights_grad, bias_grad)`` for the absolute-error loss.

    Uses the piecewise derivative of ``|r|``, i.e. the sign of each residual,
    and divides both gradients by the number of predictions.
    """
    direction = np.sign(preds - targets)
    n_preds = len(preds)
    # Dotting with a vector of ones adds up the signs for the bias term.
    ones = np.ones(len(targets))
    weights_grad = np.dot(inputs.T, direction) / n_preds
    bias_grad = np.dot(ones, direction) / n_preds
    return weights_grad, bias_grad

## 3.7 Weight Decay (Regularization)