# Lab 4 Report

Prepared by Zech Wolf

## 1. Loading the data

### 1.1 Read and clean NAs

In [1]:
import pandas as pd

# Load the census-tract CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="datasets/acs2017_census_tract_data.csv")

In [2]:
# Remove every row that has at least one missing value, modifying df in place.
df.dropna(axis=0, how="any", inplace=True)

In [3]:
from sklearn.preprocessing import LabelEncoder

# Replace the string-valued State and County columns with integer codes.
# The same encoder object is refit by each fit_transform call, so after this
# cell `le` holds the fitted classes of County only.
le = LabelEncoder()
df["State"] = le.fit_transform(df["State"])
df["County"] = le.fit_transform(df["County"])



### 1.3 Quantizing child poverty

The target is computed by binning the ChildPoverty variable into its quartiles, which keeps the four classes as balanced as possible.

In [5]:
# Bin ChildPoverty into four quantile-based classes; labels=False makes qcut
# return the integer bin index instead of an interval label.
df["target"] = pd.qcut(df["ChildPoverty"], q=4, labels=False)

### 1.2 Train-test split

In [6]:
from sklearn.model_selection import train_test_split

# Features are every column except the raw ChildPoverty value and the target
# that was derived from it.
X = df.drop(columns=["ChildPoverty", "target"])
y = df["target"] #keep these as pandas objects to make manipulation easier later on

# train_test_split returns the pieces grouped per input array, i.e.
# (X_train, X_test, y_train, y_test); unpacking in any other order silently
# swaps features and labels. Stratifying on y keeps the class balance in both
# the 80% training part and the 20% testing part.
X_train, X_test, y_train, y_test = train_test_split(
    X.to_numpy(), y.to_numpy(), train_size=0.8, stratify=y
) #split as numpy arrays

In [7]:
import numpy as np

# Count how many rows fall into each target class to verify the balance.
np.bincount(df["target"])

array([18229, 18171, 18148, 18170])

It is important that the classes are balanced in both the training and testing sets. Since we are assuming that classification is equally important for all classes in the dataset, a balanced training set means that a classifier has a roughly equal number of instances from each class to train on and learn how to identify. A balanced testing set means that the accuracy of the predictions on the testing set is actually representative of the performance of the model, according to the criterion that the classes are equally important.

## 2. Preprocessing

### 2.1 Unprocessed data testing

In [None]:
from scipy.special import expit

class TwoLayerPerceptronBase(object):
    def __init__(self, n_hidden=30,
                 C=0.0, epochs=500, eta=0.001, random_state=None):
        np.random.seed(random_state)
        self.n_hidden = n_hidden
        self.l2_C = C
        self.epochs = epochs
        self.eta = eta
        
    @staticmethod
    def _encode_labels(y):
        """Encode labels into one-hot representation"""
        onehot = pd.get_dummies(y).values.T
            
        return onehot

    def _initialize_weights(self):
        """Initialize weights with small random numbers."""
        W1_num_elems = (self.n_features_)*self.n_hidden
        W1 = np.random.uniform(-1.0, 1.0, size=W1_num_elems)
        W1 = W1.reshape(self.n_hidden, self.n_features_) # reshape to be W
        b1 = np.zeros((self.n_hidden, 1))
        
        W2_num_elems = (self.n_hidden)*self.n_output_
        W2 = np.random.uniform(-1.0, 1.0, size=W2_num_elems)
        W2 = W2.reshape(self.n_output_, self.n_hidden)
        b2 = np.zeros((self.n_output_, 1))
        
        return W1, W2, b1, b2
    
    @staticmethod
    def _sigmoid(z):
        """Use scipy.special.expit to avoid overflow"""
        # 1.0 / (1.0 + np.exp(-z))
        return expit(z)
    
    
    @staticmethod
    def _L2_reg(lambda_, W1, W2):
        """Compute L2-regularization cost"""
        # only compute for non-bias terms
        return (lambda_/2.0) * np.sqrt(np.mean(W1[:, 1:] ** 2) + np.mean(W2[:, 1:] ** 2))
    
    def _cost(self,A3,Y_enc,W1,W2):
        '''Get the objective function value'''
        cost = np.mean((Y_enc-A3)**2)
        L2_term = self._L2_reg(self.l2_C, W1, W2)
        return cost + L2_term
    
    def _feedforward(self, X, W1, W2, b1, b2):
        """Compute feedforward step
        -----------
        X : Input layer with original features.
        W1: Weight matrix for input layer -> hidden layer.
        W2: Weight matrix for hidden layer -> output layer.
        ----------
        a1-a3 : activations into layer (or output layer)
        z1-z2 : layer inputs 

        """
        A1 = X.T
        Z1 = W1 @ A1 + b1
        A2 = self._sigmoid(Z1)
        Z2 = W2 @ A2 + b2
        A3 = self._sigmoid(Z2)
        return A1, Z1, A2, Z2, A3
    
    def _get_gradient(self, A1, A2, A3, Z1, Z2, Y_enc, W1, W2):
        """ Compute gradient step using backpropagation.
        """
        # vectorized backpropagation
        V2 = -2*(Y_enc-A3)*A3*(1-A3)
        V1 = A2*(1-A2)*(W2.T @ V2)
        
        gradW2 = V2 @ A2.T
        gradW1 = V1 @ A1.T
        
        gradb2 = np.sum(V2, axis=1).reshape((-1,1))
        gradb1 = np.sum(V1, axis=1).reshape((-1,1))
        
        
        # regularize weights that are not bias terms
        gradW1 += W1 * self.l2_C
        gradW2 += W2 * self.l2_C

        return gradW1, gradW2, gradb1, gradb2
    
    def predict(self, X):
        """Predict class labels"""
        _, _, _, _, A3 = self._feedforward(X, self.W1, self.W2, self.b1, self.b2)
        y_pred = np.argmax(A3, axis=0)
        return y_pred