<a href="https://colab.research.google.com/github/ewu2023/CS589-HW4/blob/main/CS589_NeuralNets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mount Google Drive

In [1]:
# Colab-only: make Google Drive visible to this runtime under /content/drive.
# Later cells build their data paths below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import Packages

In [25]:
import numpy as np
import pandas as pd
from scipy.stats import norm

# For reading/writing to files
import json
import csv
import re

# Define Neural Network Class

In [18]:
class NeuralNetwork():
    """
    Fully connected feed-forward neural network with sigmoid activations,
    trained by batch gradient descent on the regularized cross-entropy cost.

    Each matrix in `self.layers` has one row per neuron of the next layer and
    one column per neuron of the current layer plus a leading bias column.
    Activations and deltas are handled as column vectors throughout.
    """

    def __init__(self, networkShape, trainData: pd.DataFrame, classLabels, weights=None, regParam=0, alpha=0.01, epsilon=0.01, debugFlag=False):
        """
        Constructor for the neural network class.

        networkShape: A list of integers that contains the number of neurons to use in each layer

        trainData: The data set that will be used to train the model

        classLabels: Names of the columns of trainData that hold the expected outputs

        weights: A list of weight matrices, one per pair of consecutive layers.
                 If None (or empty), the constructor assigns random weights

        regParam: Regularization parameter applied to all non-bias weights

        alpha: Step size for gradient descent

        epsilon: Training stops once the cost changes by at most this much between iterations

        debugFlag: If True, print the intermediate computations while training
        """
        # --- Process training data ---
        self.trainData = trainData

        # Get a copy of the training data without the labels
        self.noLabelTrainData = trainData.loc[:, ~trainData.columns.isin(classLabels)]
        self.classLabels = classLabels

        # Encode the expected outputs of instance i as a column vector, keyed by row position.
        # Converting once to float avoids building one DataFrame per row and keeps the
        # cost arithmetic safe for unsigned/boolean label columns (e.g. get_dummies output).
        labelMatrix = trainData.loc[:, list(classLabels)].to_numpy(dtype=float)
        self.classVectors = {
            i: labelMatrix[i].reshape(-1, 1) for i in range(len(trainData))
        }

        self.networkShape = networkShape

        # Set the value of the regularization parameter
        self.regParam = regParam

        # Set the step size for gradient descent
        self.alpha = alpha

        # Set value of epsilon for stopping condition
        self.epsilon = epsilon

        # Set debug flag to show output of intermediate computations
        self.debugFlag = debugFlag

        # Instance variable storing weight matrices
        self.layers = []

        # Explicit None/length check: truth-testing a numpy array is ambiguous
        useGivenWeights = weights is not None and len(weights) > 0

        # Initialize layers
        for i in range(len(networkShape) - 1):
            if useGivenWeights:
                # Copy as float so the caller's arrays are never aliased by the network
                layerMatrix = np.array(weights[i], dtype=float)
            else:
                # Number of rows = number of neurons in layer i + 1
                # Number of columns = number of neurons in layer i, plus the bias term
                numCurLayer = networkShape[i] + 1
                numNextLayer = networkShape[i + 1]
                layerMatrix = np.zeros(shape=(numNextLayer, numCurLayer))
                self._init_matrix(layerMatrix)

            # Append current layer to the list of layers
            self.layers.append(layerMatrix)

    def _init_matrix(self, matrix: np.ndarray):
        """Fill `matrix` in place with samples from the standard normal distribution."""
        matrix[...] = norm.rvs(size=matrix.shape)

    # Definition for the sigmoid function
    def sigmoid(self, x):
        """Logistic function 1 / (1 + e^-x); works on scalars and arrays."""
        return (1 / (1 + np.exp(-x)))

    # Compute activation vector
    def compute_activation_vector(self, weightedSums: np.ndarray):
        """
        Apply the sigmoid element-wise to an array of weighted sums.

        Returns a new float array with the same shape as weightedSums.
        """
        return self.sigmoid(np.asarray(weightedSums, dtype=float))

    # Method for propagating forward one instance
    def propagate_one(self, instance, activations=None, printOut=False):
        """
        Forward-propagate a single instance through the network.

        instance: The attribute values of one instance (must provide .to_numpy(),
                  e.g. a row of noLabelTrainData)

        activations: Optional list. If given, the activation vector of every layer
                     (input layer first, output layer last) is appended to it

        printOut: If True, print the z and a vectors of every layer

        Returns the output layer's activations as a column vector.
        """
        # Add a bias term to the instance and make it a column vector
        instanceVector = np.concatenate(([1], instance.to_numpy()))
        prevActivation = np.atleast_2d(instanceVector).T

        if activations is not None:
            activations.append(prevActivation)

        # If the debug flag was set, print out the first instance vector
        if printOut:
            print(f"Value of a0:\n{prevActivation}\n")

        lastIndex = len(self.layers) - 1
        for i, curTheta in enumerate(self.layers):
            # Compute weighted sum vector: Theta^{l} * a^{l}
            z = np.matmul(curTheta, prevActivation)
            curActivationVec = self.compute_activation_vector(z)

            # Every layer except the output layer gets a bias term prepended
            if i < lastIndex:
                curActivationVec = np.concatenate(([[1]], curActivationVec))

            if activations is not None:
                activations.append(curActivationVec)

            # Print results of computation at this step if debug flag is on
            if printOut:
                print(f"Value of z{i + 1}:\n{z}")
                print(f"Value of a{i + 1}:\n{curActivationVec}\n")

            prevActivation = curActivationVec

        # Return as a vector in the event there are multiple outputs
        return prevActivation

    # Compute error for an individual output of the neural network
    def compute_one_instance_err(self, expVal, predVal):
        """Cross-entropy cost of predicting predVal when expVal is expected (element-wise on arrays)."""
        return -expVal * np.log(predVal) - (1 - expVal) * np.log(1 - predVal)

    # Helper method for computing regularized error
    def compute_error(self, printOut=False):
        """
        Compute the regularized cost J over the whole training set.

        printOut: If True, print the cost associated with each instance

        Returns the average cross-entropy cost plus regParam / (2n) times the
        sum of all squared non-bias weights.
        """
        numInstances = len(self.trainData)

        # Keep track of total error across all training instances
        totalErr = 0
        for i in range(len(self.noLabelTrainData)):
            predVector = self.propagate_one(self.noLabelTrainData.iloc[i], printOut=False)

            # The cost formula is plain numpy arithmetic, so it applies directly to the vectors
            instanceErr = np.sum(self.compute_one_instance_err(self.classVectors[i], predVector))

            if printOut:
                print(f"Cost associated with Instance {i}: {instanceErr}\n")
            totalErr += instanceErr

        # Compute average error
        avgErr = totalErr / numInstances

        # Squared sum of all weights in the network; column 0 holds the bias
        # weights, which are not regularized
        weightSqSum = sum(np.sum(np.square(weightMatrix[:, 1:])) for weightMatrix in self.layers)

        # Return error + regularization term
        return avgErr + weightSqSum * (self.regParam / (2 * numInstances))

    def backpropagate(self, maxIterations=None):
        """
        Train the network with batch gradient descent.

        Each iteration accumulates the gradients of every training instance via
        backpropagation, averages and regularizes them, and updates the weights
        with step size alpha. Training stops when the regularized cost changes
        by at most epsilon between two consecutive iterations.

        maxIterations: Optional upper bound on the number of iterations. The
                       default (None) keeps iterating until the epsilon
                       criterion is met.
        """
        numInstances = len(self.trainData)
        lastLayer = len(self.layers) - 1

        # Determine current error across entire data set
        prevErr = self.compute_error(printOut=False)

        iteration = 0
        while maxIterations is None or iteration < maxIterations:
            iteration += 1

            # Accumulate the gradients of each layer in this list
            gradients = [np.zeros(shape=layer.shape) for layer in self.layers]

            # Iterate over all instances in the training data
            for i in range(len(self.noLabelTrainData)):
                # Progress banner is debug output like everything else printed here
                if self.debugFlag:
                    print(f"--- Propagating Instance {i} ---\n")

                # Propagate current instance through the network
                curInstance = self.noLabelTrainData.iloc[i]
                activations_i = []
                self.propagate_one(curInstance, activations=activations_i, printOut=self.debugFlag)

                # Compute delta values for output layer
                outputVector = activations_i[-1]
                if self.debugFlag:
                    print(f"Instance {i} Activation: {outputVector.T}\n")

                delta_vectors = [outputVector - self.classVectors[i]]

                # Walk backwards over the hidden layers and compute their delta values
                for k in range(lastLayer, 0, -1):
                    backErr = np.matmul(self.layers[k].T, delta_vectors[0])
                    delta_k = backErr * activations_i[k] * (1 - activations_i[k])

                    # Remove bias term from delta_k, then store it in front
                    delta_vectors.insert(0, np.delete(delta_k, 0, 0))

                if self.debugFlag:
                    print(f"Instance {i} Deltas:\n")
                    for deltaIndex in range(len(delta_vectors)):
                        print(f"Deltas for Layer {deltaIndex}:\n{delta_vectors[deltaIndex]}\n")

                # Accumulate the gradients for each layer
                for j in range(lastLayer, -1, -1):
                    # Compute gradients of this instance for current layer
                    gradMatrix = np.matmul(delta_vectors[j], activations_i[j].T)
                    if self.debugFlag:
                        print(f"Gradients of Theta{j} on instance {i}:\n{gradMatrix}\n")

                    gradients[j] = gradients[j] + gradMatrix

            # Iterate over each layer and compute gradient + regularization factor
            for i in range(lastLayer, -1, -1):
                # np.multiply returns a new array, so zeroing the bias column
                # below does not touch the actual weights
                regFactor = np.multiply(self.regParam, self.layers[i])
                regFactor[:, 0] = 0

                gradients[i] = (1 / numInstances) * (gradients[i] + regFactor)

            # Print error cost and final gradients
            if self.debugFlag:
                print("----------")
                print(f"Final (regularized) cost J based on Entire Set: {self.compute_error(printOut=True)}")
                print(f"Final Average, Regularized Gradients:\n")
                for gradIndex in range(len(gradients)):
                    print(f"Final Regularized gradients of Theta{gradIndex}:\n{gradients[gradIndex]}\n")

            # Update weights according to gradients
            for i in range(lastLayer, -1, -1):
                self.layers[i] = self.layers[i] - self.alpha * gradients[i]

            # Compute current error and check for stopping condition
            curErr = self.compute_error(printOut=False)
            if np.absolute(prevErr - curErr) <= self.epsilon:
                # If the difference in error is equal to or smaller than
                # the given value of epsilon, consider the network as converged
                break

            prevErr = curErr

# Test Neural Net

In [20]:
def backprop_example_1():
    """
    Run backpropagation on a worked example with debug output enabled.

    Builds a [1, 2, 1] network with fixed initial weights and two training
    instances (attribute 'x', expected output 'class'), then trains it so the
    intermediate activations, deltas and gradients are printed.
    """
    d = {
        'x': [0.13000, 0.42000],
        'class': [0.90000, 0.23000]
    }
    df = pd.DataFrame(data=d)

    networkShape = [1, 2, 1]
    weights = [
        # Theta for layer 1 -> 2: rows are hidden neurons, column 0 is the bias weight
        np.array(
            [[0.40000, 0.10000],
            [0.30000, 0.20000]],
        ),

        # Theta for layer 2 -> 3: one output neuron
        np.array([[0.70000, 0.50000, 0.60000]])
    ]

    classLabels = ['class']
    network = NeuralNetwork(networkShape, trainData=df, classLabels=classLabels, weights=weights, debugFlag=True)
    network.backpropagate()

def backprop_example_2():
    """
    Run backpropagation on a second worked example with debug output enabled.

    Builds a [2, 4, 3, 2] network with fixed initial weights, two attributes
    ('x1', 'x2'), two outputs ('y1', 'y2') and regularization parameter 0.25,
    then trains it so the intermediate computations are printed.
    """
    # Pre-process data
    d = {
        "x1": [0.32000, 0.83000],
        "x2": [0.68000, 0.02000],
        "y1": [0.75000, 0.75000],
        "y2": [0.98000, 0.28000]
    }

    classLabels = ["y1", "y2"]

    df = pd.DataFrame(d)

    # Initialize network; column 0 of every matrix holds the bias weights
    networkShape = [2, 4, 3, 2]
    weights = [
        np.array([
            [0.42000, 0.15000, 0.40000],
            [0.72000, 0.10000, 0.54000],
            [0.01000, 0.19000, 0.42000],
            [0.30000, 0.35000, 0.68000]
        ]),

        np.array([
            [0.21000, 0.67000, 0.14000, 0.96000, 0.87000],
            [0.87000, 0.42000, 0.20000, 0.32000, 0.89000],
            [0.03000, 0.56000, 0.80000, 0.69000, 0.09000]
        ]),

        np.array([
            [0.04000,  0.87000,  0.42000,  0.53000],
            [0.17000,  0.10000,  0.95000,  0.69000]
        ])
    ]

    network = NeuralNetwork(
        networkShape,
        df,
        classLabels=classLabels,
        weights=weights,
        regParam=0.25,
        debugFlag=True
    )

    network.backpropagate()

# Data Utilities

## Load CSV Helper Function

In [37]:
# Regular expressions for integers and floats.
# Raw strings keep the backslash in `\.` literal instead of relying on an
# invalid string escape; the resulting patterns are unchanged.
FLOAT_REGEX = r"[-+]?[0-9]+\.[0-9]+"
INTEGER_REGEX = r"[-+]?[0-9]+"
def loadCSV(filename, delimeter):
    """
    Read a delimited text file with a header row into a DataFrame of strings.

    filename: Path of the file to read

    delimeter: Field separator used in the file (e.g. ',' or '\\t')

    Column names are lower-cased. Any column whose name contains "class" is
    stored under the key 'class'. Values are left as the strings read from
    the file.
    """
    # utf-8-sig drops a leading byte-order mark so it does not end up glued to
    # the first column name; newline='' is what the csv module expects.
    with open(filename, newline='', encoding='utf-8-sig') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=delimeter)
        data_table = []
        for row in reader:
            # Copy over each column value into a new row
            cur_table_row = {}
            for col, value in row.items():
                col_lower = col.lower()
                if "class" in col_lower:
                    cur_table_row['class'] = value
                else:
                    cur_table_row[col_lower] = value
            data_table.append(cur_table_row)

    return pd.DataFrame(data_table)

## Assign Data Types to Attributes

In [62]:
# Attempts to assign data types to each column and value, depending on format
def parseTypes(df: pd.DataFrame, attrTypes=None):
    """
    Return a new DataFrame in which the columns declared as non-categorical
    hold numbers instead of strings.

    df: Data frame to convert; it is not modified

    attrTypes: Optional mapping from column name (as spelled in df) to a type
               name. Columns mapped to 'categorical', or not listed at all,
               keep their values unchanged. Columns mapped to anything else
               are converted to numbers.

    Column names are lower-cased in the result, and any column whose name
    contains "class" is copied unchanged under the key 'class'.

    Raises ValueError if a value of a numeric column cannot be parsed.
    """
    def _to_number(value):
        # Values that are not strings (already numeric, missing, ...) are kept as they are
        if not isinstance(value, str):
            return value
        try:
            # Whole numbers stay integers
            return int(value)
        except ValueError:
            # Everything else must be a valid float spelling (decimals, exponents, ...)
            return float(value)

    # Build the result one column at a time; a dict keeps the original column order
    columns = {}
    for col in df:
        col_lower = col.lower()
        values = df[col].tolist()

        if 'class' in col_lower:
            # Copy the class values unchanged
            columns['class'] = values
            continue

        # Check if a type for the current attribute was specified
        colType = None
        if attrTypes:
            if col in attrTypes:
                colType = attrTypes[col]

        if colType == 'categorical' or colType is None:
            # Store values unchanged
            columns[col_lower] = values
        else:
            columns[col_lower] = [_to_number(value) for value in values]

    # Return the data frame with the formatted values
    return pd.DataFrame(columns)

# Test Code

In [34]:
# target_dir = "/content/drive/My Drive/"
# filename = "blarg.json"
# fileRoute = f"{target_dir}/{filename}"

# with open(fileRoute, 'w') as outfile:
#     some_data = {
#         "a1": [[1, 2], [3, 4]],
#         "b1": [[5, 6], [7, 8]]
#     }

#     outfile.write(json.dumps(some_data))

# Compute error for an individual output of the neural network
def compute_one_instance_err(expVal, predVal):
    """Cross-entropy cost of predicting predVal when expVal is the expected output."""
    # Cost contributed when the expected output is 1
    expectedOneTerm = -expVal * np.log(predVal)
    # Cost contributed when the expected output is 0 (subtracted because the log is negative)
    expectedZeroTerm = (1 - expVal) * np.log(1 - predVal)
    return expectedOneTerm - expectedZeroTerm

def multiply(a, b):
    """Return the product of a and b, as defined by the `*` operator."""
    product = a * b
    return product


# Scratch checks of numpy behaviour used by the network code.

# Element-wise wrapper around multiply(); it is created here but not called in this cell.
vectorFunction = np.vectorize(multiply)
a = np.array([
    [1, 2],
    [3, 4]
])

# axis=0 adds down each column, axis=1 adds across each row
colSum = np.sum(a, axis=0)
rowSum = np.sum(a, axis=1)

print(colSum)
print(rowSum)
# Summing the per-column totals gives the total over the whole matrix
print(f"Sum of all elements in matrix: {np.sum(colSum)}")

# 2x1 column vector
b = np.array([
    [3],
    [7]
])

# np.multiply is element-wise, so this squares every entry
print(f"b-vector elements squared:\n{np.multiply(b, b)}")
# print(f"b-vector - b-vector: {b - b}")
# Scalar times array scales every entry
print(f"{2 * b}")

[4 6]
[3 7]
Sum of all elements in matrix: 10
b-vector elements squared:
[[ 9]
 [49]]
[[ 6]
 [14]]


## Test Open File in Google Drive

In [67]:
# Data files are read from the mounted Google Drive, so this cell only works
# after the Drive mount cell has run.
target_dir = "/content/drive/My Drive/Colab Notebooks/data"
wineCSV = f"{target_dir}/hw3_wine.csv"
houseCSV = f"{target_dir}/hw3_house_votes_84.csv"

# wine_data = loadCSV(wineCSV, '\t')
# print(pd.get_dummies(wine_data))

# Load the house-votes file as strings, then convert only the column listed
# in attrTypes to numbers; the other attribute columns stay as strings.
house_data = loadCSV(houseCSV, ',')
attrTypes = {'water-project-cost-sharing': 'numerical'}
house_data = parseTypes(house_data, attrTypes=attrTypes)

# Print the frame after pandas has one-hot encoded its string-valued columns
print(pd.get_dummies(house_data))

     water-project-cost-sharing  ﻿#handicapped-infants_0  \
0                             2                        0   
1                             2                        0   
2                             2                        1   
3                             2                        0   
4                             2                        0   
..                          ...                      ...   
430                           1                        0   
431                           1                        0   
432                           0                        0   
433                           1                        0   
434                           2                        0   

     ﻿#handicapped-infants_1  ﻿#handicapped-infants_2  \
0                          1                        0   
1                          1                        0   
2                          0                        0   
3                          1                       

## Test Pandas Dummies

In [None]:
# Small all-categorical data set used to see what pd.get_dummies produces
data = {
    "temperature": ["hot", "cool", "hot"],
    "weather": ["sunny", "overcast", "sunny"],
    "humidity": ["high", "normal", "high"],
    "class": ["no", "yes", "no"]
}

tennisDf = pd.DataFrame(data)
# One indicator column per (column, value) pair, named "<column>_<value>"
encodedDf = pd.get_dummies(tennisDf)
# Print every encoded row on its own to inspect the indicator values
for i in range(len(encodedDf)):
    print(f"{encodedDf.iloc[i]}\n")

temperature_cool    0
temperature_hot     1
weather_overcast    0
weather_sunny       1
humidity_high       1
humidity_normal     0
class_no            1
class_yes           0
Name: 0, dtype: uint8

temperature_cool    1
temperature_hot     0
weather_overcast    1
weather_sunny       0
humidity_high       0
humidity_normal     1
class_no            0
class_yes           1
Name: 1, dtype: uint8

temperature_cool    0
temperature_hot     1
weather_overcast    0
weather_sunny       1
humidity_high       1
humidity_normal     0
class_no            1
class_yes           0
Name: 2, dtype: uint8

