In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
np.random.seed(42)

data = pd.read_csv('final_gentrification_dataset_with_labels.csv')

# get features ready 
change_features = [
    'income_diversity', 'racial_diversity', 'bachelor_or_higher', 'crowding_rate',
    'pop_above_65', 'under_18', 'homeownership', 'born_in_ny_rate'
]
for feature in change_features:
    data[f'{feature}_change'] = data[f'{feature}_2019'] - data[f'{feature}_2017']


features = [f'{feature}_change' for feature in change_features] + [
    'ela_grade4_2017', 'math_grade4_2017', 'foreclosure_rate_2017', 'park_access_2017'
]
X = data[features].values
y = data['gentrification_label'].values

# one hot encode y feature
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_one_hot = np.zeros((len(y), 3))
y_one_hot[np.arange(len(y)), y_encoded] = 1


# scale x features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_one_hot, test_size=0.2, random_state=42, stratify=y)

test_indices = data.index[-len(X_test):] if not hasattr(X_test, 'index') else X_test.index
test_data = data.iloc[test_indices].copy()
test_data['actual_label'] = le.inverse_transform(np.argmax(y_test, axis=1))


class_weights = np.array([1.5, 1.0, 1.5])


d = X_train.shape[1] 
h1 = 16  # number of neurons in 1st layer
h2 = 8   # number of neurons in 2nd layer
o = 3 

W1 = np.random.randn(d, h1) * 0.01 
b1 = np.zeros((h1, 1))             
W2 = np.random.randn(h1, h2) * 0.01 
b2 = np.zeros((h2, 1))              
W3 = np.random.randn(h2, o) * 0.01  
b3 = np.zeros((o, 1))               


def relu(x):
    return np.maximum(0, x)

def relu_deriv(x):
    return np.heaviside(x, 0)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exp_x / np.sum(exp_x, axis=0, keepdims=True)



def compute_loss(y_true, y_pred, class_weights):
    y_true = y_true.reshape(-1, 3)
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    loss = -np.sum(y_true * np.log(y_pred) * class_weights) / y_true.shape[0]
    return loss


# Experimenting with Learning Rate

In [4]:

def build_and_train_model(eta):
    np.random.seed(42)
    n = X_train.shape[0] 
    d = X_train.shape[1] 
    h1 = 16 
    h2 = 8
    o = 3
    
    W1 = np.random.randn(d, h1) * 0.01
    b1 = np.zeros((h1, 1))
    W2 = np.random.randn(h1, h2) * 0.01
    b2 = np.zeros((h2, 1))
    W3 = np.random.randn(h2, o) * 0.01
    b3 = np.zeros((o, 1))
    
    epochs = 400
    n = X_train.shape[0]
    
    for epoch in range(epochs):
        loss = 0
        dW1, db1 = np.zeros_like(W1), np.zeros_like(b1)
        dW2, db2 = np.zeros_like(W2), np.zeros_like(b2)
        dW3, db3 = np.zeros_like(W3), np.zeros_like(b3)
        
        for i in range(n):
            x = X_train[i].reshape(d, 1)
            y_true = y_train[i].reshape(o, 1)
                 
            z1 = np.dot(W1.T, x) + b1
            h1 = relu(z1)
            z2 = np.dot(W2.T, h1) + b2
            h2 = relu(z2)
            z3 = np.dot(W3.T, h2) + b3
            y_pred = softmax(z3)
            y_pred, h1, h2, z1, z2
            
            loss += compute_loss(y_true, y_pred, class_weights)
            
            dz3 = y_pred - y_true
            dW3 += np.dot(h2,dz3.T)
            db3 += dz3
            
            dh2 = np.dot(W3, dz3)
            dz2 = dh2 * relu_deriv(z2)
            dW2 += np.dot(h1,dz2.T)
            db2 += dz2
            
            dh1 = np.dot(W2, dz2)
            dz1 = dh1 * relu_deriv(z1)
            dW1 += np.dot(x,dz1.T)
            db1 += dz1
        
        dW1 /= n
        db1 /= n
        dW2 /= n
        db2 /= n
        dW3 /= n
        db3 /= n
        
        W1 -= eta * dW1
        b1 -= eta * db1
        W2 -= eta * dW2
        b2 -= eta * db2
        W3 -= eta * dW3
        b3 -= eta * db3
        
    
    y_pred_test = []
    for i in range(X_test.shape[0]):
        x = X_test[i].reshape(d, 1)
        z1 = np.dot(W1.T, x) + b1
        h1 = relu(z1)
        z2 = np.dot(W2.T, h1) + b2
        h2 = relu(z2)
        z3 = np.dot(W3.T, h2) + b3
        y_pred = softmax(z3)
        y_pred_test.append(np.argmax(y_pred))
    y_pred_test = np.array(y_pred_test)
    y_test_labels = np.argmax(y_test, axis=1)
    
    accuracy = accuracy_score(y_test_labels, y_pred_test)
    conf_matrix = confusion_matrix(y_test_labels, y_pred_test)
    class_report = classification_report(y_test_labels, y_pred_test, labels=[0, 1, 2], target_names=le.classes_, zero_division=0)
    
    test_data['predicted_label'] = le.inverse_transform(y_pred_test)
    
    print(f"Learning Rate: {eta}")
    print("Classification Report:\n", class_report)
    
    return accuracy

learning_rates = [0.0001, 0.001, 0.005, 0.01]


results = []


for eta in learning_rates:
    print(f"\n eta = {eta}")
    accuracy = build_and_train_model(eta)
    results.append((eta, accuracy))



print("\nSummary of eta Experiment ")
for eta, acc in results:
    print(f"Learning Rate: {eta}, Test Accuracy: {acc:.2f}")


 eta = 0.0001
Learning Rate: 0.0001
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       0.00      0.00      0.00         3
  Higher-Income       0.55      1.00      0.71         6
Non-Gentrifying       0.00      0.00      0.00         2

       accuracy                           0.55        11
      macro avg       0.18      0.33      0.24        11
   weighted avg       0.30      0.55      0.39        11


 eta = 0.001
Learning Rate: 0.001
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       0.00      0.00      0.00         3
  Higher-Income       0.55      1.00      0.71         6
Non-Gentrifying       0.00      0.00      0.00         2

       accuracy                           0.55        11
      macro avg       0.18      0.33      0.24        11
   weighted avg       0.30      0.55      0.39        11


 eta = 0.005
Learning Rate: 0.005
Classification Report:
                  

# Experimenting with Dropout Rate and adding Batches


In [6]:
#reusable function for dropout
def apply_dropout(h, p):
    if p > 0:
        mask = np.random.binomial(1, 1-p, size=h.shape)
        h_dropped = h * mask / (1-p)
        return h_dropped, mask
    return h, np.ones_like(h)


In [7]:
# increase class weighting to try to componsate for inbalanced data set
class_weights = np.array([2.0, 1.0, 2.0])  # Gentrifying, Higher-Income, Non-Gentrifying

def build_and_train_model(dropout_rate):
    np.random.seed(42)
    d = X_train.shape[1]
    h1 = 16
    h2 = 8
    o = 3
    
    W1 = np.random.randn(d, h1) / np.sqrt(d)
    b1 = np.zeros((h1, 1))
    W2 = np.random.randn(h1, h2) / np.sqrt(h1)
    b2 = np.zeros((h2, 1))
    W3 = np.random.randn(h2, o) / np.sqrt(h2)
    b3 = np.zeros((o, 1))
    
    eta = 0.01 # all learning rates preformed the same so pick .01 for now, will revisit at the end
    epochs = 1000
    n = X_train.shape[0]
    batch_size = 8 
    val_accuracies = []
    
    for epoch in range(epochs):
        indices = np.random.permutation(n)
        X_train_shuffled = X_train[indices]
        y_train_shuffled = y_train[indices]
        
        for start in range(0, n, batch_size):
            end = min(start + batch_size, n)
            X_batch = X_train_shuffled[start:end]
            y_batch = y_train_shuffled[start:end]
            
            dW1, db1 = np.zeros_like(W1), np.zeros_like(b1)
            dW2, db2 = np.zeros_like(W2), np.zeros_like(b2)
            dW3, db3 = np.zeros_like(W3), np.zeros_like(b3)
            loss = 0
            
            for i in range(X_batch.shape[0]):
                x = X_batch[i].reshape(d, 1)
                y_true = y_batch[i].reshape(o, 1)
                
                z1 = np.dot(W1.T,x) + b1
                h1 = relu(z1)
                h1, mask1 = apply_dropout(h1, dropout_rate)
    
                z2 = np.dot(W2.T, h1) + b2
                h2 = relu(z2)
                h2, mask2 = apply_dropout(h2, dropout_rate)
    
                z3 = np.dot(W3.T,h2) + b3
                y_pred = softmax(z3)
                
                loss += compute_loss(y_true, y_pred, class_weights)
                
                dz3 = y_pred - y_true
                dW3 += np.dot(h2, dz3.T)
                db3 += dz3
                
                dh2 = np.dot(W3 , dz3)
                dh2 *= mask2 / (1-dropout_rate)
                dz2 = dh2 * relu_deriv(z2)
                dW2 +=  np.dot(h1,dz2.T)
                db2 += dz2
                
                dh1 = np.dot(W2, dz2)
                dh1 *= mask1 / (1-dropout_rate)
                dz1 = dh1 * relu_deriv(z1)
                dW1 += np.dot(x , dz1.T)
                db1 += dz1
            
            dW1 /= X_batch.shape[0]
            db1 /= X_batch.shape[0]
            dW2 /= X_batch.shape[0]
            db2 /= X_batch.shape[0]
            dW3 /= X_batch.shape[0]
            db3 /= X_batch.shape[0]
            
            W1 -= eta * dW1
            b1 -= eta * db1
            W2 -= eta * dW2
            b2 -= eta * db2
            W3 -= eta * dW3
            b3 -= eta * db3
        
    y_pred_test = []
    for i in range(X_test.shape[0]):
        x = X_test[i].reshape(d, 1)
        z1 = np.dot(W1.T,x) + b1
        h1 = relu(z1)
    
        z2 = np.dot(W2.T, h1) + b2
        h2 = relu(z2)
    
        z3 = np.dot(W3.T,h2) + b3
        y_pred = softmax(z3)        
        y_pred_test.append(np.argmax(y_pred))
    y_pred_test = np.array(y_pred_test)
    y_test_labels = np.argmax(y_test, axis=1)
    
    accuracy = accuracy_score(y_test_labels, y_pred_test)
    conf_matrix = confusion_matrix(y_test_labels, y_pred_test)
    class_report = classification_report(y_test_labels, y_pred_test, labels=[0, 1, 2], target_names=le.classes_, zero_division=0)
    
    test_data_copy = test_data.copy()  # Avoid overwriting
    test_data_copy['predicted_label'] = le.inverse_transform(y_pred_test)
    
    print(f"\nDropout Rate: {dropout_rate}")
    print("Classification Report:\n", class_report)
    
    return accuracy, test_data_copy

# Dropout rates to test
dropout_rates = [0.0, 0.1, 0.3, 0.5]


results = []
test_data_all = []

for dropout_rate in dropout_rates:
    print(f"\ndropout rate = {dropout_rate}")
    accuracy, test_data_result = build_and_train_model(dropout_rate)
    results.append((dropout_rate, accuracy))
    test_data_all.append(test_data_result)

print("\nDropout Rate exp results:")
for dropout_rate, acc in results:
    print(f"Dropout Rate: {dropout_rate}, Test Accuracy: {acc:.2f}")


dropout rate = 0.0

Dropout Rate: 0.0
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       1.00      0.67      0.80         3
  Higher-Income       0.83      0.83      0.83         6
Non-Gentrifying       0.33      0.50      0.40         2

       accuracy                           0.73        11
      macro avg       0.72      0.67      0.68        11
   weighted avg       0.79      0.73      0.75        11


dropout rate = 0.1

Dropout Rate: 0.1
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       0.67      0.67      0.67         3
  Higher-Income       0.80      0.67      0.73         6
Non-Gentrifying       0.33      0.50      0.40         2

       accuracy                           0.64        11
      macro avg       0.60      0.61      0.60        11
   weighted avg       0.68      0.64      0.65        11


dropout rate = 0.3

Dropout Rate: 0.3
Classification Report:
        

# Experimening with Number of Neurons 

In [9]:

def build_and_train_model(neurons):
    np.random.seed(42)
    h1, h2 = neurons 
    d = X_train.shape[1] 
    o = 3 
    
    W1 = np.random.randn(d, h1) / np.sqrt(d)
    b1 = np.zeros((h1, 1))
    W2 = np.random.randn(h1, h2) / np.sqrt(h1)
    b2 = np.zeros((h2, 1))
    W3 = np.random.randn(h2, o) / np.sqrt(h2)
    b3 = np.zeros((o, 1))
    
    eta = 0.01 # from exp 1
    dropout_rate = 0.3  # from exp 2, again all had similar but pick .3 for a more balanced appraoch to be safer
    epochs = 1000
    n = X_train.shape[0]
    batch_size = 8
    val_accuracies = []
    
    for epoch in range(epochs):
        
        indices = np.random.permutation(n)
        X_train_shuffled = X_train[indices]
        y_train_shuffled = y_train[indices]
        
        for start in range(0, n, batch_size):
            end = min(start + batch_size, n)
            X_batch = X_train_shuffled[start:end]
            y_batch = y_train_shuffled[start:end]
            
            dW1, db1 = np.zeros_like(W1), np.zeros_like(b1)
            dW2, db2 = np.zeros_like(W2), np.zeros_like(b2)
            dW3, db3 = np.zeros_like(W3), np.zeros_like(b3)
            loss = 0
            
            for i in range(X_batch.shape[0]):
                x = X_batch[i].reshape(d, 1)
                y_true = y_batch[i].reshape(o, 1)
                
                z1 = np.dot(W1.T,x) + b1
                h1 = relu(z1)
                h1, mask1 = apply_dropout(h1, dropout_rate)
    
                z2 = np.dot(W2.T, h1) + b2
                h2 = relu(z2)
                h2, mask2 = apply_dropout(h2, dropout_rate)
    
                z3 = np.dot(W3.T,h2) + b3
                y_pred = softmax(z3)
    
                
                loss += compute_loss(y_true, y_pred, class_weights)
                
                dz3 = y_pred - y_true
                dW3 += np.dot(h2, dz3.T)
                db3 += dz3
                
                dh2 = np.dot(W3, dz3)
                dh2 *= mask2 / (1-dropout_rate)
                dz2 = dh2 * relu_deriv(z2)
                dW2 += np.dot(h1,dz2.T)
                db2 += dz2
                
                dh1 = np.dot(W2,dz2)
                dh1 *= mask1 / (1-dropout_rate)
                dz1 = dh1 * relu_deriv(z1)
                dW1 += np.dot(x ,dz1.T)
                db1 += dz1
            
            dW1 /= X_batch.shape[0]
            db1 /= X_batch.shape[0]
            dW2 /= X_batch.shape[0]
            db2 /= X_batch.shape[0]
            dW3 /= X_batch.shape[0]
            db3 /= X_batch.shape[0]
            
            W1 -= eta * dW1
            b1 -= eta * db1
            W2 -= eta * dW2
            b2 -= eta * db2
            W3 -= eta * dW3
            b3 -= eta * db3
        
    y_pred_test = []
    for i in range(X_test.shape[0]):
        x = X_test[i].reshape(d, 1)
        z1 = np.dot(W1.T, x) + b1
        h1 = relu(z1)
            
        z2 = np.dot(W2.T, h1) + b2
        h2 = relu(z2)
    
        z3 = np.dot(W3.T, h2) + b3
        y_pred = softmax(z3)
        y_pred_test.append(np.argmax(y_pred))
    y_pred_test = np.array(y_pred_test)
    y_test_labels = np.argmax(y_test, axis=1)
    
    accuracy = accuracy_score(y_test_labels, y_pred_test)
    conf_matrix = confusion_matrix(y_test_labels, y_pred_test)
    class_report = classification_report(y_test_labels, y_pred_test, labels=[0, 1, 2], target_names=le.classes_, zero_division=0)
    
    test_data_copy = test_data.copy()  # Avoid overwriting
    test_data_copy['predicted_label'] = le.inverse_transform(y_pred_test)
    
    print(f"\nNeurons: {neurons}")
    print("Classification Report:\n", class_report)
    
    return accuracy, test_data_copy

neuron_configs = [(8, 4), (16, 8), (32, 16), (64, 32)]
results = []
test_data_all = []


for neurons in neuron_configs:
    print(f"\nRunning experiment with neurons: {neurons}")
    accuracy, test_data_result = build_and_train_model(neurons)
    results.append((neurons, accuracy))
    test_data_all.append(test_data_result)



print("\nNeuron Configuration Exp Results:")
for neurons, acc in results:
    print(f"Neurons: {neurons}, Test Accuracy: {acc:.2f}")


Running experiment with neurons: (8, 4)

Neurons: (8, 4)
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       0.00      0.00      0.00         3
  Higher-Income       0.50      0.83      0.62         6
Non-Gentrifying       0.00      0.00      0.00         2

       accuracy                           0.45        11
      macro avg       0.17      0.28      0.21        11
   weighted avg       0.27      0.45      0.34        11


Running experiment with neurons: (16, 8)

Neurons: (16, 8)
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       1.00      0.67      0.80         3
  Higher-Income       0.83      0.83      0.83         6
Non-Gentrifying       0.33      0.50      0.40         2

       accuracy                           0.73        11
      macro avg       0.72      0.67      0.68        11
   weighted avg       0.79      0.73      0.75        11


Running experiment with neuro

# Experimening Batch Size 

In [11]:

class_weights = np.array([2.5, 1.0, 2.5])  # Adjusted to boost minority classes


def build_and_train_model(batch_size):
    np.random.seed(42)
    d = X_train.shape[1] 
    h1 = 32 # picked 31 and 16  even tho 18,8 has similar result because its a complex classification prob, lean towards more complex model 
    h2 = 16
    o = 3 
    
    W1 = np.random.randn(d, h1) / np.sqrt(d)
    b1 = np.zeros((h1, 1))
    W2 = np.random.randn(h1, h2) / np.sqrt(h1)
    b2 = np.zeros((h2, 1))
    W3 = np.random.randn(h2, o) / np.sqrt(h2)
    b3 = np.zeros((o, 1))
    
    eta = 0.01  # from exp 1
    dropout_rate = 0.3  # from exp 2
    epochs = 1000
    n = X_train.shape[0]
    val_accuracies = []
    
    for epoch in range(epochs):
        indices = np.random.permutation(n)
        X_train_shuffled = X_train[indices]
        y_train_shuffled = y_train[indices]
        
        for start in range(0, n, batch_size):
            end = min(start + batch_size, n)
            X_batch = X_train_shuffled[start:end]
            y_batch = y_train_shuffled[start:end]
            
            dW1, db1 = np.zeros_like(W1), np.zeros_like(b1)
            dW2, db2 = np.zeros_like(W2), np.zeros_like(b2)
            dW3, db3 = np.zeros_like(W3), np.zeros_like(b3)
            loss = 0
            
            for i in range(X_batch.shape[0]):
                x = X_batch[i].reshape(d, 1)
                y_true = y_batch[i].reshape(o, 1)
                
                z1 = np.dot(W1.T,x) + b1
                h1 = relu(z1)
                h1, mask1 = apply_dropout(h1, dropout_rate)
    
                z2 = np.dot(W2.T, h1) + b2
                h2 = relu(z2)
                h2, mask2 = apply_dropout(h2, dropout_rate)
    
                z3 = np.dot(W3.T,h2) + b3
                y_pred = softmax(z3)
                
                loss += compute_loss(y_true, y_pred, class_weights)
                
                dz3 = y_pred - y_true
                dW3 += np.dot(h2,dz3.T)
                db3 += dz3
                
                dh2 = np.dot(W3,dz3)
                dh2 *= mask2 / (1-dropout_rate)
                dz2 = dh2 * relu_deriv(z2)
                dW2 += np.dot(h1,dz2.T)
                db2 += dz2
                
                dh1 = np.dot(W2, dz2)
                dh1 *= mask1 / (1-dropout_rate)
                dz1 = dh1 * relu_deriv(z1)
                dW1 += np.dot(x,dz1.T)
                db1 += dz1
            
            dW1 /= X_batch.shape[0]
            db1 /= X_batch.shape[0]
            dW2 /= X_batch.shape[0]
            db2 /= X_batch.shape[0]
            dW3 /= X_batch.shape[0]
            db3 /= X_batch.shape[0]
            
            W1 -= eta * dW1
            b1 -= eta * db1
            W2 -= eta * dW2
            b2 -= eta * db2
            W3 -= eta * dW3
            b3 -= eta * db3

    
    y_pred_test = []
    for i in range(X_test.shape[0]):
        x = X_test[i].reshape(d, 1)
        
        z1 = np.dot(W1.T,x) + b1
        h1 = relu(z1)
        z2 = np.dot(W2.T, h1) + b2
        h2 = relu(z2)
        z3 = np.dot(W3.T,h2) + b3
        y_pred = softmax(z3)
        
        y_pred_test.append(np.argmax(y_pred))
    y_pred_test = np.array(y_pred_test)
    y_test_labels = np.argmax(y_test, axis=1)
    
    accuracy = accuracy_score(y_test_labels, y_pred_test)
    conf_matrix = confusion_matrix(y_test_labels, y_pred_test)
    class_report = classification_report(y_test_labels, y_pred_test, labels=[0, 1, 2], target_names=le.classes_, zero_division=0)
    
    test_data_copy = test_data.copy()
    test_data_copy['predicted_label'] = le.inverse_transform(y_pred_test)
    
    print(f"\nBatch Size: {batch_size}")
    print("Classification Report:\n", class_report)
    
    return accuracy, test_data_copy

batch_sizes = [4, 8, 16, 32]

results = []
val_histories = []
test_data_all = []

for batch_size in batch_sizes:
    print(f"\nbatch size: {batch_size}")
    accuracy, test_data_result = build_and_train_model(batch_size)
    results.append((batch_size, accuracy))
    test_data_all.append(test_data_result)


print("\nSummary of Batch Size Experiment:")
for batch_size, acc in results:
    print(f"Batch Size: {batch_size}, Test Accuracy: {acc:.2f}")


batch size: 4

Batch Size: 4
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       1.00      0.67      0.80         3
  Higher-Income       0.71      0.83      0.77         6
Non-Gentrifying       0.50      0.50      0.50         2

       accuracy                           0.73        11
      macro avg       0.74      0.67      0.69        11
   weighted avg       0.75      0.73      0.73        11


batch size: 8

Batch Size: 8
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       1.00      0.67      0.80         3
  Higher-Income       0.71      0.83      0.77         6
Non-Gentrifying       0.50      0.50      0.50         2

       accuracy                           0.73        11
      macro avg       0.74      0.67      0.69        11
   weighted avg       0.75      0.73      0.73        11


batch size: 16

Batch Size: 16
Classification Report:
                  precision    re

# Learning Rate Again

In [13]:

def build_and_train_model(eta):
    np.random.seed(42)
    d = X_train.shape[1]
    h1 = 32
    h2 = 16
    o = 3
    
    W1 = np.random.randn(d, h1) / np.sqrt(d)
    b1 = np.zeros((h1, 1))
    W2 = np.random.randn(h1, h2) / np.sqrt(h1)
    b2 = np.zeros((h2, 1))
    W3 = np.random.randn(h2, o) / np.sqrt(h2)
    b3 = np.zeros((o, 1))
    
    dropout_rate = 0.3
    batch_size = 8 # picked this one from the batch experiment bc the confusion matrix is best balanced
    epochs = 1000
    n = X_train.shape[0]
    val_accuracies = []
    
    for epoch in range(epochs):
        indices = np.random.permutation(n)
        X_train_shuffled = X_train[indices]
        y_train_shuffled = y_train[indices]
        
        for start in range(0, n, batch_size):
            end = min(start + batch_size, n)
            X_batch = X_train_shuffled[start:end]
            y_batch = y_train_shuffled[start:end]
            
            dW1, db1 = np.zeros_like(W1), np.zeros_like(b1)
            dW2, db2 = np.zeros_like(W2), np.zeros_like(b2)
            dW3, db3 = np.zeros_like(W3), np.zeros_like(b3)
            loss = 0
            
            for i in range(X_batch.shape[0]):
                x = X_batch[i].reshape(d, 1)
                y_true = y_batch[i].reshape(o, 1)
                
                z1 = np.dot(W1.T,x) + b1
                h1 = relu(z1)
                h1, mask1 = apply_dropout(h1, dropout_rate)
    
                z2 = np.dot(W2.T, h1) + b2
                h2 = relu(z2)
                h2, mask2 = apply_dropout(h2, dropout_rate)
    
                z3 = np.dot(W3.T,h2) + b3
                y_pred = softmax(z3)
                
                loss += compute_loss(y_true, y_pred, class_weights)
                
                dz3 = y_pred - y_true
                dW3 += np.dot(h2, dz3.T)
                db3 += dz3
                
                dh2 = np.dot(W3, dz3)
                dh2 *= mask2 / (1-dropout_rate)
                dz2 = dh2 * relu_deriv(z2)
                dW2 += np.dot(h1, dz2.T)
                db2 += dz2
                
                dh1 = np.dot(W2,dz2)
                dh1 *= mask1 / (1-dropout_rate)
                dz1 = dh1 * relu_deriv(z1)
                dW1 += np.dot(x,dz1.T)
                db1 += dz1
            
            dW1 /= X_batch.shape[0]
            db1 /= X_batch.shape[0]
            dW2 /= X_batch.shape[0]
            db2 /= X_batch.shape[0]
            dW3 /= X_batch.shape[0]
            db3 /= X_batch.shape[0]
            
            W1 -= eta * dW1
            b1 -= eta * db1
            W2 -= eta * dW2
            b2 -= eta * db2
            W3 -= eta * dW3
            b3 -= eta * db3



    y_pred_test = []
    for i in range(X_test.shape[0]):
        x = X_test[i].reshape(d, 1)
        z1 = np.dot(W1.T,x) + b1
        h1 = relu(z1)
        z2 = np.dot(W2.T, h1) + b2
        h2 = relu(z2)
        z3 = np.dot(W3.T,h2) + b3
        y_pred = softmax(z3)
        
        y_pred_test.append(np.argmax(y_pred))
    y_pred_test = np.array(y_pred_test)
    y_test_labels = np.argmax(y_test, axis=1)
    
    accuracy = accuracy_score(y_test_labels, y_pred_test)
    conf_matrix = confusion_matrix(y_test_labels, y_pred_test)
    class_report = classification_report(y_test_labels, y_pred_test, labels=[0, 1, 2], target_names=le.classes_, zero_division=0)
    
    test_data_copy = test_data.copy()
    test_data_copy['predicted_label'] = le.inverse_transform(y_pred_test)
    
    print(f"\nLearning Rate: {eta}")
    print("Classification Report:\n", class_report)
    
    return accuracy, test_data_copy

learning_rates = [0.001, 0.005, 0.01, 0.05]

results = []
val_histories = []
test_data_all = []

for eta in learning_rates:
    print(f"\nlearning rate: {eta}")
    accuracy, test_data_result = build_and_train_model(eta)
    results.append((eta, accuracy))
    test_data_all.append(test_data_result)


print("\nSummar of Learning Rate Experiment 2:")
for eta, acc in results:
    print(f"Learning Rate: {eta}, Test Accuracy: {acc:.2f}")


learning rate: 0.001

Learning Rate: 0.001
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       0.00      0.00      0.00         3
  Higher-Income       0.55      1.00      0.71         6
Non-Gentrifying       0.00      0.00      0.00         2

       accuracy                           0.55        11
      macro avg       0.18      0.33      0.24        11
   weighted avg       0.30      0.55      0.39        11


learning rate: 0.005

Learning Rate: 0.005
Classification Report:
                  precision    recall  f1-score   support

    Gentrifying       0.50      0.33      0.40         3
  Higher-Income       0.62      0.83      0.71         6
Non-Gentrifying       0.00      0.00      0.00         2

       accuracy                           0.55        11
      macro avg       0.38      0.39      0.37        11
   weighted avg       0.48      0.55      0.50        11


learning rate: 0.01

Learning Rate: 0.01
Classification Rep

## Experiment with number of epochs

In [15]:

def build_and_train_model(epochs_amt):
    np.random.seed(42)
    d = X_train.shape[1]
    h1 = 32 
    h2 = 16
    o = 3
    
    W1 = np.random.randn(d, h1) / np.sqrt(d)
    b1 = np.zeros((h1, 1))
    W2 = np.random.randn(h1, h2) / np.sqrt(h1)
    b2 = np.zeros((h2, 1))
    W3 = np.random.randn(h2, o) / np.sqrt(h2)
    b3 = np.zeros((o, 1))
    
    dropout_rate = 0.3
    batch_size = 8 # picked this one from the batch experiment bc the confusion matrix is best balanced
    eta = .01 # best from second learning rate exp, tied with .05 go with .1
    n = X_train.shape[0]
    
    for epoch in range(epochs_amt):
        indices = np.random.permutation(n)
        X_train_shuffled = X_train[indices]
        y_train_shuffled = y_train[indices]
        
        for start in range(0, n, batch_size):
            end = min(start + batch_size, n)
            X_batch = X_train_shuffled[start:end]
            y_batch = y_train_shuffled[start:end]
            
            dW1, db1 = np.zeros_like(W1), np.zeros_like(b1)
            dW2, db2 = np.zeros_like(W2), np.zeros_like(b2)
            dW3, db3 = np.zeros_like(W3), np.zeros_like(b3)
            loss = 0
            
            for i in range(X_batch.shape[0]):
                x = X_batch[i].reshape(d, 1)
                y_true = y_batch[i].reshape(o, 1)
                
                z1 = np.dot(W1.T,x) + b1
                h1 = relu(z1)
                h1, mask1 = apply_dropout(h1, dropout_rate)
    
                z2 = np.dot(W2.T, h1) + b2
                h2 = relu(z2)
                h2, mask2 = apply_dropout(h2, dropout_rate)
    
                z3 = np.dot(W3.T,h2) + b3
                y_pred = softmax(z3)
                
                loss += compute_loss(y_true, y_pred, class_weights)
                
                dz3 = y_pred - y_true
                dW3 += np.dot(h2, dz3.T)
                db3 += dz3
                
                dh2 = np.dot(W3, dz3)
                dh2 *= mask2 / (1-dropout_rate)
                dz2 = dh2 * relu_deriv(z2)
                dW2 += np.dot(h1,dz2.T)
                db2 += dz2
                
                dh1 = np.dot(W2,dz2)
                dh1 *= mask1 / (1-dropout_rate)
                dz1 = dh1 * relu_deriv(z1)
                dW1 += np.dot(x,dz1.T)
                db1 += dz1
            
            dW1 /= X_batch.shape[0]
            db1 /= X_batch.shape[0]
            dW2 /= X_batch.shape[0]
            db2 /= X_batch.shape[0]
            dW3 /= X_batch.shape[0]
            db3 /= X_batch.shape[0]
            
            W1 -= eta * dW1
            b1 -= eta * db1
            W2 -= eta * dW2
            b2 -= eta * db2
            W3 -= eta * dW3
            b3 -= eta * db3
    

    y_pred_test = []
    for i in range(X_test.shape[0]):
        x = X_test[i].reshape(d, 1)
        z1 = np.dot(W1.T,x) + b1
        h1 = relu(z1)
        z2 = np.dot(W2.T, h1) + b2
        h2 = relu(z2)
        z3 = np.dot(W3.T,h2) + b3
        y_pred = softmax(z3)
        
        y_pred_test.append(np.argmax(y_pred))
    y_pred_test = np.array(y_pred_test)
    y_test_labels = np.argmax(y_test, axis=1)
    
    accuracy = accuracy_score(y_test_labels, y_pred_test)
    conf_matrix = confusion_matrix(y_test_labels, y_pred_test)
    class_report = classification_report(y_test_labels, y_pred_test, labels=[0, 1, 2], target_names=le.classes_, zero_division=0)
    
    test_data_copy = test_data.copy()
    test_data_copy['predicted_label'] = le.inverse_transform(y_pred_test)
    
    print(f"\n Epochs trained with: {epochs_amt}")
    print("Test Set Classifications:")
    print(test_data_copy[['Sub-Borough Area', 'actual_label', 'predicted_label']])
    print(f"Test Accuracy: {accuracy:.2f}")
    print("Confusion Matrix:\n", conf_matrix)
    print("Classification Report:\n", class_report)
    
    return accuracy, test_data_copy

epoch_range = [ 400,600, 800, 1000, 1500, 2000]

results = []
test_data_all = []

for epoch in epoch_range:
    print(f"\nRunning experiment with epoch of: {epoch}")
    accuracy, test_data_result = build_and_train_model(epoch)
    results.append((epoch, accuracy))
    test_data_all.append(test_data_result)


print("\nSummary of Epcoh Experiment:")
for epoch, acc in results:
    print(f"Epoch {epoch}, Test Accuracy: {acc:.2f}")


Running experiment with epoch of: 400

 Epochs trained with: 400
Test Set Classifications:
                 Sub-Borough Area     actual_label  predicted_label
44            Flushing/Whitestone      Gentrifying    Higher-Income
45        Hillcrest/Fresh Meadows    Higher-Income  Non-Gentrifying
46           Ozone Park/Woodhaven    Higher-Income    Higher-Income
47  South Ozone Park/Howard Beach  Non-Gentrifying    Higher-Income
48            Bayside/Little Neck    Higher-Income    Higher-Income
49                        Jamaica      Gentrifying    Higher-Income
50                 Queens Village    Higher-Income    Higher-Income
51                      Rockaways    Higher-Income    Higher-Income
52                    North Shore  Non-Gentrifying    Higher-Income
53                     Mid-Island      Gentrifying      Gentrifying
54                    South Shore    Higher-Income    Higher-Income
Test Accuracy: 0.55
Confusion Matrix:
 [[1 2 0]
 [0 5 1]
 [0 2 0]]
Classification Report:
  