In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from util_funcs import process_data, split_data
from neural_networks import SimpleNN, ComplexNN

In [None]:
# Load the raw CPS extract that every later cell works from.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact location exists.
cps_csv_path = '/Users/niahodges/Downloads/Spring 2024/Practicum in Data Analytics and Statistics/project/temp-data/cps_ALL.csv'
df = pd.read_csv(cps_csv_path)

In [None]:
# Show the loaded frame as this cell's output for a quick visual check.
df

In [None]:
# Give each processing variant its own copy of the raw frame so that whatever
# process_data does to its argument cannot leak into `df` or the other variant.
df_temp1, df_temp2 = df.copy(), df.copy()

# Same pipeline, two modes: one selected by 'gender', one by 'no-gender'.
df_gender = process_data(df_temp1, 'gender')
df_noGender = process_data(df_temp2, 'no-gender')

In [None]:
# Split each processed frame with split_data and unpack its four return values.
# Presumably these are train/test features and train/test targets, as the
# names suggest — confirm against util_funcs.split_data.
X_train_G, X_test_G, y_train_G, y_test_G = split_data(df_gender)
X_train_NG, X_test_NG, y_train_NG, y_test_NG = split_data(df_noGender)

# Gender-inclusive models

## Complex NN

In [None]:
# Network layout for the gender-inclusive model: the first hidden layer is as
# wide as the input, followed by three narrower hidden layers and one output.
input_size = X_train_G.shape[1]
hidden_sizes = [input_size, 45, 22, 11]
output_size = 1

# Seeded generator so that restarting the kernel and re-running the notebook
# starts training from the same initial weights.
WEIGHT_SEED = 42
weight_rng = np.random.default_rng(WEIGHT_SEED)

# One standard-normal weight matrix per pair of consecutive layers, shaped
# (units in previous layer, units in next layer). There are no bias terms.
layer_sizes = [input_size, *hidden_sizes, output_size]
weights = [
    weight_rng.standard_normal((fan_in, fan_out))
    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
]

In [None]:
def forward(inputs):
    """Propagate `inputs` through the network held in the global `weights`.

    Every weight matrix except the last is followed by `sigmoid`; the last
    matrix is applied without an activation, so the output is linear.

    Args:
        inputs: Array-like accepted by `np.dot` as the left operand of the
            first weight matrix.

    Returns:
        The result of the final matrix product.
    """
    hidden_weights, output_weight = weights[:-1], weights[-1]
    activation = inputs
    for hidden_weight in hidden_weights:
        activation = sigmoid(np.dot(activation, hidden_weight))
    return np.dot(activation, output_weight)

def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise by numpy.

    Args:
        x: Scalar or array-like supporting unary minus and `np.exp`.

    Returns:
        Value(s) in the same shape as `x`.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def get_weights(self=None):
    """Return the network's current weight matrices.

    Args:
        self: Ignored. It is a leftover from a method-style signature and now
            defaults to None, so the function can be called with no arguments
            (as the notebook does) as well as with one.

    Returns:
        The module-level `weights` list itself, not a copy; in-place changes
        to that list or its arrays are visible through the returned object.
    """
    return weights

In [None]:
# Train the gender-inclusive complex NN with full-batch gradient descent.
# Records the training MSE and MAE of every epoch in mse_history / mae_history
# and updates the arrays in the global `weights` list in place.
mse_history = []
mae_history = []
X_train = X_train_G
y_train = y_train_G
learning_rate = 0.01
epochs = 100

# Work on plain float arrays so the maths does not depend on whether
# split_data returned numpy or pandas objects. NaNs in the inputs are replaced
# by 0 on this private copy, so X_train itself is never modified.
features = np.asarray(X_train, dtype=float)
features = np.where(np.isnan(features), 0.0, features)
targets = np.asarray(y_train, dtype=float).reshape(-1, 1)
n_samples = features.shape[0]

print('Training complex NN...')

for epoch in range(epochs):
    # Forward pass. Same computation as forward(), but every layer's
    # activation is kept because backpropagation needs all of them.
    print(f'Forward pass for epoch {epoch+1}')
    activations = [features]
    for weight in weights[:-1]:
        activations.append(sigmoid(np.dot(activations[-1], weight)))
    output = np.dot(activations[-1], weights[-1]).reshape(-1)

    # Calculate MSE and MAE
    print(f'Calculating MSE and MAE for epoch {epoch+1}')
    if np.isnan(output).any():
        # Best effort: keep the metrics computable if training has diverged.
        output = np.nan_to_num(output)
    mse = mean_squared_error(targets.ravel(), output)
    mae = mean_absolute_error(targets.ravel(), output)

    # Store MSE and MAE
    mse_history.append(mse)
    mae_history.append(mae)

    # Backpropagation
    print(f'Backpropogation for epoch {epoch+1}')
    # Gradient of the half mean-squared error with respect to the linear
    # output. Averaging over samples keeps the step size independent of the
    # size of the training set.
    delta = (output.reshape(-1, 1) - targets) / n_samples
    gradients = [None] * len(weights)
    for i in range(len(weights) - 1, -1, -1):
        # activations[i] is the input that weights[i] was multiplied with.
        gradients[i] = np.dot(activations[i].T, delta)
        if i > 0:
            # Push the error back through weights[i] and through the sigmoid
            # that produced activations[i]; its derivative is a * (1 - a).
            layer_activation = activations[i]
            delta = np.dot(delta, weights[i].T) * layer_activation * (1 - layer_activation)

    # Update weights. All gradients were computed from the pre-update weights,
    # and every layer (including the first) is updated.
    print(f'Updating weights for epoch {epoch+1}')
    for i, gradient in enumerate(gradients):
        weights[i] -= learning_rate * gradient

    if epoch % 10 == 0:
        print(f"Epoch {epoch + 1}/{epochs}, MSE: {mse}, MAE: {mae}")

In [None]:
# Keep the gender-inclusive network's weights under a model-specific name.
# get_weights returns the global `weights` list itself, not a copy.
# NOTE(review): get_weights is declared with a positional parameter in this
# notebook, so this zero-argument call only works if that parameter is
# optional — verify.
nn2_weights_G = get_weights()

In [None]:
# The layers' weight matrices have different shapes, so they cannot be passed
# to pd.DataFrame as one list. Flatten them into a single long-format table
# instead: one row per weight, identified by layer index and by the row and
# column position inside that layer's matrix.
layer_frames = []
for layer_index, layer_weights in enumerate(nn2_weights_G):
    layer_matrix = np.asarray(layer_weights)
    row_index, col_index = np.indices(layer_matrix.shape)
    layer_frames.append(pd.DataFrame({
        'layer': layer_index,
        'row': row_index.ravel(),
        'col': col_index.ravel(),
        'weight': layer_matrix.ravel(),
    }))
nn2_weights_df = pd.concat(layer_frames, ignore_index=True)
nn2_weights_df.to_csv('logs/nn2_G_weights.csv', index=False)

In [None]:
# Per-epoch training metrics of the gender-inclusive complex NN.
# The epoch column is derived from the recorded history rather than
# hard-coded, so all columns keep the same length whatever number of epochs
# the training loop ran for.
learning_history_G = {'epoch': range(1, len(mse_history) + 1),
                      'mse_nn2': mse_history,
                      'mae_nn2': mae_history
                     }

In [None]:
# Persist the gender-inclusive training history as a CSV (no index column).
csv_filename = 'logs/training_history_ComplexNN_gender.csv'
df_learning_history_G = pd.DataFrame(data=learning_history_G)
df_learning_history_G.to_csv(path_or_buf=csv_filename, index=False)

# Gender-exclusive models

## Complex NN

In [None]:
# Network layout for the gender-exclusive model.
input_size = X_train_NG.shape[1]
# NOTE(review): the first hidden layer takes its width from X_train_G (the
# gender-inclusive features), not from X_train_NG. This is kept as written;
# confirm whether it is intentional or a copy-paste leftover.
hidden_sizes = [X_train_G.shape[1], 45, 22, 11]
output_size = 1

# Seeded generator so that restarting the kernel and re-running the notebook
# starts training from the same initial weights.
WEIGHT_SEED = 42
weight_rng = np.random.default_rng(WEIGHT_SEED)

# One standard-normal weight matrix per pair of consecutive layers, shaped
# (units in previous layer, units in next layer). There are no bias terms.
layer_sizes = [input_size, *hidden_sizes, output_size]
weights = [
    weight_rng.standard_normal((fan_in, fan_out))
    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
]

In [None]:
def forward(inputs):
    """Run a forward pass through the network held in the global `weights`.

    Each matrix but the last is followed by `sigmoid`; the last one is applied
    on its own, giving a linear output.

    Args:
        inputs: Array-like that `np.dot` can multiply with the first matrix.

    Returns:
        The product of the last hidden activation and the final matrix.
    """
    signal = inputs
    for layer_matrix in weights[:-1]:
        pre_activation = np.dot(signal, layer_matrix)
        signal = sigmoid(pre_activation)
    return np.dot(signal, weights[-1])

def sigmoid(self, x=None):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise by numpy.

    The leading `self` parameter is a leftover from a method-style signature.
    `forward` calls this function with a single positional argument, which
    would otherwise fail, so when `x` is omitted the first argument is taken
    as the input. The two-argument form `sigmoid(obj, x)` keeps working.

    Args:
        self: Ignored when `x` is given; otherwise used as the input.
        x: Scalar or array-like supporting unary minus and `np.exp`.

    Returns:
        Value(s) in the same shape as the input.
    """
    if x is None:
        x = self
    return 1 / (1 + np.exp(-x))

def get_weights(self=None):
    """Return the network's current weight matrices.

    Args:
        self: Ignored. It is a leftover from a method-style signature and now
            defaults to None, so the function can be called with no arguments
            (as the notebook does) as well as with one.

    Returns:
        The module-level `weights` list itself, not a copy; in-place changes
        to that list or its arrays are visible through the returned object.
    """
    return weights

In [None]:
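# TODO: placeholder for the gender-exclusive training loop (the NN body) — not written yet.
# Until it is filled in, `weights` stays at its random initialisation and
# `mse_history` / `mae_history` still hold whatever an earlier cell left in them.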

In [None]:
# Keep the gender-exclusive network's weights under a model-specific name.
# get_weights returns the global `weights` list itself, not a copy.
# NOTE(review): get_weights is declared with a positional parameter in this
# notebook, so this zero-argument call only works if that parameter is
# optional — verify.
nn2_weights_NG = get_weights()

In [None]:
# The layers' weight matrices have different shapes, so they cannot be passed
# to pd.DataFrame as one list. Flatten them into a single long-format table
# instead: one row per weight, identified by layer index and by the row and
# column position inside that layer's matrix.
layer_frames = []
for layer_index, layer_weights in enumerate(nn2_weights_NG):
    layer_matrix = np.asarray(layer_weights)
    row_index, col_index = np.indices(layer_matrix.shape)
    layer_frames.append(pd.DataFrame({
        'layer': layer_index,
        'row': row_index.ravel(),
        'col': col_index.ravel(),
        'weight': layer_matrix.ravel(),
    }))
nn2_weights_df = pd.concat(layer_frames, ignore_index=True)
nn2_weights_df.to_csv('logs/nn2_NG_weights.csv', index=False)

In [None]:
# Per-epoch training metrics to be saved for the gender-exclusive complex NN.
# The epoch column is derived from the recorded history rather than
# hard-coded, so all columns keep the same length whatever number of epochs
# the training loop ran for.
# NOTE(review): no gender-exclusive training loop is visible before this cell,
# so mse_history / mae_history may still hold the values of an earlier
# training run — confirm before trusting the saved file.
learning_history_NG = {'epoch': range(1, len(mse_history) + 1),
                       'mse_nn2': mse_history,
                       'mae_nn2': mae_history
                      }

In [None]:
# Persist the gender-exclusive training history as a CSV (no index column).
csv_filename = 'logs/training_history_ComplexNN_no-gender.csv'
df_learning_history_NG = pd.DataFrame(data=learning_history_NG)
df_learning_history_NG.to_csv(path_or_buf=csv_filename, index=False)