Import dataset

In [1]:
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# fetch dataset
default_of_credit_card_clients = fetch_ucirepo(id=350)

# data (as pandas dataframes)
X = default_of_credit_card_clients.data.features
y = default_of_credit_card_clients.data.targets


Start preprocessing of data

In [2]:
# combine X and Y to see how many rows have NaN
combined = pd.concat([X, y], axis=1)

# drop rows with NaN
combined = combined.dropna(how='any')

# split back into X and y
X = combined.drop(columns=['Y'])  # drop the 'Y' column
y = combined.drop(columns=X.columns)


Split and scale data

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

scaler = StandardScaler()

# use our scaler to fit and transform our training data
# X_train = scaler.fit_transform(X_train)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_test.columns)

# todo: why do we have to scale this too!
X_test = pd.DataFrame(scaler.fit_transform(X_test), columns=X_test.columns)

# add our intercepts
X.insert(0, "intercept", 1)        # after scaling they are separate dataframes
X_test.insert(0, "intercept", 1)
X_train.insert(0, "intercept", 1)

Initialize our Hyperparameters for Gradient Descent

In [5]:
# Initialize B to 0
beta_gd = np.zeros((X.shape[1], 1))

# Set learning rate eta
eta = 0.000001

# Set number of iterations
max_iter = 20000

# Set batch size
batch_size = 20

# Set epochs (for SGD)
epochs = 5

#TODO: Below is very subject to change

In [6]:
# gradient function that can take different sizes (even 1 point)
def gradient(x_train_batch, y_train_batch, beta):
    return -2 * x_train_batch.T @ y_train_batch + 2 * x_train_batch.T @ (x_train_batch @ beta)

In [7]:
# Batch Gradient Descent

# Train model
for _ in range(max_iter):
    # not shuffling data for simplicity
    grad = gradient(X_train.values, y_train.values, beta_gd)
    beta_gd = beta_gd - eta * grad

# Predict on test set
y_test_hat = X_test.values.dot(beta_gd)
residuals = y_test.values - y_test_hat

# Calculate RSS
RSS = (residuals.T.dot(residuals))
print(RSS)

[[887.34466549]]


In [32]:
# Stochastic Gradient Descent
for _ in range(epochs):
    # Shuffle data
    new_indices = np.random.permutation(len(X_train))
    X_train_shuf = X_train.values[new_indices]
    y_train_shuf = y_train.values[new_indices]

    for i in range(len(X_train_shuf)):
        grad = gradient(np.atleast_2d(X_train_shuf[i]), y_train_shuf, beta_gd)
        beta_gd = beta_gd - eta * grad

y_test_hat = X_test.values.dot(beta_gd)
residuals = y_test.values - y_test_hat

# Calculate RSS
RSS = (residuals.T.dot(residuals))
print(RSS)

TypeError: 'numpy.ndarray' object is not callable

In [None]:
# Mini-Batch Gradient Descent
for _ in range(epochs):
    # Shuffle data
    new_indices = np.random.permutation(len(X_train))
    X_train_shuf = X_train.values[new_indices]
    y_train_shuf = y_train.values[new_indices]

    # todo: more qualifications on randomizing batch?
    for i in range(len(X_train_shuf)):
        gradient = gradient(np.atleast_2d(X_train_shuf[i:i+batch_size]), np.atleast_2d(y_train_shuf[i:i+batch_size]), beta_gd)
        beta_gd = beta_gd - eta * gradient

y_test_hat = X_test.values.dot(beta_gd)
residuals = y_test.values - y_test_hat

# Calculate RSS
RSS = (residuals.T.dot(residuals))
print(RSS)