In [None]:
import os
# cuBLAS workspace configuration. PyTorch's reproducibility notes ask for this
# variable to be set when torch.use_deterministic_algorithms(True) is used with
# CUDA; it is assigned here, before torch is imported.
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
import torch
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.stats import linregress, t, f
import matplotlib.pyplot as plt
import torch.nn as nn
from scipy.optimize import curve_fit
import torch.optim as optim
import random
import warnings
import json
# Use the GPU when PyTorch can see one, otherwise run everything on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the CPU generator, plus the CUDA generators when CUDA is available), and
    switches PyTorch to deterministic algorithms.

    Parameters:
    - seed (int): Value handed to each library's seeding function.
    """
    # Python's built-in generator and NumPy's global generator.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch CPU generator.
    torch.manual_seed(seed)

    if torch.cuda.is_available():
        # Current CUDA device first, then every visible device (multi-GPU).
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Restrict PyTorch to deterministic algorithm implementations.
    torch.use_deterministic_algorithms(True)

In [None]:
seed = 42
set_seed(seed)

# Learning rate boundary calculation
# Standardize the full California Housing feature matrix and transpose it so
# that features lie on axis 0 and samples on axis 1 (later cells use
# X.shape[0] as the network input dimension).
data = fetch_california_housing()
X = data.data

scaler_X = StandardScaler()
X = scaler_X.fit_transform(X)

X = X.T

# Calculate Sigma: X X^T scaled by 1 / (n_samples - 1)
Sigma = (1 / (X.shape[1] - 1)) * X @ X.T

# Calculate eigenvalues of Sigma.
# Sigma is real and symmetric, so use the symmetric solver: it always returns
# real eigenvalues, whereas the general-purpose eigvals may hand back a complex
# dtype that would then leak into max_lr and the optimizer.
eigenvalues = np.linalg.eigvalsh(Sigma)

# Find the maximum eigenvalue
max_eigenvalue = np.max(eigenvalues)

# Find the maximum learning rate
max_lr = 2 * X.shape[1] / (max_eigenvalue * (X.shape[1] - 1))

# Print the maximum learning rate
print(f"Maximum Learning Rate: {max_lr}")

In [None]:
class FeedForwardNN(nn.Module):
    """Fully connected ReLU network with a single output unit.

    The network is ``Linear(input_dim, width) -> ReLU``, followed by
    ``depth - 1`` further ``Linear(width, width) -> ReLU`` pairs, followed by
    ``Linear(width, 1)``. It is stored as an ``nn.Sequential`` in ``self.model``.

    Parameters:
    - input_dim (int): Number of input features.
    - width (int): Number of units in every hidden layer.
    - depth (int): Number of hidden layers (values below 1 still yield one).
    """

    def __init__(self, input_dim, width, depth):
        super().__init__()
        # Modules are created in forward order: input layer, hidden stack,
        # then the output layer.
        stack = [nn.Linear(input_dim, width), nn.ReLU()]
        for _ in range(1, depth):
            stack += [nn.Linear(width, width), nn.ReLU()]
        self.model = nn.Sequential(*stack, nn.Linear(width, 1))  # Output layer

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        return self.model(x)

In [None]:
def exponential_test(loss, t_start):
    """F-test of an exponential-decay fit against a straight-line fit.

    Fits ``a * exp(-b * x) + c`` (via ``curve_fit``) and a straight line (via
    ``linregress``) to the loss window and compares their residual sums of
    squares with an F statistic. A small p-value means the exponential model
    explains the window significantly better than the line.

    Parameters:
    - loss (sequence of float): Loss values of the window.
    - t_start (int): Step index of the first loss value; the x-axis is
      ``t_start, t_start + 1, ..., t_start + len(loss) - 1``.

    Returns:
    - float: p-value of the F-test.

    Raises:
    - RuntimeError: propagated from ``curve_fit`` when the least-squares fit
      does not converge.
    """
    def exponential_decay(x, a, b, c):
        # The original returned a * (-b * x) + c, i.e. a straight line; the
        # exponential itself was missing, which made the two models identical.
        return a * np.exp(-b * x) + c

    x = np.arange(t_start, len(loss) + t_start)
    y = np.array(loss)

    with warnings.catch_warnings():
        # Ignore all warnings in this block (overflow in exp, covariance
        # estimation warnings from curve_fit, ...).
        warnings.simplefilter("ignore")

        p_exp, _ = curve_fit(exponential_decay, x, y)  # Parameters of exponential decay fitting
        lin_fit = linregress(x, y)  # Parameters of linear fitting

        ss_exp = np.sum((y - exponential_decay(x, *p_exp)) ** 2)  # Residuals for exponential
        ss_lin = np.sum((y - (lin_fit.slope * x + lin_fit.intercept)) ** 2)  # Residuals for linear

        df1 = len(x) - 2  # Degrees of freedom for linear (2 parameters)
        df2 = len(x) - 3  # Degrees of freedom for exponential (3 parameters)
        f_stat = (ss_lin - ss_exp) / (df1 - df2) / (ss_exp / df2)  # F-test of residuals
        p = 1 - f.cdf(f_stat, df1 - df2, df2)
        return p

def linear_test(loss, t_start):
    """One-tailed t-test on the slope of a straight line fitted to the losses.

    Regresses the loss values on the step indices
    ``t_start, ..., t_start + len(loss) - 1`` and returns the lower-tail
    probability of the slope's t-statistic, so a small value indicates a
    significantly negative slope.

    Parameters:
    - loss (sequence of float): Loss values of the window.
    - t_start (int): Step index of the first loss value.

    Returns:
    - float: One-tailed p-value, ``t.cdf(slope / std_err, len(loss) - 2)``.
    """
    steps = np.arange(t_start, t_start + len(loss))
    fit = linregress(steps, np.array(loss))
    # t-statistic of the slope and its degrees of freedom (n - 2).
    statistic = fit.slope / fit.stderr
    dof = len(steps) - 2
    # Lower tail only: the test is one-sided.
    return t.cdf(statistic, dof)

In [None]:
def train_exptest(model, max_lr, alpha=0.05, beta=0.1, epochs=50):
    """Train on California Housing with full-batch SGD and the ExpTest schedule.

    Training losses are collected in windows. Until an exponential phase has
    been confirmed, every full window is checked with ``exponential_test``: if
    it fails (p >= alpha) the learning rate is multiplied by ``beta`` and
    training restarts from a freshly initialised network. Once it has passed,
    every later window is checked with ``linear_test``; a failure multiplies
    the learning rate by ``beta`` but keeps the current weights. The weights
    with the lowest validation loss are restored before the test evaluation.

    Parameters:
    - model (nn.Module): Network to train. It is not moved here, so it must
      already live on the device chosen below (CUDA if available, else CPU).
    - max_lr (float): Initial SGD learning rate.
    - alpha (float): Significance level used by both tests.
    - beta (float): Factor applied to the learning rate when a test fails.
    - epochs (int): Number of full-batch updates.

    Returns:
    - tuple: ``(train_losses, val_losses, test_loss)`` where the first two are
      per-epoch lists (spanning restarts) and ``test_loss`` is a float.
    """
    import copy  # local import: only needed to snapshot the best weights

    def window_for(first_loss, lr):
        # Number of epochs to collect before the next statistical test.
        return round(2 * np.sqrt(2) * first_loss / (lr * np.exp(1)))

    # Load California Housing Data
    data = fetch_california_housing()
    X = data.data
    y = data.target

    # Split Data (60% train / 20% validation / 20% test)
    X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

    # Standardize Data (statistics come from the training split only)
    scaler_X = StandardScaler()
    X_train = scaler_X.fit_transform(X_train)
    X_val = scaler_X.transform(X_val)
    X_test = scaler_X.transform(X_test)

    # Check for GPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Convert to PyTorch tensors and move them to the device
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

    # Initialize Loss Function and Optimizer
    input_dim = X_train.shape[1]
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=max_lr)

    overall_losses = []  # training loss of every epoch, across restarts
    train_losses = []    # training losses of the current test window
    val_losses = []
    exp_test = False     # becomes True once the exponential test has passed
    t_start = 0          # step index of the first loss in the current window
    window = None
    curr_lr = max_lr
    initial_loss = None
    best_model = None
    best_val = float('inf')
    count = 0

    while count < epochs:
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        train_loss = criterion(outputs, y_train_tensor)
        loss_value = train_loss.detach().item()
        train_losses.append(loss_value)
        overall_losses.append(loss_value)
        train_loss.backward()
        optimizer.step()

        if initial_loss is None:
            initial_loss = loss_value

        if window is None:
            # First epoch: size the first window from the initial loss.
            window = window_for(initial_loss, max_lr)
        elif len(train_losses) == window:
            if not exp_test:
                p = exponential_test(train_losses, t_start)
                if p < alpha:
                    exp_test = True
                    t_start += len(train_losses)
                else:
                    # No exponential decay at this learning rate: shrink it
                    # and restart from new weights (t_start is left unchanged).
                    curr_lr *= beta
                    # NOTE(review): the restart always builds a width-32,
                    # depth-2 network, whatever architecture was passed in,
                    # and rebinds the local name only, so the caller's model
                    # object is no longer the one being trained.
                    model = FeedForwardNN(input_dim, 32, 2).to(device)
                    optimizer = optim.SGD(model.parameters(), lr=curr_lr)
                    window = window_for(initial_loss, curr_lr)
            else:
                p = linear_test(train_losses, t_start)
                t_start += len(train_losses)
                if not (p < alpha):
                    # Loss is no longer decreasing significantly: decay the
                    # learning rate but keep the current weights.
                    curr_lr *= beta
                    optimizer = optim.SGD(model.parameters(), lr=curr_lr)
                    window = window_for(initial_loss, curr_lr)
            train_losses = []

        model.eval()
        with torch.no_grad():  # Validation loss
            val_outputs = model(X_val_tensor)
            val_loss = criterion(val_outputs, y_val_tensor)
        val_value = val_loss.item()
        if val_value < best_val:
            # state_dict() returns references to the live parameters, so it
            # must be copied; otherwise the "best" snapshot silently follows
            # every later optimizer step.
            best_model = copy.deepcopy(model.state_dict())
            best_val = val_value

        val_losses.append(val_value)
        count += 1

    # best_model stays None when no epoch ran or every validation loss was NaN;
    # in that case evaluate the current weights instead of failing.
    if best_model is not None:
        model.load_state_dict(best_model)

    # Evaluate on Test Set
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)

    print(f'Test Loss: {test_loss.detach().item()}')

    return overall_losses, val_losses, test_loss.detach().item()

In [None]:
def train_model(model, epochs=50, optimizer=None):
    """Train on California Housing with full-batch updates from a fixed optimizer.

    Runs ``epochs`` full-batch steps, records training and validation MSE
    after every step, restores the weights with the lowest validation loss
    and reports the test MSE of those weights.

    Parameters:
    - model (nn.Module): Network to train. It is not moved here, so it must
      already live on the device chosen below (CUDA if available, else CPU).
    - epochs (int): Number of full-batch updates.
    - optimizer (torch.optim.Optimizer, optional): Optimizer bound to
      ``model.parameters()``. When omitted, the module-level variable named
      ``optimizer`` is used, which is how existing callers supply it.

    Returns:
    - tuple: ``(train_losses, val_losses, test_loss)`` where the first two are
      per-epoch lists and ``test_loss`` is a float.

    Raises:
    - NameError: if no optimizer is passed and no global ``optimizer`` exists.
    """
    import copy  # local import: only needed to snapshot the best weights

    if optimizer is None:
        # Backward compatibility: this function used to read the global
        # `optimizer` implicitly; keep that working for existing call sites.
        try:
            optimizer = globals()['optimizer']
        except KeyError:
            raise NameError("name 'optimizer' is not defined") from None

    # Load California Housing Data
    data = fetch_california_housing()
    X = data.data
    y = data.target

    # Split Data (60% train / 20% validation / 20% test)
    X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

    # Standardize Data (statistics come from the training split only)
    scaler_X = StandardScaler()
    X_train = scaler_X.fit_transform(X_train)
    X_val = scaler_X.transform(X_val)
    X_test = scaler_X.transform(X_test)

    # Check for GPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Convert to PyTorch tensors and move them to the device
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

    # Loss Function
    criterion = nn.MSELoss()

    train_losses = []
    val_losses = []
    best_model = None
    best_val = float('inf')
    count = 0

    while count < epochs:
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        train_loss = criterion(outputs, y_train_tensor)
        train_losses.append(train_loss.detach().item())
        train_loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_tensor)
            val_loss = criterion(val_outputs, y_val_tensor)
        val_value = val_loss.item()
        if val_value < best_val:
            # state_dict() returns references to the live parameters, so it
            # must be copied; otherwise the "best" snapshot silently follows
            # every later optimizer step.
            best_model = copy.deepcopy(model.state_dict())
            best_val = val_value

        val_losses.append(val_value)
        count += 1

    # best_model stays None when no epoch ran or every validation loss was NaN;
    # in that case evaluate the current weights instead of failing.
    if best_model is not None:
        model.load_state_dict(best_model)

    # Evaluate on Test Set
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)

    print(f'Test Loss: {test_loss.detach().item()}')

    return train_losses, val_losses, test_loss.detach().item()

In [None]:
def save_dict_to_file(dictionary, file_path):
    """
    Writes a dictionary to a file as JSON indented by four spaces.

    Any exception raised while opening or writing the file is caught and
    reported with a printed message instead of being propagated.

    Parameters:
    - dictionary (dict): The dictionary to be saved.
    - file_path (str): The path of the file to write (opened in 'w' mode).
    """
    try:
        with open(file_path, 'w') as out_file:
            json.dump(dictionary, out_file, indent=4)
        print(f"Dictionary successfully saved to {file_path}")
    except Exception as err:
        # Best effort: report the problem and carry on with the notebook.
        print(f"An error occurred while saving the dictionary: {err}")

In [None]:
# Training block for ExpTest

epochs = 10000
num_trials = 5

# Results keyed by trial index; each value is a list holding that trial's result.
train_losses_dict = {}
val_losses_dict = {}
test_losses_dict = {}

for trial in range(num_trials):
  # A different seed per trial gives independent initialisations.
  set_seed(trial)

  model = FeedForwardNN(X.shape[0], 32, 2).to(device)
  # Train with SGD whose learning rate is controlled by ExpTest.
  # The function is defined as `train_exptest`; the previous spelling
  # `train_expTest` raised a NameError.
  train_losses, val_losses, test_loss = train_exptest(model, max_lr, alpha=0.05, beta=0.33, epochs=epochs)

  train_losses_dict.setdefault(trial, []).append(train_losses)
  val_losses_dict.setdefault(trial, []).append(val_losses)
  test_losses_dict.setdefault(trial, []).append(test_loss)

  print(f"Test Loss (ExpTest): {test_loss}")
  plt.plot(train_losses)
  plt.show()

In [None]:
# Write the ExpTest results to disk, one file per metric.
for filename, losses_by_trial in (
    ("train_losses_cali_algo.txt", train_losses_dict),
    ("val_losses_cali_algo.txt", val_losses_dict),
    ("test_losses_cali_algo.txt", test_losses_dict),
):
    save_dict_to_file(losses_by_trial, filename)

In [None]:
# Every trial stores its test loss in a one-element list; collect those values
# and report their mean and standard deviation across trials.
test_losses = [trial_result[0] for trial_result in test_losses_dict.values()]

mean_test_loss = round(np.mean(test_losses), 4)
std_test_loss = round(np.std(test_losses), 4)
print("Test Loss ExpTest:", mean_test_loss, "+/-", std_test_loss)

In [None]:
# Training block for other optimizers - Adam shown as example.
# Just change the optimizer and factor variable.

epochs = 10000
num_trials = 5
factor = 1 # Change as needed
initial_lr = factor * max_lr

# Results keyed by trial index; each value is a list holding that trial's result.
train_losses_dict = {}
val_losses_dict = {}
test_losses_dict = {}

for trial in range(num_trials):
  set_seed(trial)

  model = FeedForwardNN(X.shape[0], 32, 2).to(device)
  # train_model picks this optimizer up through the global name `optimizer`.
  optimizer = optim.Adam(model.parameters(), lr=initial_lr) # Change as needed
  train_losses, val_losses, test_loss = train_model(model, epochs=epochs)

  train_losses_dict.setdefault(trial, []).append(train_losses)
  val_losses_dict.setdefault(trial, []).append(val_losses)
  test_losses_dict.setdefault(trial, []).append(test_loss)

  print(f"Test Loss (Adam): {test_loss}") # Change as needed
  plt.plot(train_losses)
  plt.show()

In [None]:
# Change filenames as needed

# Write the baseline optimizer's results to disk, one file per metric.
for filename, losses_by_trial in (
    (f"train_losses_cali_Adam_{factor}x.txt", train_losses_dict),
    (f"val_losses_cali_Adam_{factor}x.txt", val_losses_dict),
    (f"test_losses_cali_Adam_{factor}x.txt", test_losses_dict),
):
    save_dict_to_file(losses_by_trial, filename)

In [None]:
# Every trial stores its test loss in a one-element list; collect those values
# and report their mean and standard deviation across trials.
test_losses = [trial_result[0] for trial_result in test_losses_dict.values()]

mean_test_loss = round(np.mean(test_losses), 4)
std_test_loss = round(np.std(test_losses), 4)
# Change optimizer name as needed
print("Test Loss Adam:", mean_test_loss, "+/-", std_test_loss)