In [2]:
import h5py

# Collect every entry stored at the top level of the HDF5 file
all_matrices = []

with h5py.File(r'../dataset/random_matrices_dataset.h5', 'r') as hf:
    # hf[name][:] reads the whole entry into memory before the file is closed
    all_matrices.extend(hf[name][:] for name in hf.keys())

# Report how many matrices ended up in all_matrices
print(f"Loaded {len(all_matrices)} matrices.")

Loaded 40 matrices.


In [3]:
import torch
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import wandb
import os

# ADAM + ADAM

In [25]:
import torch
import torch.optim as optim
import wandb

# Seed PyTorch's global RNG so the torch.randn initialisation used in this cell is repeatable.
torch.manual_seed(42)

# Function to perform matrix factorization with a single first-order optimizer
def factorize_matrix(M_list, latent_dim, method, optimizer_class=optim.SGD, learning_rate=0.01, num_epochs=1000, lambda_reg=0.01):
    """Fit one shared product ``P @ Q`` to a collection of matrices.

    The objective is the squared Frobenius error ``||M - P @ Q||_F^2`` averaged
    over the matrices, plus ``lambda_reg * (||P||^2 + ||Q||^2)``.

    Parameters:
    - M_list: Sequence of 2-D tensors, or a single 2-D tensor (treated as a
      one-element list). ``P @ Q`` has shape ``(d, d)`` where ``d`` is the first
      dimension of the first matrix, so every matrix must be broadcast-compatible
      with that shape.
    - latent_dim: Inner dimension of the factorization.
    - method: Unused here; kept so existing callers keep working.
    - optimizer_class: Optimizer constructor called as ``optimizer_class([P, Q], lr=...)``.
    - learning_rate: Learning rate handed to the optimizer.
    - num_epochs: Number of optimizer steps.
    - lambda_reg: Weight of the norm penalty on P and Q.

    Returns:
    - (P, Q, losses): the two factor tensors and a list with one loss value per
      epoch, each evaluated before that epoch's optimizer step.

    Raises:
    - ValueError: if no matrix is supplied.
    """
    # A bare 2-D tensor would otherwise be iterated row by row and fail on the
    # shape unpacking below, so wrap it into a one-element list.
    if torch.is_tensor(M_list) and M_list.dim() == 2:
        M_list = [M_list]
    M_list = list(M_list)
    if not M_list:
        raise ValueError("M_list must contain at least one matrix")

    d, _ = M_list[0].shape
    n = len(M_list)

    # Initialize latent matrices P and Q (P is drawn first, then Q)
    P = torch.randn(d, latent_dim, requires_grad=True)
    Q = torch.randn(latent_dim, d, requires_grad=True)

    # One optimizer updates both factors jointly
    optimizer = optimizer_class([P, Q], lr=learning_rate)

    # Loss function (averaged squared Frobenius error + regularization)
    def loss_function(M_list, P, Q):
        # The prediction does not depend on M, so compute it once per evaluation
        predicted = torch.matmul(P, Q)
        total_loss = sum(torch.norm(M - predicted, p='fro') ** 2 for M in M_list)

        avg_loss = total_loss / n
        reg_term = lambda_reg * (torch.norm(P) ** 2 + torch.norm(Q) ** 2)

        return avg_loss + reg_term

    losses = []

    # Training loop
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        loss = loss_function(M_list, P, Q)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    return P, Q, losses

In [26]:
import numpy as np
import torch
import wandb
import torch.optim as optim

# Seed PyTorch's global RNG when this cell runs; the experiment function below
# also calls torch.manual_seed(42) itself right before each factorization.
torch.manual_seed(42)

def matrix_factorization_experiment(matrices, method="ADAM/ADAM", 
                                    learning_rates=[0.01, 0.1], latent_dims=[5, 10, 15, 20], 
                                    num_epochs=1000, lambda_reg=0.01, mixed=False, optimizer_class=optim.Adam):
    """Sweep learning rates and latent dimensions for a joint factorization.

    For every (learning rate, latent dimension) pair, one wandb run is opened,
    ``factorize_matrix`` is fitted on all matrices at once, the loss curve is
    logged, and the run is closed.

    Note: a later cell in this notebook defines a function with the same name;
    whichever definition was executed last is the one in scope.

    Parameters:
    - matrices: Non-empty sequence of array-likes convertible by ``torch.tensor``.
    - method: Label used in the wandb run name, tag, config and metric key.
    - learning_rates: Learning rates to try.
    - latent_dims: Latent dimensions to try.
    - num_epochs: Number of optimizer steps per run.
    - lambda_reg: Regularization weight passed to ``factorize_matrix``.
    - mixed: Unused in this definition; kept for call compatibility.
    - optimizer_class: Optimizer constructor passed to ``factorize_matrix``.

    Returns:
    - results_usual: Dict mapping ``(lr, latent_dim)`` to the list of losses.

    Raises:
    - ValueError: if ``matrices`` is empty.
    """
    # The conversion does not depend on the hyperparameters, so do it once
    M_tensors = [torch.tensor(M, dtype=torch.float32) for M in matrices]
    if not M_tensors:
        raise ValueError("matrices must contain at least one matrix")

    # Placeholder for storing results
    results_usual = {}

    # Perform factorization for each combination of learning rate and latent dimension
    for lr in learning_rates:
        for latent_dim in latent_dims:
            print(f"Running experiments for LR {lr}, Latent Dim {latent_dim}")
            # Initialize wandb run for this specific set of hyperparameters.
            # latent_dim is recorded in the config because the run name does not
            # contain it, so runs would otherwise be indistinguishable.
            wandb_run = wandb.init(project="alternating_orders4",
                                   name=f"LR_{lr}_Method_{method}", tags=[f"{method}_run"],
                                   reinit=True, settings=wandb.Settings(silent=True),
                                   config={"learning_rate": lr, "latent_dim": latent_dim,
                                           "method": method, "epochs": num_epochs})
            try:
                # Same initialisation of P and Q for every configuration
                torch.manual_seed(42)
                P, Q, avg_losses = factorize_matrix(
                    M_tensors, latent_dim, method, optimizer_class=optimizer_class,
                    learning_rate=lr, num_epochs=num_epochs, lambda_reg=lambda_reg
                )

                # Log the loss curve to wandb, one entry per recorded epoch
                for epoch_loss in avg_losses:
                    wandb.log({f"avg_loss_{method}_LR_{lr}": epoch_loss})

                # Store the results for this combination
                results_usual[(lr, latent_dim)] = avg_losses
            finally:
                # Close the run even if the factorization or logging failed
                wandb_run.finish()

    return results_usual

In [27]:
# Sweep two learning rates at latent dimension 5 with Adam as the optimizer class
results = matrix_factorization_experiment(
    all_matrices,
    method="Adam+Adam",
    optimizer_class=optim.Adam,
    learning_rates=[0.1, 0.5],
    latent_dims=[5],
    num_epochs=1000,
)


Running experiments for LR 0.1, Latent Dim 5
Running experiments for LR 0.5, Latent Dim 5


In [None]:
# Single run with plain SGD as the optimizer class.
# NOTE(review): the output saved with this cell reports LR 0.0001, so it was
# produced before the learning rate here was set to 0.001 - re-run to refresh it.
results = matrix_factorization_experiment(
    all_matrices,
    method="GD+GD",
    optimizer_class=optim.SGD,
    learning_rates=[0.001],
    latent_dims=[5],
    num_epochs=1000,
)


Running experiments for LR 0.0001, Latent Dim 5


# Adam + GradReg

In [31]:
from damped_newton import DampedNewton
from cubic_regularized_newton import CubicRegularizedNewton
import torch.optim as optim

In [None]:
import torch
import time
import torch.optim as optim

# Seed PyTorch's global RNG so the torch.randn initialisation used in this cell is repeatable.
torch.manual_seed(42)

def factorize_matrix_mixed(M_list, latent_dim, method, lr_P=0.1, num_epochs=1000, lambda_reg=0.01, L=1.0, reg=0.0, variant="GradReg", verbose=True):
    """Fit a shared product ``P @ Q`` using Adam for P and DampedNewton for Q.

    The objective is the squared Frobenius error ``||M - P @ Q||_F^2`` averaged
    over the matrices, plus ``lambda_reg * (||P||^2 + ||Q||^2)``. In every epoch
    Q is stepped first, then P, and the loss is then re-evaluated and recorded.

    Parameters:
    - M_list: Sequence of 2-D tensors, or a single 2-D tensor (treated as a
      one-element list). ``P @ Q`` has shape ``(d, d)`` where ``d`` is the first
      dimension of the first matrix.
    - latent_dim: Inner dimension of the factorization.
    - method: Unused here; kept so existing callers keep working.
    - lr_P: Learning rate of the Adam optimizer that updates P.
    - num_epochs: Number of alternating update rounds.
    - lambda_reg: Weight of the norm penalty on P and Q.
    - L, reg, variant: Forwarded unchanged to ``DampedNewton``; their exact
      meaning is defined by that class (the comment in this notebook names
      "GradReg" and "Classic" as variants - TODO confirm against damped_newton).
    - verbose: When True (the default), print the loss after every epoch.

    Returns:
    - (P, Q, losses, reconstructed_M): the factor tensors, a list of
      ``num_epochs + 1`` losses (initial loss first), and ``P @ Q`` as a NumPy array.

    Raises:
    - ValueError: if no matrix is supplied.
    """
    # A bare 2-D tensor would otherwise be iterated row by row and fail on the
    # shape unpacking below, so wrap it into a one-element list.
    if torch.is_tensor(M_list) and M_list.dim() == 2:
        M_list = [M_list]
    M_list = list(M_list)
    if not M_list:
        raise ValueError("M_list must contain at least one matrix")

    d, _ = M_list[0].shape  # P @ Q is (d, d)
    n = len(M_list)  # Number of matrices

    # Initialize latent matrices P and Q (P is drawn first, then Q)
    P = torch.randn(d, latent_dim, requires_grad=True)
    Q = torch.randn(latent_dim, d, requires_grad=True)

    # Adam (first-order) optimizer for P
    optimizer_P = optim.Adam([P], lr=lr_P)

    # Damped Newton optimizer for Q with user-defined variant
    optimizer_Q = DampedNewton([Q], variant=variant, L=L, reg=reg, verbose=False)

    # Aggregated loss function (average over all matrices in M_list + regularization)
    def loss_function(M_list, P, Q):
        # The prediction does not depend on M, so compute it once per evaluation
        predicted = torch.matmul(P, Q)
        total_loss = sum(torch.norm(M - predicted, p='fro') ** 2 for M in M_list)
        avg_loss = total_loss / n
        reg_term = lambda_reg * (torch.norm(P) ** 2 + torch.norm(Q) ** 2)
        return avg_loss + reg_term

    # Closure shared by both optimizers: clears both gradients, then
    # re-evaluates the loss and backpropagates.
    def closure():
        optimizer_P.zero_grad()
        optimizer_Q.zero_grad()
        loss = loss_function(M_list, P, Q)
        # retain_graph is kept from the original code; presumably DampedNewton
        # needs the graph after backward - TODO confirm against damped_newton.
        loss.backward(retain_graph=True)
        return loss

    # Evaluate initial loss before training (no gradient needed for a plain readout)
    with torch.no_grad():
        losses = [loss_function(M_list, P, Q).item()]

    for epoch in range(num_epochs):
        # Step for Q (second-order optimizer with selected variant)
        optimizer_Q.step(closure)

        # Step for P (first-order optimizer)
        optimizer_P.step(closure)

        # Record the loss after both updates. The closure is reused on purpose so
        # the gradient state seen by the next epoch is the same as before.
        loss_value = closure().item()
        losses.append(loss_value)
        if verbose:
            print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss_value:.6f}")

    # Reconstruct the matrix M using P and Q
    reconstructed_M = torch.matmul(P, Q).detach().numpy()

    return P, Q, losses, reconstructed_M


In [5]:
import numpy as np
import torch
import wandb
import torch.optim as optim

# Seed PyTorch's global RNG when this cell runs; the experiment function below
# also calls torch.manual_seed(42) itself.
torch.manual_seed(42)

def matrix_factorization_experiment(matrices, method="ADAM/ADAM", 
                                    learning_rates=[0.01, 0.1], latent_dims=[5, 10, 15, 20], 
                                    num_epochs=1000, lambda_reg=0.01, mixed=False, optimizer_class=optim.Adam, 
                                    L_values=[1.0], reg_values=[0.0], variant="GradReg"):
    """
    Run matrix factorization experiments for different learning rates, latent dimensions, and regularization parameters.

    Each matrix is factorized on its own and the per-epoch losses are averaged
    over the matrices. One wandb run is opened per hyperparameter combination.

    Parameters:
    - matrices: Non-empty list of matrices to factorize.
    - optimizer_class: Optimizer class for latent matrices (e.g., Adam, SGD); used when mixed is False.
    - method: Method name to log in wandb.
    - learning_rates: List of learning rates to try.
    - latent_dims: List of latent dimensions to try.
    - num_epochs: Number of epochs for optimization.
    - lambda_reg: Regularization parameter.
    - mixed: Boolean flag to indicate if mixed optimization (factorize_matrix_mixed) should be used.
    - L_values: List of L values (Lipschitz constant for Hessian) to try.
    - reg_values: List of regularizer values for Hessian regularizer to try.
    - variant: Variant name forwarded to factorize_matrix_mixed when mixed is True.

    Returns:
    - results_usual: Dictionary mapping (lr, latent_dim, L, reg) to a NumPy array of averaged
      losses. Its length is whatever the factorization routine returns
      (factorize_matrix_mixed also records the initial loss).

    Raises:
    - ValueError: if matrices is empty.
    """
    from itertools import product

    # The conversion does not depend on the hyperparameters, so do it once
    M_tensors = [torch.tensor(M, dtype=torch.float32) for M in matrices]
    if not M_tensors:
        raise ValueError("matrices must contain at least one matrix")

    # Placeholder for storing results
    results_usual = {}

    # Perform factorization for each combination of learning rate, latent dimension, and regularization parameters
    for lr, latent_dim, L, reg in product(learning_rates, latent_dims, L_values, reg_values):
        print(f"Running experiments for LR {lr}, Latent Dim {latent_dim}, L {L}, reg {reg}")

        # Initialize wandb run for this specific set of hyperparameters.
        # latent_dim and variant go into the config because the run name does not contain them.
        wandb_run = wandb.init(project="alternating_orders2",
                               name=f"LR_{lr}_Method_{method}_L_{L}_reg_{reg}",
                               reinit=True, settings=wandb.Settings(silent=True), tags=[f"{method}_run"],
                               config={"learning_rate": lr, "latent_dim": latent_dim, "method": method,
                                       "L": L, "reg": reg, "variant": variant, "epochs": num_epochs})
        try:
            per_matrix_losses = []

            for M_tensor in M_tensors:
                # Same initialisation of P and Q for every matrix, in both branches
                torch.manual_seed(42)

                # The factorization routines take a list of matrices, so a
                # single matrix is passed as a one-element list.
                if mixed:
                    P, Q, losses, _ = factorize_matrix_mixed(
                        [M_tensor], latent_dim=latent_dim, method=method, lr_P=lr, num_epochs=num_epochs,
                        lambda_reg=lambda_reg, L=L, reg=reg, variant=variant
                    )
                else:
                    P, Q, losses = factorize_matrix(
                        [M_tensor], latent_dim=latent_dim, method=method, optimizer_class=optimizer_class,
                        learning_rate=lr, num_epochs=num_epochs, lambda_reg=lambda_reg
                    )

                per_matrix_losses.append(np.asarray(losses, dtype=float))

            # Average the losses over all matrices. Stacking the curves avoids
            # assuming a fixed length (the two routines return different lengths).
            avg_losses = np.mean(per_matrix_losses, axis=0)

            # Log the averaged losses to wandb, one entry per recorded epoch
            for epoch_loss in avg_losses:
                wandb.log({f"avg_loss_{method}_L_{L}_reg_{reg}": epoch_loss})

            # Store the results for this combination
            results_usual[(lr, latent_dim, L, reg)] = avg_losses
        finally:
            # Close the run even if a factorization or logging call failed
            wandb_run.finish()

    return results_usual

In [6]:
# Mixed optimization run labelled "Adam+GradReg": one learning rate, one L value
results = matrix_factorization_experiment(
    all_matrices,
    method="Adam+GradReg",
    learning_rates=[0.5],
    latent_dims=[5],
    num_epochs=250,
    lambda_reg=0.01,
    mixed=True,
    L_values=[0.1],
    reg_values=[0.0],
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


Running experiments for LR 0.5, Latent Dim 5, L 0.1, reg 0.0
Epoch 1/250, Loss: 8.227251
Epoch 2/250, Loss: 5.231321
Epoch 3/250, Loss: 4.196105
Epoch 4/250, Loss: 3.838968
Epoch 5/250, Loss: 3.570315
Epoch 6/250, Loss: 3.280377
Epoch 7/250, Loss: 2.990706
Epoch 8/250, Loss: 2.717505
Epoch 9/250, Loss: 2.455677
Epoch 10/250, Loss: 2.199248
Epoch 11/250, Loss: 1.956801
Epoch 12/250, Loss: 1.746573
Epoch 13/250, Loss: 1.582402
Epoch 14/250, Loss: 1.465203
Epoch 15/250, Loss: 1.384848
Epoch 16/250, Loss: 1.328225
Epoch 17/250, Loss: 1.286037
Epoch 18/250, Loss: 1.254279
Epoch 19/250, Loss: 1.231530
Epoch 20/250, Loss: 1.215845
Epoch 21/250, Loss: 1.204028
Epoch 22/250, Loss: 1.192982
Epoch 23/250, Loss: 1.181024
Epoch 24/250, Loss: 1.167932
Epoch 25/250, Loss: 1.154128
Epoch 26/250, Loss: 1.139983
Epoch 27/250, Loss: 1.125763
Epoch 28/250, Loss: 1.111881
Epoch 29/250, Loss: 1.098927
Epoch 30/250, Loss: 1.087412
Epoch 31/250, Loss: 1.077592
Epoch 32/250, Loss: 1.069552
Epoch 33/250, Loss: 

: 

In [9]:
# Mixed optimization run labelled "Adam+Classic".
# NOTE(review): this call does not select a DampedNewton variant, so
# factorize_matrix_mixed falls back to its default variant="GradReg" -
# confirm that the "Classic" label matches what is actually being run.
results = matrix_factorization_experiment(
    all_matrices,
    method="Adam+Classic",
    learning_rates=[0.5],
    latent_dims=[5],
    num_epochs=1000,
    lambda_reg=0.01,
    mixed=True,
    L_values=[0.1],
    reg_values=[0.0],
)

Running experiments for LR 0.5, Latent Dim 5, L 0.1, reg 0.0
