In [3]:
%load_ext autoreload
%autoreload 2
import torch
import torch.optim as optim
import argparse
import os
from tqdm import tqdm
import sys
sys.path.append('../')

from src.models.SAE import StackedSparseAutoencoder
from src.utils.conn_data import save_pickle
from src.utils.parsers import str_2_bool
from src.data.Simulation1Loader import Simulation1Loader

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class StackedSparseAutoencoder(nn.Module):
    """Fully connected stacked autoencoder with an L1 penalty on encoder weights.

    The encoder is a chain of ``Linear -> ReLU [-> Dropout]`` stages, one per
    entry of ``hidden_sizes``. The decoder is a chain of the same kind of
    stages followed by a final plain ``Linear`` that maps back to
    ``input_size``.

    Args:
        input_size: Number of features of each input row.
        hidden_sizes: Output width of each encoder stage, in order. Any
            sequence is accepted; it is copied and never modified.
        bias: Whether the ``Linear`` layers carry a bias term.
        dropout: Dropout probability; a ``Dropout`` layer is only inserted
            after each ReLU when this is greater than zero.
        sparsity_penalty: Multiplier applied to the L1 weight term in
            ``loss_function``.
    """

    def __init__(self,
                 input_size: int,
                 hidden_sizes: list,
                 bias: bool = True,
                 dropout: float = 0.0,
                 sparsity_penalty: float = 1e-4):
        super(StackedSparseAutoencoder, self).__init__()

        # Work on a private copy: the previous in-place ``reverse()`` flipped
        # the caller's list (and ``self.hidden_sizes``, which aliased it).
        hidden_sizes = list(hidden_sizes)

        # parameters
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.sparsity_penalty = sparsity_penalty

        # encoder: input_size -> hidden_sizes[0] -> ... -> hidden_sizes[-1]
        encoder_layers, prev_size = self._dense_stack(
            input_size, hidden_sizes, bias, dropout)
        self.encoder = nn.Sequential(*encoder_layers)

        # decoder: walks the hidden sizes in reverse order, leaving out the
        # last reversed entry, then projects back to input_size without ReLU.
        # NOTE(review): this is not an exact mirror of the encoder (a mirror
        # would iterate reversed_sizes[1:]); the layer shapes are kept as they
        # were so existing checkpoints still load -- confirm intent.
        reversed_sizes = hidden_sizes[::-1]
        decoder_layers, prev_size = self._dense_stack(
            prev_size, reversed_sizes[:-1], bias, dropout)
        decoder_layers.append(nn.Linear(prev_size, input_size, bias=bias))
        self.decoder = nn.Sequential(*decoder_layers)

    @staticmethod
    def _dense_stack(in_size, out_sizes, bias, dropout):
        """Build ``Linear -> ReLU [-> Dropout]`` stages; return (layers, last width)."""
        layers = []
        prev_size = in_size
        for out_size in out_sizes:
            layers.append(nn.Linear(prev_size, out_size, bias=bias))
            layers.append(nn.ReLU(inplace=True))
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            prev_size = out_size
        return layers, prev_size

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            Tuple ``(recon_x, z)``: the decoder output and the encoder output.
        """
        z = self.encoder(x)
        recon_x = self.decoder(z)
        return recon_x, z

    def loss_function(self, recon_x, x):
        """Return MSE reconstruction loss plus the weighted L1 weight penalty.

        The penalty is the sum of absolute values of the weights of every
        ``Linear`` layer in the encoder (biases and decoder weights are not
        included), multiplied by ``self.sparsity_penalty``.
        """
        # Mean squared error for reconstruction loss
        recon_loss = F.mse_loss(recon_x, x)

        # L1 norm of the encoder's Linear weights
        sparsity_loss = 0
        for layer in self.encoder:
            if isinstance(layer, nn.Linear):
                sparsity_loss += torch.sum(torch.abs(layer.weight))

        total_loss = recon_loss + self.sparsity_penalty * sparsity_loss
        return total_loss


# Model training

In [5]:
# Configuration: everything a reader might want to tune for this run.
dataset_name = 'simulation1'
sample = False
batch_size = 1
model_name = "sae"
input_size = 100
hidden_sizes = [50, 25, 50]
dropout = 0.5
learning_rate = 0.001
epochs = 10
sparsity_penalty = 1e-4
seed = 0

# Weight initialisation and dropout are random; seed so a re-run reproduces.
torch.manual_seed(seed)

# define dataset
sim = Simulation1Loader(name=dataset_name, sample=sample)
loader = sim.create_graph_loader(batch_size=batch_size)

# define model (pass a copy so the configured list is never modified by the
# model constructor; it is reused below to build the run name)
model = StackedSparseAutoencoder(input_size=input_size,
                                 hidden_sizes=list(hidden_sizes),
                                 dropout=dropout,
                                 sparsity_penalty=sparsity_penalty)

# define optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# make sure dropout is active during optimisation
model.train()

# guard the per-epoch average against an empty loader
n_batches = max(len(loader), 1)

# initialize tqdm
pbar = tqdm(range(epochs))

for epoch in pbar:
    epoch_loss = 0
    for data in loader:
        # flatten the batch into rows of ``input_size`` features
        x = data.x.view(-1, input_size)

        # forward pass (the latent code is not needed for training)
        recon_x, _ = model(x)

        # reconstruction loss + L1 weight penalty
        loss = model.loss_function(recon_x, x)

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # show the mean batch loss of this epoch on the progress bar
    pbar.set_description(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss/n_batches:.4f}")

# Save model and results
model_name = f'{model_name}_{input_size}_{"_".join(map(str, hidden_sizes))}'

# ``__file__`` is not defined inside a notebook kernel, and as a script it may
# be a bare filename whose dirname is "" (which would point the path at "/").
# Resolve it to an absolute directory when present, else use the working dir.
if "__file__" in globals():
    base_dir = os.path.dirname(os.path.abspath(__file__))
else:
    base_dir = os.getcwd()
output_path = os.path.join(base_dir, "data", "outputs", dataset_name, model_name)
os.makedirs(output_path, exist_ok=True)

# Persist only the learned parameters; rebuild the model with the same
# configuration before calling ``load_state_dict`` on this file.
torch.save(model.state_dict(), os.path.join(output_path, "model.pth"))

# Additional code for saving results, testing, etc. can be added here


FileNotFoundError: [Errno 2] No such file or directory: '/Users/maruanottoni/home/master/research/graph-corr-embedd/notebooks/../src/data/inputs/simulation1/all_graph_info.pkl'