# GNN Training for MILP Problems

This notebook trains a Graph Neural Network (GNN) on mixed-integer linear programming (MILP) problems using a supervised contrastive loss. It includes:
1. Setup and installation of required packages
2. Data loading and preprocessing
3. Model training with different configurations
4. Visualization of training dynamics


In [None]:
# Mount Google Drive to access your data
from google.colab import drive
drive.mount('/content/drive')

# Install required packages
%pip install torch-geometric
%pip install matplotlib numpy


In [72]:
# Import required libraries
import json
import os
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader, Subset
from torch_geometric.data import Batch

# Root of the project on the mounted Drive; it holds the local modules
# imported below as well as the data/ and models/ sub-directories.
PROJECT_DIR = '/content/drive/MyDrive/Tlearn2rec'  # Modify this path as needed
# Make the project modules importable. The membership test keeps sys.path free
# of duplicate entries when this cell is executed more than once.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

# Import project modules
from gnn_model import GCNPolicy
import config
from visualization import TrainingVisualizer

# NOTE: process() below calls the model as `model((c, ei, ev, v), v_labels)`, i.e. with a
# 4-tuple of inputs. GCNPolicy.forward in gnn_model.py must therefore unpack exactly
# these four tensors. That file cannot be patched from this notebook, so apply the
# change to gnn_model.py manually. The intended shape of the method is:
# class GCNPolicy(nn.Module):
#     ...
#     def forward(self, inputs, v_labels=None):
#         constraint_features, edge_indices, edge_features, variable_features = inputs
#         ... # Rest of the forward method logic using these 4 inputs

In [48]:
# Define dataset and model classes
class MILPDataset(Dataset):
    """Map-style dataset backed by one serialized sample per file.

    Only the file paths are kept in memory; a sample is deserialized from
    disk each time it is indexed.
    """

    def __init__(self, sample_files):
        # Indexable collection of paths, each loadable with torch.load.
        self.sample_files = sample_files

    def __len__(self):
        """Return the number of sample files."""
        return len(self.sample_files)

    def __getitem__(self, idx):
        """Deserialize and return the sample stored in the ``idx``-th file."""
        sample_path = self.sample_files[idx]
        # weights_only=False allows arbitrary pickled objects to be restored,
        # so only point this dataset at files from a trusted source.
        return torch.load(sample_path, weights_only=False)

class SupervisedContrastiveLoss(nn.Module):
    """Supervised contrastive loss over a batch of embeddings.

    For every anchor row, all other rows carrying the same label are treated
    as positives. The loss is the negative mean log-probability of the
    positives under a softmax over the anchor's similarities to every other
    row, averaged over the anchors that have at least one positive.

    Args:
        temperature: Divisor applied to the dot-product similarities before
            the softmax.
    """

    def __init__(self, temperature=0.1):
        super().__init__()
        self.temperature = temperature

    def forward(self, embeddings, labels):
        """Compute the loss.

        Args:
            embeddings: 2-D tensor with one embedding per row. Similarities are
                plain dot products, so rows are presumably expected to be
                L2-normalised by the caller (not enforced here).
            labels: 1-D tensor with one label per embedding row.

        Returns:
            A scalar tensor. When no row has a positive partner the result is
            a zero that is still attached to ``embeddings``' autograd graph,
            so calling ``backward()`` on it is safe.
        """
        # positives[i, j] is True when rows i and j share a label and i != j.
        positives = labels.unsqueeze(0) == labels.unsqueeze(1)
        positives.fill_diagonal_(False)

        if not positives.any():
            # A fresh constant tensor would have no grad_fn and make the
            # training step's backward() raise; multiply by zero instead so the
            # result stays differentiable (with all-zero gradients).
            return embeddings.sum() * 0.0

        logits = torch.matmul(embeddings, embeddings.T) / self.temperature

        # Log of the softmax denominator over all *other* rows. Masking the
        # diagonal with -inf and using logsumexp is mathematically identical to
        # log(sum(exp(logits) * off_diagonal_mask)) but cannot overflow.
        self_mask = torch.eye(logits.size(0), dtype=torch.bool, device=logits.device)
        log_denominator = torch.logsumexp(
            logits.masked_fill(self_mask, float('-inf')), dim=1, keepdim=True
        )
        log_prob = logits - log_denominator

        n_positives = positives.sum(1)
        # clamp avoids 0/0 for anchors without positives; those rows are
        # dropped from the mean below anyway.
        mean_log_prob_pos = (positives * log_prob).sum(1) / n_positives.clamp(min=1)

        return -mean_log_prob_pos[n_positives > 0].mean()


In [73]:
def process(model, dataloader, criterion, optimizer=None, scaler=None, device='cpu', epoch=None, phase='train'):
    """Run one pass over ``dataloader`` and return the mean loss per graph.

    The pass is a training pass when ``optimizer`` is given and an evaluation
    pass otherwise. Evaluation runs with autograd disabled.

    Args:
        model: Module called as ``model((c, ei, ev, v), v_labels)`` and expected
            to return a ``(proj_embeddings, fg_labels)`` pair, either of which
            may be ``None``.
        dataloader: Iterable of batches exposing ``.to(device)``,
            ``.num_graphs`` and the ``'constraint'`` / ``'variable'`` /
            ``('constraint', 'includes', 'variable')`` stores read below.
        criterion: Callable ``criterion(proj_embeddings, fg_labels) -> loss``.
        optimizer: Optimizer to step; ``None`` selects evaluation mode.
        scaler: Optional gradient scaler. When given, the forward pass runs
            under CUDA autocast and the scaler drives backward/step.
        device: Device the batch is moved to.
        epoch: Unused; accepted for call-site compatibility.
        phase: Unused; accepted for call-site compatibility.

    Returns:
        The loss averaged over all graphs seen, each batch weighted by its
        ``num_graphs``; ``0`` if the dataloader yielded no graphs.
    """
    is_train = optimizer is not None

    if is_train:
        model.train()
    else:
        model.eval()

    mean_loss = 0
    n_samples_processed = 0

    for batch in dataloader:
        # Move the batch to the specified device
        batch = batch.to(device)

        # Pull the bipartite-graph tensors out of the batched object.
        c = batch['constraint'].x
        ei = batch[('constraint', 'includes', 'variable')].edge_index
        ev = batch[('constraint', 'includes', 'variable')].edge_attr
        v = batch['variable'].x
        v_labels = batch['variable'].y

        # Per-graph constraint/variable counts (n_cs, n_vs) are not passed:
        # they are not stored on the batched object in this form, so the model
        # must work from these four tensors alone.
        model_input = (c, ei, ev, v)

        if is_train:
            optimizer.zero_grad()

        # Disable autograd during evaluation so no graph is built or retained.
        with torch.set_grad_enabled(is_train):
            with autocast(device_type="cuda", enabled=(scaler is not None)):
                proj_embeddings, fg_labels = model(model_input, v_labels)

                if proj_embeddings is not None and fg_labels is not None:
                    loss = criterion(proj_embeddings, fg_labels)
                else:
                    # Nothing to contrast in this batch; record a zero loss.
                    loss = torch.tensor(0.0, device=device)

        # A constant loss (the fallback above, or a criterion that returned a
        # detached value) has no graph; calling backward() on it would raise,
        # so such batches are skipped instead of aborting the whole epoch.
        if is_train and loss.requires_grad:
            if scaler is not None:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                optimizer.step()

        # Weight each batch by its number of graphs so the final value is a
        # per-graph mean regardless of uneven batch sizes.
        mean_loss += loss.item() * batch.num_graphs
        n_samples_processed += batch.num_graphs

    if n_samples_processed > 0:
        mean_loss /= n_samples_processed

    return mean_loss

In [74]:
def train_model(model, train_loader, valid_loader, criterion, optimizer, scaler, device,
                running_dir, max_epochs, early_stopping, patience, visualizer):
    """Train ``model`` with early stopping and step-wise learning-rate decay.

    Each epoch runs one training pass and one validation pass via
    ``process``. Whenever the validation loss improves, the model is
    checkpointed to ``running_dir / 'best_params.pkl'`` through
    ``model.save_state``.

    Args:
        model: Model to train; must provide ``save_state(path)``.
        train_loader: Loader passed to ``process`` for the training pass.
        valid_loader: Loader passed to ``process`` for the validation pass.
        criterion: Loss callable forwarded to ``process``.
        optimizer: Optimizer whose param-group learning rates are decayed here.
        scaler: Optional gradient scaler forwarded to ``process``.
        device: Device forwarded to ``process``.
        running_dir: Directory (``str`` or ``Path``) receiving the checkpoint.
        max_epochs: Number of epochs to run at most; epochs are numbered
            ``0 .. max_epochs - 1``.
        early_stopping: Stop after this many consecutive epochs without a
            validation improvement.
        patience: Multiply the learning rate by 0.2 every ``patience``
            consecutive non-improving epochs. A non-positive value disables
            the decay.
        visualizer: Object receiving ``update``, ``plot_training_curves``,
            ``plot_learning_rate`` and ``save_history`` calls.

    Returns:
        The lowest validation loss observed (``np.inf`` if none improved).
    """
    best_loss = np.inf
    plateau_count = 0
    current_lr = optimizer.param_groups[0]['lr']
    # Accept plain strings as well as Path objects for the output directory.
    best_params_path = Path(running_dir) / 'best_params.pkl'

    # range(max_epochs), not max_epochs + 1: every iteration performs a real
    # training pass, so the extra iteration would exceed the requested budget.
    for epoch in range(max_epochs):
        print(f"Epoch {epoch}...")

        # Train
        train_loss = process(model, train_loader, criterion, optimizer, scaler, device, epoch, 'train')
        print(f"Train Loss: {train_loss:0.3f}")

        # Validate (optimizer=None puts process() in evaluation mode)
        valid_loss = process(model, valid_loader, criterion, None, scaler, device, epoch, 'valid')
        print(f"Valid Loss: {valid_loss:0.3f}")

        # Record the learning rate that was in effect during this epoch.
        visualizer.update(epoch, train_loss, valid_loss, current_lr)

        if valid_loss < best_loss:
            plateau_count = 0
            best_loss = valid_loss
            model.save_state(best_params_path)
            print("Best model so far")
        else:
            plateau_count += 1
            if plateau_count >= early_stopping:
                print(f"{plateau_count} epochs without improvement, early stopping")
                break
            # Guard against patience <= 0, which would otherwise raise
            # ZeroDivisionError in the modulo.
            if patience > 0 and plateau_count % patience == 0:
                current_lr *= 0.2
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr
                print(f"Decreasing learning rate to {current_lr:.1e}")

        # Plot progress and persist the history every 10th epoch.
        if epoch % 10 == 0:
            visualizer.plot_training_curves()
            plt.show()
            visualizer.plot_learning_rate()
            plt.show()
            visualizer.save_history()

    return best_loss


## Training Configuration

Set up the training parameters and experiment configuration below:


In [75]:
# Experiment settings: edit these to define a run.
problem = 'facilities'  # or 'osif'
experiment_name = 'experiment_1'
train_size = 1.0  # fraction of training data to use
max_epochs = 1000

# Hyper-parameters come from config.py. TRAIN_PARAMS is copied so that the
# max_epochs override below does not modify the module-level object.
model_params = config.MODEL_PARAMS
train_params = config.TRAIN_PARAMS.copy()
train_params['max_epochs'] = max_epochs

# Output directory for this experiment (created if missing).
running_dir = Path(PROJECT_DIR, 'models', problem, 'GCNPolicy', experiment_name)
running_dir.mkdir(parents=True, exist_ok=True)

# Collects per-epoch metrics and writes its artefacts under running_dir.
visualizer = TrainingVisualizer(running_dir)

# Prefer the GPU when one is visible to torch.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Mixed-precision gradient scaling is only used on CUDA; None disables it.
scaler = None
if device.type == 'cuda':
    scaler = GradScaler(device='cuda')


Using device: cpu


In [76]:
# Load and prepare data
data_root = Path(PROJECT_DIR) / 'data/processed' / problem

# glob() yields files in arbitrary, filesystem-dependent order. Sorting makes
# the train_size subset below (and the validation order) reproducible.
train_files = sorted(str(x) for x in (data_root / 'train').glob('*.pt'))
valid_files = sorted(str(x) for x in (data_root / 'valid').glob('*.pt'))

# Fail early with a clear message instead of an opaque sampler error later.
if not train_files:
    raise FileNotFoundError(f"No '*.pt' training samples found in {data_root / 'train'}")
if not valid_files:
    print(f"WARNING: no '*.pt' validation samples found in {data_root / 'valid'}")

if train_size < 1.0:
    n_train = int(len(train_files) * train_size)
    train_files = train_files[:n_train]
    print(f"Using {n_train} training files")

train_dataset = MILPDataset(train_files)
valid_dataset = MILPDataset(valid_files)

# Batch.from_data_list merges the per-sample graph objects into one batched
# graph. It is passed directly as the collate function so this cell does not
# depend on a helper defined further down the notebook.
# Pinned host memory only helps host-to-GPU copies, so enable it only with CUDA.
use_pinned_memory = torch.cuda.is_available()

train_loader = DataLoader(train_dataset, batch_size=train_params['batch_size'],
                         shuffle=True, collate_fn=Batch.from_data_list,
                         num_workers=train_params['num_workers'],
                         pin_memory=use_pinned_memory)

valid_loader = DataLoader(valid_dataset, batch_size=train_params['valid_batch_size'],
                         shuffle=False, collate_fn=Batch.from_data_list,
                         num_workers=train_params['num_workers'],
                         pin_memory=use_pinned_memory)



In [77]:
# Build the network, the contrastive objective and the optimizer.
model = GCNPolicy(emb_size=model_params['emb_size'])
model.to(device)

criterion = SupervisedContrastiveLoss(temperature=train_params['temperature']).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=train_params['lr'])

# Run the training loop; it checkpoints the best model under running_dir.
best_loss = train_model(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    scaler=scaler,
    device=device,
    running_dir=running_dir,
    max_epochs=max_epochs,
    early_stopping=train_params['early_stopping'],
    patience=train_params['patience'],
    visualizer=visualizer,
)

# Reload the best checkpoint and re-evaluate it on the validation set
# (optimizer=None selects evaluation mode in process()).
best_params_file = running_dir / 'best_params.pkl'
model.restore_state(best_params_file)
final_valid_loss = process(model, valid_loader, criterion, optimizer=None, scaler=scaler, device=device)
print(f"Best validation loss: {final_valid_loss:0.3f}")

# Final figures and persisted history for this experiment.
final_title = f"Final Training Curves - {experiment_name}"
visualizer.plot_training_curves(final_title)
plt.show()
visualizer.plot_learning_rate()
plt.show()
visualizer.save_history()


Epoch 0...




ValueError: not enough values to unpack (expected 6, got 4)

In [68]:
# Sanity check: deserialize the first training file and print its structure.
first_sample_path = train_files[0]
sample_data = torch.load(first_sample_path, weights_only=False)
print(sample_data)

HeteroData(
  constraint={ x=[160, 263] },
  variable={
    x=[240, 262],
    var_names=[240],
    y=[240],
    train_mask=[240],
  },
  (constraint, includes, variable)={
    edge_index=[2, 640],
    edge_attr=[640, 1],
  },
  (variable, in, constraint)={
    edge_index=[2, 640],
    edge_attr=[640, 1],
  }
)


In [67]:
from torch_geometric.data import Batch

def collate_fn(batch):
    """Merge a list of per-sample graph objects into one batched graph.

    Intended as the ``collate_fn`` of a ``DataLoader``; delegates to
    torch_geometric's ``Batch.from_data_list``.
    """
    merged = Batch.from_data_list(batch)
    return merged

## Experiment with Different Configurations

You can run multiple experiments with different configurations by modifying the parameters in the cells above and then re-running those cells and every cell after them. For example:

1. Try different training data sizes:
```python
train_size = 0.5  # Use 50% of training data
```

2. Try different numbers of epochs:
```python
max_epochs = 500  # Train for 500 epochs
```

3. Try different learning rates:
```python
train_params['lr'] = 0.0005  # Use a different learning rate
```

Remember to give each experiment a unique name to keep track of results:
```python
experiment_name = 'experiment_2_half_data'
```
