In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, RandomSampler

# Set random seed for reproducibility
def set_seed(seed=42):
    """Seed PyTorch's global random number generators.

    Args:
        seed: Integer seed handed first to ``torch.manual_seed`` and then to
            ``torch.cuda.manual_seed_all``.

    Returns:
        None.
    """
    # Same two calls, in the same order: default generator first, then CUDA.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

# Example dataset
# The samples are drawn from a dedicated, explicitly seeded generator so the
# data is identical on every run. Drawing from the global RNG before any seed
# has been set would produce a different dataset each time the script runs,
# which defeats the reproducibility the training loop is aiming for.
data_generator = torch.Generator().manual_seed(0)
X = torch.randn(100, 10, generator=data_generator)  # 100 samples, 10 features
y = torch.randint(0, 2, (100,), generator=data_generator)  # 100 binary labels

# Wrap features and labels in a dataset; the shuffling data loader is built
# per epoch inside train_model.
dataset = TensorDataset(X, y)

# Function to train with deterministic shuffling
def train_model(seed=42, num_epochs=5):
    """Train a small MLP classifier on the module-level ``dataset``.

    The global RNG is seeded with ``seed`` before the model is built and is
    re-seeded with ``seed + epoch`` at the start of every epoch. Each epoch
    also gets its own ``RandomSampler`` whose generator is seeded with the
    same ``seed + epoch`` value, so the shuffle order of an epoch depends
    only on ``seed`` and the epoch index.

    Args:
        seed: Base seed for model initialisation and per-epoch shuffling.
        num_epochs: Number of passes over ``dataset``.

    Returns:
        None. One line per epoch is printed with the sum of the per-batch
        losses for that epoch.
    """
    set_seed(seed)

    # Layers are created in the same order as they appear in the network.
    hidden_layer = nn.Linear(10, 50)
    activation = nn.ReLU()
    output_layer = nn.Linear(50, 2)
    net = nn.Sequential(hidden_layer, activation, output_layer)

    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(net.parameters(), lr=0.001)

    def run_batch(features, labels):
        # One optimisation step; returns the batch loss as a Python float.
        adam.zero_grad()
        batch_loss = loss_fn(net(features), labels)
        batch_loss.backward()
        adam.step()
        return batch_loss.item()

    for epoch in range(num_epochs):
        epoch_seed = seed + epoch
        set_seed(epoch_seed)  # Re-seed the global RNG for this epoch

        # A sampler with its own seeded generator fixes this epoch's shuffle order.
        shuffle_rng = torch.Generator()
        shuffle_rng.manual_seed(epoch_seed)
        loader = DataLoader(
            dataset,
            batch_size=16,
            sampler=RandomSampler(dataset, generator=shuffle_rng),
        )

        # Accumulate the raw (un-averaged) batch losses for reporting.
        epoch_loss = 0
        for features, labels in loader:
            epoch_loss += run_batch(features, labels)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

# Run training with the default configuration, spelled out explicitly:
# base seed 42, five epochs, deterministic per-epoch shuffling.
train_model(seed=42, num_epochs=5)


Epoch 1/5, Loss: 5.0556
Epoch 2/5, Loss: 4.9304
Epoch 3/5, Loss: 4.8711
Epoch 4/5, Loss: 4.7943
Epoch 5/5, Loss: 4.7700
