In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from accelerate import Accelerator

# Module-level Accelerator instance. train_model() uses it for the backward
# pass and main() uses it to prepare the model/optimizer/dataloader and to
# unwrap the model before saving, so it must exist before either is called.
accelerator = Accelerator()

# Define a simple 2-layer MLP
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Width of the hidden layer.
        output_size: Width of the output layer.
    """

    def __init__(self, input_size=784, hidden_size=256, output_size=10):
        super().__init__()
        # Attribute names become the state_dict keys, so they stay as-is.
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the raw output of the second linear layer for input `x`."""
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)


In [4]:
# Create synthetic data for demonstration
def create_dummy_data(num_samples=10000, input_size=784, num_classes=10, seed=None):
    """Generate a random classification dataset for demonstration purposes.

    Args:
        num_samples: Number of rows to generate.
        input_size: Number of features per row.
        num_classes: Labels are drawn uniformly from ``[0, num_classes)``.
        seed: Optional integer seed. When given, a private
            ``np.random.RandomState(seed)`` is used, so repeated calls return
            identical data and the global NumPy RNG state is left untouched.
            When ``None`` (the default) the global ``np.random`` state is
            used, which is the original behaviour.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a float32 array of shape
        ``(num_samples, input_size)`` drawn from a standard normal
        distribution and ``y`` is an int64 array of shape ``(num_samples,)``.
    """
    # The np.random module and RandomState expose the same randn/randint API,
    # so one code path serves both the seeded and the unseeded case.
    rng = np.random if seed is None else np.random.RandomState(seed)
    X = rng.randn(num_samples, input_size).astype(np.float32)
    y = rng.randint(0, num_classes, size=(num_samples,)).astype(np.int64)
    return X, y


In [5]:
# Training function
def train_model(model, train_loader, optimizer, criterion, num_epochs=5):
    """Train `model` for `num_epochs` passes over `train_loader`.

    After each epoch one summary line with the mean per-batch loss and the
    training accuracy is printed. Both numbers are accumulated from the
    batches this process iterated over; nothing is gathered across processes.

    Uses the module-level `accelerator` for the backward pass and for
    printing.

    Args:
        model: Module called as ``model(inputs)``.
        train_loader: Iterable yielding ``(inputs, labels)`` pairs.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        num_epochs: Number of passes over `train_loader`.

    Returns:
        None.
    """
    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            accelerator.backward(loss)  # Use accelerator.backward instead of loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Count batches explicitly rather than calling len(train_loader):
            # works for loaders without __len__ and makes the empty case detectable.
            num_batches += 1

            # Accuracy bookkeeping; detach() replaces the legacy `.data` access.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        if num_batches == 0 or total == 0:
            # The original divided by zero here; report and move on instead.
            accelerator.print(f'Epoch {epoch+1}/{num_epochs}: train_loader produced no samples, skipping metrics')
            continue

        # Print statistics
        epoch_loss = running_loss / num_batches
        epoch_acc = 100 * correct / total

        # accelerator.print avoids one duplicate line per process when launched
        # with several processes; with a single process it behaves like print().
        accelerator.print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')



In [7]:
def main(seed=42):
    """Train the demo MLP on synthetic data and save its weights.

    Builds the model, loss and optimizer, generates a random dataset, hands
    everything to the module-level `accelerator`, trains for a fixed number of
    epochs and writes the unwrapped model's ``state_dict`` to
    ``mlp_model.pth`` in the current working directory.

    Args:
        seed: Seed applied to the global NumPy and PyTorch RNGs before the
            model and the data are created. Pass ``None`` to leave the RNGs
            untouched (the original, non-reproducible behaviour).

    Returns:
        None.
    """
    # Hyperparameters
    input_size = 784
    hidden_size = 256
    output_size = 10
    batch_size = 128
    learning_rate = 0.001
    num_epochs = 5

    # Seed before anything random happens. Besides reproducibility, this makes
    # every process generate the same synthetic dataset when the notebook is
    # launched with more than one process.
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)

    # Create model, loss function, and optimizer
    model = MLP(input_size, hidden_size, output_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Generate synthetic data
    X_train, y_train = create_dummy_data(num_samples=10000, input_size=input_size, num_classes=output_size)

    # Create DataLoader
    train_dataset = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Prepare for distributed training with accelerate
    model, optimizer, train_loader = accelerator.prepare(model, optimizer, train_loader)

    # Train the model
    train_model(model, train_loader, optimizer, criterion, num_epochs)

    # Save the model (unwrapped). Wait until every process has finished
    # training, then let only the main process write the file so that several
    # processes never write to the same path at once.
    accelerator.wait_for_everyone()
    unwrapped_model = accelerator.unwrap_model(model)
    if accelerator.is_main_process:
        torch.save(unwrapped_model.state_dict(), "mlp_model.pth")
    accelerator.print("Model saved successfully!")



In [9]:
# Entry point: run the training demo only when this code is executed as the
# main module, so that importing it does not start a training run.
if __name__ == "__main__":
    main()

Epoch 1/5, Loss: 2.3321, Accuracy: 9.90%
Epoch 2/5, Loss: 2.0208, Accuracy: 34.48%
Epoch 3/5, Loss: 1.7389, Accuracy: 44.91%
Epoch 4/5, Loss: 1.4197, Accuracy: 57.77%
Epoch 5/5, Loss: 1.0609, Accuracy: 73.23%
Model saved successfully!
