In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [16]:
# Define the Multi-Layer Perceptron (MLP) model using nn.Sequential
class MLP(nn.Module):
    """Feed-forward network with a single hidden layer.

    The layers are stacked as Linear -> ReLU -> Linear and stored in
    ``self.model``; the output is the raw result of the last linear layer
    (no softmax is applied here).

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created in forward order and wrapped in one Sequential,
        # so their parameters are registered under model.0 and model.2.
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        result = self.model(x)
        return result

In [17]:
# Hyperparameters
input_size = 784  # Features per sample (a flattened 28x28 image, as in MNIST)
hidden_size = 500  # Units in the hidden layer
output_size = 10  # Number of classes (for MNIST, digits 0-9)
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# Seed PyTorch's global RNG once, up front. The synthetic data, the initial
# weights and the shuffle order are all drawn from it, so without a seed every
# Restart & Run All produces different numbers.
seed = 0
torch.manual_seed(seed);  # trailing ';' hides the returned Generator repr in the cell output

In [18]:
# Synthetic stand-in for a real dataset such as MNIST (illustration only):
# standard-normal feature vectors paired with randomly drawn integer labels.
num_samples = 60000  # shared by features and labels so the two stay aligned
x_train = torch.randn(num_samples, input_size)
y_train = torch.randint(low=0, high=output_size, size=(num_samples,))

In [19]:
# Pair every sample with its label, then serve the pairs as shuffled mini-batches
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)


In [20]:
# Build the network, the classification loss and the optimizer that updates it
model = MLP(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
criterion = nn.CrossEntropyLoss()  # takes the model's raw outputs and integer class labels
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [7]:
# Training loop CPU
# The batches are used exactly as the DataLoader yields them (no device transfer).
for epoch in range(num_epochs):
    for i, batch in enumerate(train_loader):
        images, labels = batch

        # Drop the gradients from the previous step before computing new ones
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        # Report progress on every 100th step only
        if (i + 1) % 100 != 0:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')


Epoch [1/5], Step [100/600], Loss: 2.3561
Epoch [1/5], Step [200/600], Loss: 2.3620
Epoch [1/5], Step [300/600], Loss: 2.3579
Epoch [1/5], Step [400/600], Loss: 2.3048
Epoch [1/5], Step [500/600], Loss: 2.3244
Epoch [1/5], Step [600/600], Loss: 2.3199
Epoch [2/5], Step [100/600], Loss: 2.2450
Epoch [2/5], Step [200/600], Loss: 2.2875
Epoch [2/5], Step [300/600], Loss: 2.2810
Epoch [2/5], Step [400/600], Loss: 2.2264
Epoch [2/5], Step [500/600], Loss: 2.3044
Epoch [2/5], Step [600/600], Loss: 2.3443
Epoch [3/5], Step [100/600], Loss: 2.0260
Epoch [3/5], Step [200/600], Loss: 2.1488
Epoch [3/5], Step [300/600], Loss: 2.1220
Epoch [3/5], Step [400/600], Loss: 2.2501
Epoch [3/5], Step [500/600], Loss: 2.2245
Epoch [3/5], Step [600/600], Loss: 2.1885
Epoch [4/5], Step [100/600], Loss: 1.7727
Epoch [4/5], Step [200/600], Loss: 1.8452
Epoch [4/5], Step [300/600], Loss: 1.7961
Epoch [4/5], Step [400/600], Loss: 1.7549
Epoch [4/5], Step [500/600], Loss: 1.9244
Epoch [4/5], Step [600/600], Loss:

In [21]:
# Training loop GPU

# Use the first CUDA device when one is available; otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Move the model's parameters to the selected device
model.to(device)

# Rebuild the optimizer now that the parameters sit on their final device.
# If the CPU training cell ran first, the old optimizer holds Adam moment
# buffers as CPU tensors; stepping it against CUDA parameters and gradients
# would mix devices inside optimizer.step(). A fresh optimizer creates its
# state on the same device as the parameters. (This also resets Adam's moments.)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress on every 100th step
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')


Epoch [1/5], Step [100/600], Loss: 2.3890
Epoch [1/5], Step [200/600], Loss: 2.3031
Epoch [1/5], Step [300/600], Loss: 2.3525
Epoch [1/5], Step [400/600], Loss: 2.3286
Epoch [1/5], Step [500/600], Loss: 2.3137
Epoch [1/5], Step [600/600], Loss: 2.3385
Epoch [2/5], Step [100/600], Loss: 2.2646
Epoch [2/5], Step [200/600], Loss: 2.2344
Epoch [2/5], Step [300/600], Loss: 2.2344
Epoch [2/5], Step [400/600], Loss: 2.3548
Epoch [2/5], Step [500/600], Loss: 2.3089
Epoch [2/5], Step [600/600], Loss: 2.3307
Epoch [3/5], Step [100/600], Loss: 2.1884
Epoch [3/5], Step [200/600], Loss: 2.1998
Epoch [3/5], Step [300/600], Loss: 2.1504
Epoch [3/5], Step [400/600], Loss: 2.1529
Epoch [3/5], Step [500/600], Loss: 2.1320
Epoch [3/5], Step [600/600], Loss: 2.2872
Epoch [4/5], Step [100/600], Loss: 1.8406
Epoch [4/5], Step [200/600], Loss: 1.8064
Epoch [4/5], Step [300/600], Loss: 1.8373
Epoch [4/5], Step [400/600], Loss: 1.8680
Epoch [4/5], Step [500/600], Loss: 1.9216
Epoch [4/5], Step [600/600], Loss:

In [8]:
# Save the model checkpoint: only the parameters (state dict), not the module object
model_state = model.state_dict()
torch.save(model_state, 'mlp_model.pth')
print('Model saved to mlp_model.pth')

Model saved to mlp_model.pth


In [9]:
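# Reload example (intentionally left commented out): uncomment the code lines
# below to restore the saved parameters into a fresh model for inference.

# # Initialize the model (with the same architecture)
# model = MLP(input_size, hidden_size, output_size)

# # Load the model parameters from the saved file.
# # map_location='cpu' lets a checkpoint saved from the GPU load on a CPU-only machine.
# model.load_state_dict(torch.load('mlp_model.pth', map_location='cpu'))
# model.eval()  # Set the model to evaluation mode

# # Now the model is ready to be used for inference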
