In [2]:
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


In [3]:
# Preprocessing pipeline: convert each image to a tensor, then normalise
# it with a mean of 0.5 and a standard deviation of 0.5 (single channel).
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=(0.5,), std=(0.5,))
transform = transforms.Compose([to_tensor, normalize])

In [4]:
# Build the training split first, then the test split; both share the same
# root directory and preprocessing and are downloaded when missing.
minst_train, minst_test = (
    datasets.MNIST(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

In [6]:
# Mini-batches of 20 training samples, reshuffled on every pass.
train_loader = DataLoader(
    dataset=minst_train,
    batch_size=20,
    shuffle=True,
)

### We will create a simple NN with 28 × 28 = 784 input features, one hidden layer of 128 units, and 10 output classes


In [7]:
class SimpleNN(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Architecture: flatten -> Linear(input_size, hidden_size) -> ReLU
    -> Dropout(dropout_p) -> Linear(hidden_size, num_classes).
    The forward pass returns the raw output of the last linear layer
    (no softmax is applied).

    Args:
        input_size: Number of features per sample after flattening
            (default 28 * 28).
        hidden_size: Width of the hidden layer (default 128).
        num_classes: Number of output values per sample (default 10).
        dropout_p: Drop probability applied to the hidden activations
            (default 0.5).
    """

    def __init__(self, input_size=28 * 28, hidden_size=128, num_classes=10, dropout_p=0.5):
        super().__init__()

        # Layers are created in the same order as before so that parameter
        # initialisation consumes the random generator identically.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Compute the class scores for a batch of inputs.

        Args:
            x: Tensor whose total element count is a multiple of the
                configured input size; it is flattened to
                (batch_size, input_size).

        Returns:
            Tensor of shape (batch_size, num_classes).
        """
        # reshape (unlike view) also accepts non-contiguous inputs,
        # e.g. tensors that were transposed or permuted beforehand.
        x = x.reshape(-1, self.fc1.in_features)
        x = torch.relu(self.fc1(x))  # Hidden layer + ReLU activation
        x = self.dropout(x)  # Only active in training mode
        x = self.fc2(x)  # Output layer
        return x

In [8]:
# 5. Build the model, then define its loss function and optimizer
model = SimpleNN()

# Cross-entropy loss is the classification objective
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the model with a learning rate of 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [9]:

# 6. Training the Model
# Upper bound of the epoch loop; also used as the denominator in the
# "Epoch [i/N]" progress message printed after every epoch.
num_epochs = 10  # Number of full passes over train_loader

In [10]:

for epoch in range(num_epochs):
    # Switch to training-time behaviour before each pass over the data.
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass followed by the loss against the true labels.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backpropagate and apply one optimizer update.
        loss.backward()
        optimizer.step()

        # Accumulate the statistics reported at the end of the epoch.
        running_loss += loss.item()
        predicted = outputs.max(dim=1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

    # Mean per-batch loss and overall accuracy (in percent) for this epoch.
    epoch_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%')


Epoch [1/10], Loss: 0.5702, Accuracy: 82.23%
Epoch [2/10], Loss: 0.4180, Accuracy: 86.95%
Epoch [3/10], Loss: 0.3758, Accuracy: 88.37%
Epoch [4/10], Loss: 0.3569, Accuracy: 88.92%
Epoch [5/10], Loss: 0.3425, Accuracy: 89.27%
Epoch [6/10], Loss: 0.3340, Accuracy: 89.66%
Epoch [7/10], Loss: 0.3223, Accuracy: 89.92%
Epoch [8/10], Loss: 0.3204, Accuracy: 89.79%
Epoch [9/10], Loss: 0.3090, Accuracy: 90.35%
Epoch [10/10], Loss: 0.3075, Accuracy: 90.38%


In [12]:
# Evaluation loader: shuffling is disabled because sample order does not
# affect the accuracy and a fixed order keeps evaluation reproducible.
test_loader = DataLoader(minst_test, batch_size=20, shuffle=False)

In [14]:
# 7. Evaluate the Model on Test Data
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0
with torch.no_grad():  # No need to compute gradients for evaluation
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the largest output per sample is the predicted class.
        # (Avoids assigning to `_`, which IPython uses for the last output.)
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Print test accuracy
test_accuracy = 100 * correct / total
print(f'Accuracy of the model on the test data: {test_accuracy:.2f}%')

# 8. Save the Model
model_path = Path('./nn/minst')
# torch.save does not create missing directories, so make sure the
# target directory exists before writing.
model_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), model_path)
# Report the path that was actually written.
print(f"Model saved as '{model_path}'")

Accuracy of the model on the test data: 95.34%
Model saved as 'mnist_model.pth'
