<a href="https://colab.research.google.com/github/jojaritz/MNIST_optimizing/blob/main/MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn.functional as F

In [19]:
BATCH_SIZE =64 #how many images get processed at the same time (a GPU)
LEARING_RATE = 0.1
EPOCHS = 10

In [20]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,),(0.5,)) #centers values so that values range from -1 to 1
])

In [21]:
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)


In [22]:
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
#nn.Conv2d it takes each of the values in the 3x3 , then multiplies it by a weight and adds them, then adds a bias (the function uses a bell curve to choose initial random values)
    self.conv1 = nn.Conv2d(in_channels=1, out_channels = 32, kernel_size=3, stride=1)#kernal size is how many pixels (3x3) for its shape; stride is how much the frame moves each time
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1) #the second one is building on the data that it first got ^ and gets a bigger picture of what happened

    self.fc1 = nn.Linear(in_features=64*12*12, out_features=10)




  def forward(self, x):
    x = self.conv1(x)
    x = torch.relu(x)


    x= self.conv2(x)

    x= torch.relu(x)
    x = F.max_pool2d(x, 2)

    x = torch.flatten(x,1)

    x=self.fc1(x) # this is basically a version of conv but instead it looks at all of the pixel data and makes the wieghts into 10 outputs (a kernal of all pixels *26)

    return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)


criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=LEARING_RATE)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.7)


print(f"Training on {device}...")

for epoch in range(EPOCHS):
  running_loss = 0.0



  for i, (inputs, labels) in enumerate(train_loader):
    inputs, labels = inputs.to(device), labels.to(device)

    optimizer.zero_grad()

    outputs = model(inputs)

    loss = criterion(outputs,labels)

    loss.backward()

    optimizer.step()

    running_loss += loss.item()

    if(i+1) % 100 ==0:
      print(f'Epoch [{epoch+1}/{EPOCHS}], Step [{len(train_loader)}], Loss: {running_loss/100:.4f}')
      running_loss = 0.0
  scheduler.step()
  current_lr = scheduler.get_last_lr()[0]
  print(f"End of Epoch {epoch+1}. New Learning Rate: {current_lr:.5f}")

print("Finished Training")

Training on cuda...
Epoch [1/10], Step [938], Loss: 0.6059
Epoch [1/10], Step [938], Loss: 0.1753
Epoch [1/10], Step [938], Loss: 0.1446
Epoch [1/10], Step [938], Loss: 0.1071
Epoch [1/10], Step [938], Loss: 0.1031
Epoch [1/10], Step [938], Loss: 0.0907
Epoch [1/10], Step [938], Loss: 0.0848
Epoch [1/10], Step [938], Loss: 0.0768
Epoch [1/10], Step [938], Loss: 0.0750
End of Epoch 1. New Learning Rate: 0.07000
Epoch [2/10], Step [938], Loss: 0.0596
Epoch [2/10], Step [938], Loss: 0.0479
Epoch [2/10], Step [938], Loss: 0.0569
Epoch [2/10], Step [938], Loss: 0.0508
Epoch [2/10], Step [938], Loss: 0.0561
Epoch [2/10], Step [938], Loss: 0.0529
Epoch [2/10], Step [938], Loss: 0.0494
Epoch [2/10], Step [938], Loss: 0.0506
Epoch [2/10], Step [938], Loss: 0.0405
End of Epoch 2. New Learning Rate: 0.04900
Epoch [3/10], Step [938], Loss: 0.0385
Epoch [3/10], Step [938], Loss: 0.0381
Epoch [3/10], Step [938], Loss: 0.0375
Epoch [3/10], Step [938], Loss: 0.0317
Epoch [3/10], Step [938], Loss: 0.04

In [23]:
# --- 5. THE TESTBENCH (Quality Control) ---
# Note: We use train=False to get the 10,000 images the model has NEVER seen.
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("\nStarting Testing...")

# 1. Lock the model (Batch Norm/Dropout behave differently in Test mode)
model.eval()

correct = 0
total = 0

# 2. Turn off the Engine (No Gradients = No Learning, just Predicting)
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Calculate outputs
        outputs = model(inputs)

        # Get the predicted class (The index of the highest score)
        # _ is the max value (we don't care), predicted is the index (we care)
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the model on the 10,000 test images: {accuracy:.2f}%')

# Optional: Save the model weights for Dr. Wang to inspect later
torch.save(model.state_dict(), 'mnist_cnn_hardware_ready.pth')
print("Model saved to mnist_cnn_hardware_ready.pth")


Starting Testing...
Accuracy of the model on the 10,000 test images: 98.81%
Model saved to mnist_cnn_hardware_ready.pth
