In [7]:
import torch
import torch.nn as nn #all NN modules are in this, loss functns also
import torch.optim as optim #all optimaizatn algos like adam sgd in this
import torch.nn. functional as F #it have activatn fucntions like relu n all, nn also have them
from torch.utils.data import DataLoader #easier dataset management
import torchvision.datasets as datasets #for getting data from torch
import torchvision.transforms as transforms #transformatns that we can perform on our dataset

In [8]:
# Create Fully Connected Network
class NN(nn.Module):
  """Small fully connected classifier: Linear -> ReLU -> Linear.

  Args:
    input_size: number of features in each (already flattened) input sample.
    num_classes: number of scores produced per sample.
  """

  def __init__(self, input_size, num_classes):
    super().__init__()
    hidden_units = 50  # width of the single hidden layer
    self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_units)
    self.fc2 = nn.Linear(in_features=hidden_units, out_features=num_classes)

  def forward(self, x):
    """Return the unnormalized class scores for x (no softmax is applied here)."""
    hidden = torch.relu(self.fc1(x))
    return self.fc2(hidden)

In [9]:
# Smoke test: push a random batch of 64 flattened samples through the network
# and print the output shape (one row of 10 scores per sample is expected).
model = NN(input_size=784, num_classes=10)
x = torch.randn((64, 784))
pred = model(x).size()
print(pred)

torch.Size([64, 10])


In [10]:
# Set device
# Preference order: Apple-silicon GPU ("mps"), then CUDA, then CPU.
# getattr guards against PyTorch builds that have no torch.backends.mps
# attribute, where a direct access would raise AttributeError.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
  device = torch.device("mps")
elif torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

In [11]:
# Display the device chosen above so the reader can see where tensors will live.
device

device(type='mps')

In [12]:
# Hyperparameters
input_size = 28 * 28   # 784 features: one per pixel of a flattened 28x28 image
num_classes = 10       # number of output scores produced by the network
learning_rate = 1e-2   # step size handed to the optimizer
batch_size = 128       # samples per DataLoader batch
num_epochs = 5         # full passes over the training set

In [13]:
# Load Data
# MNIST is stored under data/. download=True fetches the files only when they
# are not already present in root, so the notebook also runs on a fresh
# checkout (download=False raises an error if the dataset is missing).
# ToTensor converts each image into a tensor, so the loaders yield tensor batches.
train_dataset = datasets.MNIST(root='data/', train=True, transform=transforms.ToTensor(), download=True)
# Shuffle the training samples each epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)


test_dataset = datasets.MNIST(root='data/', train=False, transform=transforms.ToTensor(), download=True)
# Keep the test order fixed so evaluation output is repeatable.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
# Peek at the first test batch only: print the image tensor shape and the labels.
first_batch = next(iter(test_loader), None)
if first_batch is not None:
  im, l = first_batch
  print(im.shape, l)

torch.Size([128, 1, 28, 28]) tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2,
        4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3, 7, 4, 6, 4, 3, 0, 7, 0,
        2, 9, 1, 7, 3, 2, 9, 7, 7, 6, 2, 7, 8, 4, 7, 3, 6, 1, 3, 6, 9, 3, 1, 4,
        1, 7, 6, 9, 6, 0, 5, 4, 9, 9, 2, 1, 9, 4, 8, 7, 3, 9, 7, 4, 4, 4, 9, 2,
        5, 4, 7, 6, 7, 9, 0, 5])


In [15]:
# Initialize network
# Build the classifier from the hyperparameters, then move its parameters to the selected device.
model = NN(input_size, num_classes)
model = model.to(device)

In [16]:
# Print the module summary (its registered layers and their sizes).
print(model)

NN(
  (fc1): Linear(in_features=784, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)


In [17]:
# Print only the first tensor yielded by model.parameters(); the break stops
# the loop after one iteration.
# NOTE(review): presumably this is fc1.weight, since fc1 is registered first — confirm.
for i in model.parameters():
    print(i)
    break

Parameter containing:
tensor([[-0.0131,  0.0213,  0.0341,  ..., -0.0048,  0.0181, -0.0201],
        [-0.0352,  0.0257,  0.0061,  ..., -0.0240,  0.0151, -0.0109],
        [ 0.0354,  0.0108, -0.0114,  ...,  0.0174,  0.0034, -0.0247],
        ...,
        [ 0.0203, -0.0147, -0.0342,  ...,  0.0356, -0.0264, -0.0169],
        [ 0.0171, -0.0080,  0.0175,  ..., -0.0063,  0.0336, -0.0307],
        [ 0.0101, -0.0184,  0.0201,  ...,  0.0291,  0.0029,  0.0243]],
       device='mps:0', requires_grad=True)


In [18]:
# Loss and optimizer
# Cross-entropy is computed on the raw scores returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam updates every model parameter using the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [19]:
# Train Network
for epoch in range(num_epochs):
  print(f"epoch {epoch+1}/{num_epochs}")
  for batch_idx, (data, targets) in enumerate(train_loader):
    # Move the batch to the selected device.
    data = data.to(device)
    targets = targets.to(device)

    # Flatten every sample to one row: (batch, ...) -> (batch, features).
    data = data.reshape(len(data), -1)

    # Clear gradients left over from the previous step so they do not accumulate.
    optimizer.zero_grad()

    # Forward pass and loss.
    scores = model(data)
    loss = criterion(scores, targets)

    # Backward pass: compute gradients of the loss w.r.t. the parameters.
    loss.backward()

    # Parameter update using the gradients just computed.
    optimizer.step()

epoch 1/5
epoch 2/5
epoch 3/5
epoch 4/5
epoch 5/5


In [26]:
# Check accuracy on training & test to see how good our model
def check_accuracy(loader, model):
  """Print how many samples in `loader` the model classifies correctly.

  Args:
    loader: iterable of (inputs, labels) batches. If its `dataset` exposes a
      boolean `train` attribute, the heading names the split; otherwise a
      generic heading is printed instead of raising AttributeError.
    model: nn.Module mapping flattened inputs (batch, features) to per-class
      scores (batch, classes).

  Returns:
    None. The result is reported on stdout only.

  Batches are moved to the device of the model's first parameter (CPU if the
  model has no parameters). The model is evaluated in eval mode under
  torch.no_grad() and then put back into whatever mode it was in on entry,
  even if evaluation raises.
  """
  split_flag = getattr(getattr(loader, 'dataset', None), 'train', None)
  if split_flag is None:
    print('Checking accuracy')
  elif split_flag:
    print ('Checking accuracy on training data')
  else:
    print ('Checking accuracy on test data')

  # Follow the model's own device instead of relying on a notebook global.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')

  num_correct = 0
  num_samples = 0
  was_training = model.training
  model.eval() # evaluation mode so dropout and batch norm behave properly
  try:
    with torch.no_grad(): # no gradients are needed for evaluation
      for x, y in loader:
        x = x.to(device=target_device)
        y = y.to(device=target_device)
        x = x.reshape(x.shape[0], -1)
        scores = model(x)
        predictions = scores.argmax(dim=1)
        num_correct += (predictions == y).sum()
        num_samples += predictions.size(0)
  finally:
    # Restore the caller's mode rather than forcing train mode.
    model.train(was_training)

  num_correct = int(num_correct) # 0-dim tensor (or plain 0 for an empty loader) -> int
  # An empty loader has no defined accuracy; report nan instead of dividing by zero.
  accuracy = num_correct / num_samples * 100 if num_samples else float('nan')
  print (f'Got {num_correct} / {num_samples} with accuracy {accuracy: .2f} ')

In [27]:
# Report accuracy on the training split first, then on the held-out test split.
for loader in (train_loader, test_loader):
  check_accuracy(loader, model)

Checking accuracy on training data
Got 58478 / 60000 with accuracy  97.46 
Checking accuracy on test data
Got 9602 / 10000 with accuracy  96.02 
