# [Build the Neural Network](https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html)
`torch.nn` contains everything for building neural networks.
In PyTorch, every layer is a module (a subclass of `nn.Module`), and a neural network is itself a module made up of other modules (its layers).

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms



## Check Device for Training
For more information on the MPS backend, see the [MPS notes](https://pytorch.org/docs/stable/notes/mps.html).

In [4]:
# Choose the compute device, preferring a hardware accelerator when one is present.
# Priority: CUDA GPU first, then MPS (Metal Performance Shaders, the GPU backend
# on macOS), and finally the CPU as the fallback.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print("device", device)

device mps


## Define the Class

In [10]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten the input, then apply three linear layers
    with a ReLU after each of the first two.

    The default sizes reproduce the original fixed architecture (784 -> 512 -> 512 -> 10).

    Args:
        in_features: Number of values per sample once flattened (default 28*28).
        hidden_features: Width of both hidden layers (default 512).
        num_classes: Number of raw scores (logits) produced per sample (default 10).
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512, num_classes: int = 10):
        super().__init__()
        # Collapses every dimension after the batch dimension into one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Compute the logits for a batch of inputs.

        Do not call this method directly: invoke the module itself (`model(x)`),
        which runs `forward` along with any registered hooks.

        Args:
            x: Batch of inputs; each sample must flatten to `in_features` values.

        Returns:
            Tensor of raw, unnormalized scores with one row per sample and
            `num_classes` columns.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [12]:
# Build the network first, then transfer it to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)
print("model type", type(model))

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
model type <class '__main__.NeuralNetwork'>


In [18]:
# A single random input shaped like one 28x28 image, created on the target device.
X = torch.rand((1, 28, 28), device=device)
# Invoke the module as a callable (`model(X)`) instead of calling `model.forward(X)`;
# the call runs `forward` internally.
# The result is a 2-dimensional tensor: one row per input sample and one raw
# score (logit) per class.
logits = model(X)
print("logits raw", logits)
print("logits type", type(logits))
print("logits size", logits.shape)

logits raw tensor([[ 0.0048,  0.0831,  0.0521, -0.0356, -0.0202, -0.0151,  0.0107, -0.0544,
          0.0689, -0.0345]], device='mps:0', grad_fn=<LinearBackward0>)
logits type <class 'torch.Tensor'>
logits size torch.Size([1, 10])


In [19]:
# Normalize the logits along dim=1 (the class dimension) so that every row
# becomes a probability distribution.
softmax_layer = nn.Softmax(dim=1)
pred_probab = softmax_layer(logits)
# The predicted class is the index of the largest probability in each row.
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([1], device='mps:0')


## Model Layers

In [21]:
# Stand-in mini-batch: 3 random "images", each of size 28x28.
input_image = torch.rand((3, 28, 28))
print("input_image size", input_image.shape)

input_image size torch.Size([3, 28, 28])


### `nn.Flatten`
`nn.Flatten` converts each 2D 28x28 image into a contiguous array of 784 pixel values; the mini-batch dimension (dim=0) is kept.

In [24]:
# Flatten everything after the batch dimension; start_dim=1 and end_dim=-1
# are the defaults, spelled out here for clarity.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
# dim=0 (the mini-batch of 3) is untouched; each 28x28 image becomes 784 values.
print(flat_image.shape)

torch.Size([3, 784])
