#### We will use mnist dataset as example

In [4]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

#### steps
1. download dataset
2. create data loader
3. build model
4. train
5. save trained model

In [7]:
#step 1
def download_mnist_datasets():
    train_data = datasets.MNIST(
    root ="data", #tell pytorch where to store the data, it will store them in folder data
    download = True,
    train = True,
    transform = ToTensor() #ToTensor takes the image n and reshapes a new tensor where each value is normalized between 0 and 1)
    )
    validation_data = datasets.MNIST(
    root ="data", #tell pytorch where to store the data, it will store them in folder data
    download = True,
    train = False,
    transform = ToTensor())
    return train_data, validation_data

In [34]:
train_data, validation_data = download_mnist_datasets()

In [35]:
#step 2 : Dataloader for the train set
#Dataloader is a class we can use to wrap a data set, in our case the train 
#data and it allows us to load data in batches. So it allows us that are very 
#heavy in memory!!
BATCH_SIZE=128
train_data_loader = DataLoader(train_data, batch_size= BATCH_SIZE)

In [36]:
#step 3: build model/class
#to define a model in Pytorch we have to define 2 methods: the constructor where we define
#all the layers
#and method forward to describe the network

class FeedForwardNet(nn.Module):
    
    def __init__(self):
        super().__init__() #consructor of base class
        self.flatten = nn.Flatten() #first layer is called flatten, it can have any name
        self.dense_layers = nn.Sequential(
            #sequential allows us to pack together multiple layers and the data flow sequentialy from one level to the next
            nn.Linear(28*28, 256), #the first dense layer, 256 neurons, 28x28 images
            nn.ReLU(),
            nn.Linear(256,10) # we have 10 classes)
        )
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self,input_data): #it indicates pytorch how to manipulate the data
        flattened_data = self.flatten(input_data) #we pass input data to the flatten data and take x
        logits = self.dense_layers(flattened_data) #logits are outputs
        predictions = self.softmax(logits)
        return predictions

In [37]:
if torch.cuda.is_available():
    #if gpu accelaration is available
    device ="cuda"
else:
    device ="cpu"
print(device)

cpu


In [38]:
feed_forward_net = FeedForwardNet().to(device)

In [39]:
#train model: we will use 2 functions. In train we'll go through all the epochs
#that we want to train the model for and we'll call at each pass train_one_epoch
def train_one_epoch(model, data_loader, loss_fn, optimiser, device):
    for inputs, targets in data_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        #calculate loss
        predictions = model(inputs)
        loss = loss_fn(predictions, targets)
        #backpropagate loss and update weights
        optimiser.zero_grad() #the gradients at each itteration gets saved, we want at
        #each train itteration to reset to zero(zero grad) to start from scratch
        loss.backward()
        optimiser.step() #update the weights
    print(f"Loss: {loss.item()}") #print the loss for the last batch that we have
    
    
def train(model, data_loader, loss_fn, optimiser, device, epochs):
    for i in range(epochs):
        print(f"epoch {i+1}")
        train_one_epoch(model, data_loader, loss_fn, optimiser, device)
        print("----------------")
    print("Trainig is done.")

In [40]:
#train the model 
EPOCHS = 10
LEARNING_RATE = 0.001
loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(feed_forward_net.parameters(), lr =LEARNING_RATE)
train(feed_forward_net, train_data_loader, loss_fn, optimiser, device, EPOCHS)

#we want to store the model that we train!
torch.save(feed_forward_net.state_dict(),"feedforwardnet.pth")
print("model trained and stored at feedforwardnet.pth")

epoch 1
Loss: 1.5148301124572754
----------------
epoch 2
Loss: 1.497542381286621
----------------
epoch 3
Loss: 1.4873720407485962
----------------
epoch 4
Loss: 1.4819507598876953
----------------
epoch 5
Loss: 1.473847508430481
----------------
epoch 6
Loss: 1.472737431526184
----------------
epoch 7
Loss: 1.47238028049469
----------------
epoch 8
Loss: 1.4723714590072632
----------------
epoch 9
Loss: 1.4745192527770996
----------------
epoch 10
Loss: 1.4726325273513794
----------------
Trainig is done.
model trained and stored at feedforwardnet.pth


#### Now we will make predictions with the model we created.

In [41]:
#load back the model
feed_forward_net = FeedForwardNet()
state_dict = torch.load("feedforwardnet.pth")
feed_forward_net.load_state_dict(state_dict)

<All keys matched successfully>

In [56]:
def predict(model, input, target, class_mapping):
    model.eval() #eval is like a switch that changes, if we activate eval certain layers turn of because we dont need them for inference
    with torch.no_grad(): #not need to calculate gradients for inference, just for training
        predictions = model(input)
        #predictions is a Tensor with dimention (number of samples passing in model x number of classes we try to predict) =(1x10)
        #Tensor (1,10) ---> [ [0.1, 0.01, ... , 0.6]] the sum will be 1 because of softmax
        predicted_index = predictions[0].argmax(0)
        predicted = class_mapping[predicted_index]
        expected = class_mapping[target]
    return predicted, expected

In [57]:
#get a sample from the validation dataset for inference
input, target = validation_data[0][0], validation_data[0][1]
#make an inference
class_mapping = [
    "0",
    "1",
    "2",
    "3",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
]

predicted, expected = predict(feed_forward_net, input, target, class_mapping)

In [58]:
print(f"Predicted: '{predicted}', expected: '{expected}'")

Predicted: '7', expected: '7'
