In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as transforms

Building a network with -
2 Hidden layers and one output layer 

In [2]:
class Net(nn.Module):
    
    #initialising the network to have a random linear functions. Generating a whole NN.
    def __init__(self,D_in,H1,H2,D_out):
        super(Net,self).__init__()
        #now i have created a child class of Module. 
        #I can call functions of parent class 
        #and store it as objects of child class
        #Look self.Linear as a step. That randomly initialises weights and biases for all features and training example.
        #You can further retreive these variables by model.parameters()
        self.Linear1=nn.Linear(D_in,H1)
        self.Linear2=nn.Linear(H1,H2)
        #the represents the output layer
        self.Linear3=nn.Linear(H2,D_in)
    def forward(self,x):
        x=torch.sigmoid(self.Linear1(x))
        x=torch.sigmoid(self.Linear2(x))
        x=self.Linear3(x)
        return x

**Remember**  
- If you want to implement examples in one node. You can put every example in each row of a tensor torch
- And if you want to put different features. Its always better to put them in different nodes all together

In [3]:
#this has popular training datasets
train_dataset=dset.MNIST(root='./data',train=True,download=True,transform=transforms.ToTensor())

In [4]:
## setting a loss function
criterion=nn.CrossEntropyLoss()

In [5]:
# Create the model class using Relu as a activation function

class NetRelu(nn.Module):
    
    # Constructor
    def __init__(self, D_in, H1, H2, D_out):
        super(NetRelu, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
    
    # Prediction
    def forward(self, x):
        x = F.relu(self.linear1(x))  
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x

`#### Jist about dataset objects.  

- These objects are sub-scriptable like any other data.
- Every Index of the object is a tensor.
- Every subscribable entry is one example. data[1] gives you the first example of the data

More info - https://courses.edx.org/courses/course-v1:IBM+DL0110EN+3T2018/courseware/b35973888fed4695b23333400faf1d6c/51f5e23d6f6942dbbd1248ef49bbd4b7/6?activate_block_id=block-v1%3AIBM%2BDL0110EN%2B3T2018%2Btype%40vertical%2Bblock%40dd9ec4d1527a4ca29bf1aed73b82fe89

In [6]:
# Train the model
# Trainloader lets you extract the data in x and y

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}  
    
    for epoch in range(epochs):
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            #this decreases the model parameters according to the values retreived in above function
            optimizer.step()
            useful_stuff['training_loss'].append(loss.data.item())
        
        correct = 0
        for x, y in validation_loader:
            yhat = model(x.view(-1, 28 * 28))
            _, label = torch.max(yhat, 1)
            correct += (label == y).sum().item()
    
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    
    return useful_stuff

In [7]:
# Create the validating dataset

validation_dataset = dset.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())


In [8]:
# Set the parameters for create the model
# image is of dimension 28*28. It feeds in the network. Output is of a digit from 1 to 10.

input_dim = 28 * 28
hidden_dim1 = 50
hidden_dim2 = 50
output_dim = 10

In [9]:
# Set the number of iterations

cust_epochs = 10

In [10]:
# Create the training data loader and validation data loader object

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000, shuffle=True)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=5000, shuffle=False)

In [11]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x11a1d40f0>

[Jist of optimisers](https://pytorch.org/docs/stable/optim.html)

In [12]:
# Train the model with sigmoid function. Batch size is passed here only.

learning_rate = 0.01
model = Net(input_dim, hidden_dim1, hidden_dim2, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=cust_epochs)

In [13]:
training_results

{'training_loss': [6.614238739013672,
  6.60515022277832,
  6.586878776550293,
  6.580187797546387,
  6.563568592071533,
  6.547549247741699,
  6.542593955993652,
  6.5282368659973145,
  6.503116607666016,
  6.483527183532715,
  6.4782280921936035,
  6.468458652496338,
  6.459653377532959,
  6.445224761962891,
  6.4205851554870605,
  6.412880897521973,
  6.39442253112793,
  6.387928485870361,
  6.379085540771484,
  6.371450424194336,
  6.3432745933532715,
  6.325780391693115,
  6.323033809661865,
  6.2979631423950195,
  6.284571170806885,
  6.272216796875,
  6.26621675491333,
  6.244178771972656,
  6.239214897155762,
  6.2112250328063965,
  6.199461460113525,
  6.199078559875488,
  6.182709693908691,
  6.17387580871582,
  6.1443634033203125,
  6.144848823547363,
  6.113442420959473,
  6.109556198120117,
  6.095207691192627,
  6.075138568878174,
  6.067172050476074,
  6.049929141998291,
  6.0437750816345215,
  6.029205799102783,
  6.005720615386963,
  5.996801376342773,
  5.981299400329

Intuition behind everything dataset.

In [17]:
type(train_dataset[3000])

tuple

In [20]:
#this returns x value of tensor
type(train_dataset[3000][0])

torch.Tensor

In [22]:
#this returns y value
train_dataset[3000][1]

9

# Dropout
- Regularisation technique for nueral networks  
- In pytorch more probablity means more nuerons per layer. Dropout is used to lessen this probablity. In other words some nodes randomly as not choosen so overfitting does not occur.
- Some nodes are turn to zero. Some doesn't. Which is determined by a probablity value.

In [16]:
class Net(nn.Module):
    def __init__(self,in_size,n_hidden,out_size,p=0):
        super(Net,self).__init__()
        self.drop=nn.Dropout(p=p)
        self.linear1=nn.Linear(in_size,n_hidden)
        self.linear2=nn.Linear(n_hidden,n_hidden)
        self.linear3=nn.Linear(n_hidden,out_size)
    def forward(self,x):
        x=F.relu(self.linear1(x))
        x=self.dropout(x)
        x=F.relu(self.linear2(x))
        x=self.drop(x)
        x=self.linear3(x)
        return x
    
    