# Import libraries

In [2]:
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.datasets import mnist

# Torch dependencies
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim

from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Matplotlib renamed its bundled seaborn style sheets to 'seaborn-v0_8-*' in
# version 3.6 and removed the old names in 3.8, where requesting them raises
# OSError. Try the current name first and fall back to the legacy name so the
# notebook runs on both old and new matplotlib installations.
try:
    plt.style.use('seaborn-v0_8-darkgrid')
except OSError:
    plt.style.use('seaborn-darkgrid')

# Artificial Neural Networks (ANN)

* A basic neural network architecture consists of many artificial perceptrons :
![ANN illustration](https://www.researchgate.net/profile/Facundo-Bre/publication/321259051/figure/fig1/AS:614329250496529@1523478915726/Artificial-neural-network-architecture-ANN-i-h-1-h-2-h-n-o.png)

## 1. What is a neural network ?
* A neural network is a system of neurons that is responsible for the "thinking" done by humans. An artificial neural network is a computational system that is inspired by the structure of the human neural network and is used to replicate our decision-making process. The basic unit of an artificial neural network is the artificial perceptron.
![Perceptron illustration](https://www.allaboutcircuits.com/uploads/articles/how-to-train-a-basic-perceptron-neural-network_rk_aac_image1.jpg)

## 2. A simple neural network in pytorch

In [7]:
# Define a neural network as subclass of nn.Module
class ANN(nn.Module):
    """Minimal fully connected classifier: two linear layers, then softmax.

    No activation function is applied between the two linear layers.

    Args:
        in_features: Passed as ``in_features`` to the first linear layer.
        hidden: Output width of the first linear layer and input width of
            the second one.
        n_classes: Output width of the second linear layer.
    """

    def __init__(self, in_features, hidden=16, n_classes=10):
        super().__init__()
        # Sub-modules are created in the same order forward() applies them.
        self.input_layer = nn.Linear(in_features, hidden)
        self.hidden_layer = nn.Linear(hidden, n_classes)
        self.output_layer = nn.Softmax(dim=1)

    def forward(self, inputs):
        """Apply both linear layers to ``inputs``, then softmax over dim 1."""
        logits = self.hidden_layer(self.input_layer(inputs))
        return self.output_layer(logits)
    
# Sample a random (4, 12) batch; it is not fed through the model in this cell.
x = torch.normal(0, 1, size=(4, 12))
# Build a small network whose input width matches the batch's last dimension.
model = ANN(in_features=12, hidden=8, n_classes=2)
# Printing an nn.Module lists its registered sub-modules.
print(model)

ANN(
  (input_layer): Linear(in_features=12, out_features=8, bias=True)
  (hidden_layer): Linear(in_features=8, out_features=2, bias=True)
  (output_layer): Softmax(dim=1)
)


## 3. Activation functions
* If a neural network consisted only of fully connected perceptrons that compute a dot product (a weighted sum) of their inputs, the whole network would collapse into a single linear model — a composition of linear maps is itself linear — and would be of little use for learning non-linear patterns. To break this linearity, an activation function is applied after each layer to introduce non-linearity:
![Activation functions](https://miro.medium.com/max/1200/1*ZafDv3VUm60Eh10OeJu1vw.png)

* Some of the commonly used activation functions are :
    - Rectified Linear Unit (ReLU) : Usually used in hidden (intermediate) layers. ReLU is favoured over Sigmoid and Tanh in hidden layers because its gradient is constant (equal to 1) whenever the input is positive, whereas Sigmoid and Tanh saturate for inputs of large magnitude. Hence, ReLU is less likely to result in vanishing gradients.
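    - Sigmoid and Tanh : Usually used in the output layer in the case of **Binary classification** (Sigmoid maps its input to (0, 1), so the output can be read directly as a probability; Tanh maps its input to (-1, 1)).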
    - Softmax : Usually used in the output layer in the case of **Multi-class classification**

In [9]:
# Define a neural network with activation functions in the constructor
# You can copy this neural network architecture and use it in the previous session's exercise to play with it a bit
class ANNWithAct(nn.Module):
    """Two-layer fully connected classifier with ReLU after each linear layer.

    The layer sequence is: linear -> ReLU -> linear -> ReLU -> softmax.

    Args:
        in_features: Passed as ``in_features`` to the first linear layer.
        hidden: Output width of the first linear layer and input width of
            the second one.
        n_classes: Output width of the second linear layer.
    """

    def __init__(self, in_features, hidden=16, n_classes=10):
        super().__init__()

        # Instead of nn.ReLU() for Rectified Linear Unit, you can use
        #     - nn.Sigmoid() for sigmoid
        #     - nn.Tanh() for tanh
        #     - nn.Softmax() for softmax
        self.input_layer = nn.Linear(in_features, hidden)
        self.act_relu1 = nn.ReLU()

        self.hidden_layer = nn.Linear(hidden, n_classes)
        # NOTE(review): this ReLU clamps negative scores to zero right before
        # the softmax; confirm that is intended rather than feeding the raw
        # scores to the softmax.
        self.act_relu2 = nn.ReLU()

        self.output_layer = nn.Softmax(dim=1)

    def forward(self, inputs):
        """Pass ``inputs`` through every stage in order and return the result."""
        stages = (
            self.input_layer,
            self.act_relu1,
            self.hidden_layer,
            self.act_relu2,
            self.output_layer,
        )
        outputs = inputs
        for stage in stages:
            outputs = stage(outputs)
        return outputs
    
# Build a small instance and print it to list its registered sub-modules.
model = ANNWithAct(in_features=16, hidden=8, n_classes=4)
print(model)

ANNWithAct(
  (input_layer): Linear(in_features=16, out_features=8, bias=True)
  (act_relu1): ReLU()
  (hidden_layer): Linear(in_features=8, out_features=4, bias=True)
  (act_relu2): ReLU()
  (output_layer): Softmax(dim=1)
)
