# Introduction to PyTorch, a Deep Learning Library

## Tensors

In [1]:
import torch  # PyTorch: tensors and neural-network building blocks
import numpy as np

# nested Python list: 2 rows x 3 columns
lst = [
    [1, 2, 3],
    [4, 5, 6],
]

# NumPy array built from the same list
np_array = np.array(lst)

# tensor created directly from the list (the data is copied)
tensor = torch.tensor(lst)

# tensor created from the NumPy array; from_numpy shares memory with np_array
np_tensor = torch.from_numpy(np_array)

Tensor attributes

In [5]:
# size along each dimension: 2 rows x 3 columns, matching the nested list
tensor.shape

torch.Size([2, 3])

In [7]:
# data type of the elements (the integer list produced an integer tensor)
tensor.dtype

torch.int64

In [8]:
# device on which the tensor is stored
tensor.device

device(type='cpu')

Tensor operation on compatible shapes

In [9]:
# element-wise addition; both operands have shape (2, 3)
tensor + np_tensor

tensor([[ 2,  4,  6],
        [ 8, 10, 12]])

In [10]:
# element-wise multiplication (not a matrix product)
tensor * np_tensor

tensor([[ 1,  4,  9],
        [16, 25, 36]])

## First neural network
A 2-layer network: an input layer with 3 features and one linear layer with 2 outputs

In [12]:
import torch.nn as nn

# linear layer mapping 3 input features to 2 output features
linear_layer = nn.Linear(in_features=3, out_features=2)

# one sample (a single row) with 3 features
input_tensor = torch.tensor(
    [[0.3471, 0.4547, -0.2356]]
)

# forward pass: apply the layer to the input
output = linear_layer(input_tensor)
print(output)

tensor([[ 0.4680, -0.6139]], grad_fn=<AddmmBackward0>)


Each linear layer has weights and biases

In [13]:
# learnable weight matrix, shape (out_features, in_features) = (2, 3)
linear_layer.weight

Parameter containing:
tensor([[ 0.4967,  0.3452,  0.5310],
        [ 0.1092, -0.4248, -0.0738]], requires_grad=True)

In [14]:
# learnable bias vector with one entry per output feature (2)
linear_layer.bias

Parameter containing:
tensor([ 0.2638, -0.4760], requires_grad=True)

A linear layer performs a matrix multiplication of input and weights, followed by adding the bias

y0 = X @ W0.T + b0

where X is the input tensor, W0 are the weights and b0 the bias

* Weights and bias are initialized randomly
* NNs with only linear layers are called fully-connected
* X is 1 x 3, W0 is 2 x 3, b0 is 2, y0 is 1 x 2

## Stacking layers

In [31]:
# 3 layers
# Three linear layers chained together: the output size of each layer
# must equal the input size of the next one (10 -> 18 -> 20 -> 5).
model = nn.Sequential(
    nn.Linear(10, 18),
    nn.Linear(18, 20),
    nn.Linear(20, 5)
)

# One sample with 10 features. Named `input_tensor` rather than `input`
# so that the Python builtin `input()` is not shadowed.
input_tensor = torch.tensor([[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.0]])

# forward pass through all three layers; the result has 5 output features
output = model(input_tensor)
output.shape

torch.Size([1, 5])

## Activation Functions

Linear layer: multiply input by weights, add bias.

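Non-linear activation functions let the network model more complex relationships; without them, a stack of linear layers collapses into a single linear transformation.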

### Sigmoid
* used for classification problems
* Transforms the pre-activation output (e.g. an int or a float) into a float between 0 and 1. A common threshold for classification is 0.5.

In [2]:
import torch
import torch.nn as nn

# sigmoid activation layer
sigmoid = nn.Sigmoid()

# a single pre-activation value
input_tensor = torch.tensor([[6.0]])

# squash the value into the range (0, 1)
output = sigmoid(input_tensor)
output

tensor([[0.9975]])

Common usage as last step for binary classification (= logistic regression)

In [None]:
# Binary classifier: two linear layers reduce 6 features to a single value,
# which the final sigmoid maps to a number between 0 and 1.
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),  # 6 inputs -> 4 hidden values
    nn.Linear(in_features=4, out_features=1),  # 4 hidden values -> 1 output
    nn.Sigmoid(),                              # output squashed into (0, 1)
)

### Softmax
* For multi-class classification
* Output vector is same size as input
* The output is a probability distribution (each element between 0 and 1, sum is 1)


In [3]:
import torch
import torch.nn as nn

# softmax layer; dim=-1 applies it along the last dimension.
# Note: `probabilities` holds the layer itself, not its result.
probabilities = nn.Softmax(dim=-1)

# one sample with three pre-activation scores
input_tensor = torch.tensor([[4.0, 6.0, 8.0]])

# calling the layer returns the probability distribution over the 3 scores
probabilities(input_tensor)

tensor([[0.0159, 0.1173, 0.8668]])