# 1. Import statements

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim

# 2. Tensor Basics

## Create Tensor

Initialize random 5x3 matrix: 

In [2]:
# 5x3 matrix with entries sampled uniformly from [0, 1)
x = torch.rand((5, 3))

Construct a matrix filled with zeros and of dtype long:

In [18]:
# 5x3 matrix of zeros stored as 64-bit integers (torch.long is an alias of torch.int64)
x = torch.zeros((5, 3), dtype=torch.int64)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


Construct a tensor directly from data:

In [10]:
# Build a 1-D tensor directly from Python numbers; the mixed int/float data becomes floating point
x = torch.tensor((5.5, 3))

Converting a torch tensor to NumPy Array and vice versa:

In [5]:
# Tensor -> NumPy: .numpy() exposes the tensor's data as a NumPy array
numpy_array = x.numpy()
# NumPy -> Tensor: per the PyTorch docs the resulting tensor shares memory with the array,
# so in-place changes to one are visible in the other
x = torch.from_numpy(numpy_array)

## Operations

In [42]:
# 3x7x2 tensor of standard-normal samples stored as 64-bit floats
x = torch.randn((3, 7, 2), dtype=torch.float64)
print(x)

tensor([[[-0.7028,  0.1901],
         [ 1.7985, -0.2737],
         [ 0.4400, -0.8318],
         [-0.8569,  2.2270],
         [ 1.3089,  0.1994],
         [ 0.0693,  0.3087],
         [ 0.0447, -0.0729]],

        [[ 0.2017, -0.4898],
         [-0.3361,  0.4311],
         [-0.0747, -0.0776],
         [-1.3892,  0.5870],
         [ 1.7291, -0.6484],
         [ 0.6536, -1.0552],
         [ 1.4439, -1.0526]],

        [[-0.8398, -1.0046],
         [-1.5248, -0.8343],
         [-2.7795,  1.4313],
         [-0.2373, -0.3244],
         [ 0.3934, -0.5255],
         [ 0.9037, -0.0034],
         [ 1.9146,  1.7218]]], dtype=torch.float64)


### Size of the tensor:

In [43]:
print(x.shape)     # full shape of the tensor as a torch.Size
print(x.shape[0])  # length of dimension 0 (the first one), by convention the number of samples

torch.Size([3, 7, 2])
3


## Slice Tensor

In [44]:
# Keep every entry of the leading dimensions but only index 1 of the last dimension
y = x[..., 1]
print(y)

tensor([[ 0.1901, -0.2737, -0.8318,  2.2270,  0.1994,  0.3087, -0.0729],
        [-0.4898,  0.4311, -0.0776,  0.5870, -0.6484, -1.0552, -1.0526],
        [-1.0046, -0.8343,  1.4313, -0.3244, -0.5255, -0.0034,  1.7218]],
       dtype=torch.float64)


## Sum

In [45]:
# Sum along dimension 1, leaving one value per entry of dimension 0
y.sum(dim=1)

tensor([ 1.7467, -2.3054,  0.4609], dtype=torch.float64)

### Squeeze Tensor
- Squeezing a tensor removes the dimensions or axes that have a length of one.
- Unsqueezing a tensor adds a dimension with a length of one.

In [3]:
# Shape (5, 3, 1): dimension 0 has length 5, dimension 1 has length 3, dimension 2 has length 1
x = torch.randn((5, 3, 1), dtype=torch.float64)
print(x)
print(x.shape)
print("-------------------")
# squeeze() drops every dimension of length one -> shape (5, 3)
y = x.squeeze()
print(y.shape)
print(y)
print("-------------------")
# Insert a new length-one dimension at index 2 (the last position) -> shape (5, 3, 1)
y_1 = y.unsqueeze(2)
print(y_1.shape)
print(y_1)
print("-------------------")
# Insert a new length-one dimension at index 1 (between the two existing ones) -> shape (5, 1, 3)
y_2 = y.unsqueeze(1)
print(y_2.shape)
print(y_2)

tensor([[[ 9.5300e-01],
         [ 2.4879e+00],
         [ 7.7723e-04]],

        [[ 2.5263e-01],
         [-2.1398e+00],
         [ 7.5477e-02]],

        [[ 1.4362e+00],
         [ 3.9126e-01],
         [-6.1167e-01]],

        [[ 7.9002e-01],
         [-7.8850e-01],
         [-2.1712e+00]],

        [[-8.5125e-01],
         [ 3.6094e-01],
         [-9.9632e-01]]], dtype=torch.float64)
torch.Size([5, 3, 1])
-------------------
torch.Size([5, 3])
tensor([[ 9.5300e-01,  2.4879e+00,  7.7723e-04],
        [ 2.5263e-01, -2.1398e+00,  7.5477e-02],
        [ 1.4362e+00,  3.9126e-01, -6.1167e-01],
        [ 7.9002e-01, -7.8850e-01, -2.1712e+00],
        [-8.5125e-01,  3.6094e-01, -9.9632e-01]], dtype=torch.float64)
-------------------
torch.Size([5, 3, 1])
tensor([[[ 9.5300e-01],
         [ 2.4879e+00],
         [ 7.7723e-04]],

        [[ 2.5263e-01],
         [-2.1398e+00],
         [ 7.5477e-02]],

        [[ 1.4362e+00],
         [ 3.9126e-01],
         [-6.1167e-01]],

        [[ 7.9002

### Reshape tensor: 

In [40]:
x = torch.randn((4, 4))  # shape (4, 4)
y = x.view(-1)           # flattened to a 1-D tensor of shape (16,)
z = x.view(2, 8)         # same 16 values arranged as shape (2, 8)

### Basic tensor operations:

In [24]:
# Two independent 5x3 matrices with entries sampled uniformly from [0, 1)
x = torch.rand((5, 3))
y = torch.rand((5, 3))
z = torch.add(x, y)  # element-wise sum of x and y
z = torch.mul(x, y)  # element-wise product of x and y
z = torch.add(x, 2)  # the scalar 2 is added to every element of x

Indexing is like in NumPy:

In [None]:
# Select index 1 of the second dimension (i.e. the second column) for every row
x[:,1]

# 3. Autograd: Automatic Differentiation

## Dynamic Computational graph
It abstracts the complicated mathematics and helps us “magically” calculate gradients of high dimensional curves with only a few lines of code. On setting ``<Tensor>.requires_grad = True`` tensors start forming a backward graph that tracks every operation applied on them. <br>
The autograd class is an engine to calculate derivatives (Jacobian-vector product to be more precise). It records a graph of all the operations performed on a gradient enabled tensor and creates an acyclic graph called the dynamic computational graph. <br>
The leaves of this graph are input tensors and the roots are output tensors. Gradients are calculated by tracing the graph from the root to the leaf and multiplying every gradient in the way using the chain rule. <br>
Gradient enabled tensors (variables) along with functions (operations) combine to create the dynamic computational graph. The flow of data and the operations applied to the data are defined at runtime hence constructing the computational graph dynamically. <br>
Each tensor has a `.grad_fn` attribute that references the Function that created the Tensor (except for Tensors created by the user - their `grad_fn` is `None`). <br>
If you want to compute the derivatives, you can call `.backward()` on a Tensor.

# 4. PyTorch terminology
## Variable 
`Variable` is a wrapper around a tensor that stores additional properties. <br>
A Variable has the following properties:
- .data (the tensor under the variable) 
- .grad (the gradient computed for this variable, must be of the same shape and type of .data), 
- .requires_grad (boolean indicating whether to calculate gradient for the Variable during backpropagation)
- .grad_fn (the function that created this Variable, used when backproping the gradients).
- .volatile (legacy flag that switched off gradient tracking for inference; it was removed in PyTorch 0.4, where `torch.no_grad()` takes over this role).

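Variable is available under `torch.autograd.Variable`. Note: since PyTorch 0.4, `Variable` and `Tensor` are merged, so wrapping a tensor in `Variable(...)` simply returns a tensor and is only needed for legacy code.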

## Parameter
Parameter is a subclass of Variable so most behaviors are the same.
The most important difference is that if you assign an `nn.Parameter` in an `nn.Module`'s constructor, it is automatically added to the module's parameters, just as assigned `nn.Module` objects are registered as submodules. Here is an example:


In [43]:
class MyModule(torch.nn.Module):
    """Contrast a tensor wrapped in ``Variable`` with one wrapped in ``nn.Parameter``.

    Both are stored as attributes of the module, one holding the value 5 and
    the other the value 10.
    """

    def __init__(self):
        super().__init__()
        wrapped_value = torch.Tensor([5])
        learnable_value = torch.Tensor([10])
        self.variable = torch.autograd.Variable(wrapped_value)
        self.parameter = torch.nn.Parameter(learnable_value)

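This means that only `self.parameter` is registered with the module: `list(MyModule().parameters())` contains just the parameter holding `10`, while `self.variable` is an ordinary attribute that an optimizer built from `parameters()` never sees.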

## Functions
Functions transform their input using some operation. They do not store any state or buffer, so they have no memory of their own and are completely predictable. A log function will output the log value of its inputs, whereas a linear layer cannot be a function, since it has internal state such as weights and biases. <br>
Whenever we need to create a new function we will create a subclass and inherit from `torch.autograd.Function`.

## Modules
In modules we can bundle our parameters, layers and functions. Whenever we backpropagate, gradients are computed for the parameters of the module and of its child modules recursively.
Predefined modules are implemented under `torch.nn` as `torch.nn.Conv2d`, `torch.nn.Linear` etc. <br>
Whenever we need to define a new model (module) we will create a subclass and inherit from `torch.nn.Module`

## Use it inside the code

In [28]:
# requires_grad=True makes autograd record every operation applied to x
x = torch.ones((2, 2), requires_grad=True)
# y is the result of an operation on x, so it carries a grad_fn pointing back to that operation
y = torch.add(x, 2)


# 5. Neural Network

## Different Layer Types

Convolutional Layer:

In [31]:
# 2-D convolution: 1 input channel, 6 output channels, 5x5 kernel
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)

Fully Connected Layer:

In [32]:
# Fully connected layer mapping 120 input features to 84 output features
fc1 = nn.Linear(in_features=120, out_features=84)

LSTM:

In [38]:
# LSTM taking 3 input features per time step and keeping a hidden state of size 3
lstm = nn.LSTM(input_size=3, hidden_size=3)

## Different Functions
- The input of a function is the tensor passed to it, which is itself computed from the previous `x`: `x = F.relu(self.fc1(x))`
- You can nest functions: `x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))`

Some examples:<br>
F.relu() <br>
F.dropout() <br>
F.log_softmax() <br>
F.elu() <br>
F.sigmoid() <br>
...

## Get weights of each Layer
You can access the weights of each layer that is defined in your network: `network.conv1.weight`. <br>
The shape of a layer's weight is accessible like this: `network.conv1.weight.shape`

# 6. Build and Train Neural Network
## Define NN via Class

In [1]:
class Network(nn.Module):
    """LeNet-style CNN: two convolution/pooling stages followed by three linear layers.

    The network expects input of shape (N, 1, 32, 32). Each 5x5 convolution
    (no padding) shrinks a side by 4 and each 2x2 max-pool halves it
    (32 -> 28 -> 14 -> 10 -> 5), which yields the 16 * 5 * 5 features that
    ``fc1`` consumes. The output has shape (N, 10) and holds log-probabilities.
    """

    # inside __init__() you define the different layers
    def __init__(self):
        # Zero-argument super() avoids repeating the class name; the previous
        # super(MeinNetz, self) referenced a name that does not exist.
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # inside forward() you define the sequence of functions
    # each function receives the tensor produced by the previous step
    def forward(self, x):
        """Run the forward pass and return log-softmax scores over dimension 1."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))  # you can nest functions
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension for the linear layers
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

    def num_flat_features(self, x):
        """Return the number of features per sample (product of all dims except dim 0)."""
        size = x.size()[1:]  # size is a method: it must be called before slicing
        num = 1
        for i in size:
            num *= i
        return num
    

NameError: name 'nn' is not defined

## Initialize Network, Define Input and Target

Initialize Network

In [36]:
# Create an instance of the Network class defined above; its __init__ builds all layers
neural_network = Network()

Define input and target

In [39]:
# 10 samples, each a single-channel 32x32 image: the (N, 1, 32, 32) layout that the
# convolution layers of Network require (a 2-D 10x10 tensor cannot pass through nn.Conv2d)
input = Variable(torch.randn(10, 1, 32, 32))
# One row of 10 target values per sample, matching the (10, 10) output of the network
target = Variable(torch.Tensor([[0,1,1,1,0,1,1,1,0,0] for _ in range(10)]))

## Define Loss-function and Optimizer

In [42]:
# Mean squared error between the network's prediction and the target
loss_fn = nn.MSELoss()
# Stochastic gradient descent over the parameters of the network instance created
# earlier; the previous name `netz` was never defined
optimizer = optim.SGD(neural_network.parameters(), lr=0.01)

## Train the model

In [None]:
num_epochs = 100  # number of passes over the data; tune as needed
# Loss value of every epoch; a torch tensor is used because NumPy is not imported in this notebook
hist = torch.zeros(num_epochs)

for epoch in range(num_epochs):
    # Zero out the gradients, else they will accumulate between epochs.
    # This has to be called on the optimizer instance; `optim` is the torch.optim module.
    optimizer.zero_grad()

    # Forward pass
    out = neural_network(input)
    loss = loss_fn(out, target)  # 2 arguments: the network's prediction and the true value
    hist[epoch] = loss.item()    # index by the loop variable

    # Backward pass
    loss.backward()

    # Update parameters
    optimizer.step()

# 7. Use the graphics card (GPU)
First you have to execute the `.cuda()` function on the initialized network:

In [None]:
neural_network = Network()
# .cuda() moves the network to the GPU; the guard keeps this cell runnable on
# machines without a CUDA device, where the network simply stays on the CPU
if torch.cuda.is_available():
    neural_network = neural_network.cuda()

Second you have to execute the `.cuda()` function on every Variable:

In [None]:
input = Variable(torch.Tensor([1,0,0,0,0,0,1,0,1]))
target = Variable(torch.Tensor([1,0,0,0,0,0,1,0,1]))
# Data has to live on the same device as the network; the guard keeps this cell
# runnable on machines without a CUDA device
if torch.cuda.is_available():
    input = input.cuda()
    target = target.cuda()

# 8. Save and load a neural network
## Save:
When saving a model for inference, it is only necessary to save the trained model’s learned parameters. A common PyTorch convention is to save models using `.pt` file extension. 

In [None]:
# Persist only the learned parameters (the state_dict), not the whole model object.
# Replace <path> with a real directory before running.
torch.save(neural_network.state_dict(), "<path>/neural_network.pt")

## Load:
Remember that you must call `model.eval()` to set the network to evaluation mode before running predictions with the neural network. By default all the modules are initialized to train mode (self.training = True). Because some layers have different behavior during train/and evaluation (like Dropout, etc.) it matters to set `.eval()` to change the net to evaluation (prediction) mode.

In [None]:
# Re-create the architecture first: the saved file holds only parameters, not the class
neural_network = TheModelClass(*args, **kwargs) # Enter name of defined class for that network
# Replace <path> with the directory the state_dict was saved to
neural_network.load_state_dict(torch.load("<path>/neural_network.pt"))
# Switch to evaluation mode before running predictions
neural_network.eval()

## Saving & Loading a Checkpoint:
Deep learning models can take hours, days or even weeks to train. If the run is stopped unexpectedly, you can lose a lot of work. Application Checkpoint is a fault tolerance technique for long running processes. <br>
__Save__: <br>
When saving a general checkpoint, to be used for resuming training, you must save more than just the model’s state_dict. It is important to also save the optimizer’s state_dict, as this contains buffers and parameters that are updated as the model trains. You can save any other items (like loss) that may aid you in resuming training by simply appending them to the dictionary.

In [None]:
# Bundle everything needed to resume training into one dictionary and save it.
# Replace <path> with a real directory before running.
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            # add any further items you want to restore as additional 'key': value pairs
            }, "<path>/neural_network.pt")

__Load__: <br>
To load the models, first initialize the models __and__ optimizers, then load the dictionary locally using torch.load(). From here, you can easily access the saved items by simply querying the dictionary as you would expect. <br>
Remember that you must call `model.train()` to set the network to training mode before training the neural network.

In [None]:
# Template: TheModelClass, TheOptimizerClass, args/kwargs and PATH are placeholders
# that must be replaced with your own class names, arguments and checkpoint path.
model = TheModelClass(*args, **kwargs) # Enter name of defined class for that network
optimizer = TheOptimizerClass(*args, **kwargs) # Enter name of defined class for the optimizer

# The checkpoint is the dictionary that was passed to torch.save()
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

# Switch to training mode before resuming training
model.train()

# 9. Data Loader
Datasets and data loaders are the PyTorch tools that define how your data is accessed. This is especially useful when your data is distributed over several files. For instance, if you have several images in some directory structure, you can customize the way you access them with the `Dataset` class.

`torch.utils.data.Dataset` is an abstract class representing a dataset. Your custom dataset should inherit `Dataset` and override the following methods: 
- `__init__`: In the initializer, store the directory information and anything else that is needed to access the data.
- `__len__`: In this method, you should implement a way to get the entire size of your dataset. For instance, if you have a set of images in some directories, you have to implement a way of counting the total number of files that make up your data. In the basic example below, the length of the DataFrame is returned.
- `__getitem__`: This is where you implement how to get a single item from your dataset. For instance, if you have several images, here is where you read the image for the requested index from disk and return it.

This is memory efficient because all the images are not stored in the memory at once but read as required.

In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader

class ExampleDataset(Dataset):
    """Example dataset that serves the rows of a CSV file one sample at a time.

    Every row is converted to a float32 tensor, so the CSV file is assumed to
    contain numeric columns only.
    """

    def __init__(self, csv_file):
        """
        csv_file (string): Path to the csv file containing data.
        """
        self.data_frame = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows (samples) in the CSV file."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return the row at position ``idx`` as a 1-D float32 tensor.

        ``.iloc`` selects by row position; plain ``data_frame[idx]`` would look
        up a column labelled ``idx`` and raise ``KeyError``. A tensor is returned
        because DataLoader's default collate function cannot batch pandas objects.
        """
        row = self.data_frame.iloc[idx]
        return torch.tensor(row.to_numpy(dtype="float32"))

In [1]:
# Instantiate the dataset ('my_data_file.csv' is a placeholder path)
example_dataset = ExampleDataset('my_data_file.csv')

# batch_size: number of samples returned per iteration
# num_workers: number of worker processes used to load the data in parallel
# NOTE: shuffle is not passed here, so the samples are read in the same order every time;
# add shuffle=True to the call to reshuffle the data on every pass
example_data_loader = DataLoader(example_dataset, batch_size=4, num_workers=4)

# Loop over the data 4 samples at a time
for batch_index, batch in enumerate(example_data_loader):
    print(batch_index, batch)


NameError: name 'ExampleDataset' is not defined