In [None]:
from jupyterthemes import jtplot
jtplot.style(figsize = (15, 10), grid = False, ticks = True)

In [None]:
import numpy as np
import pandas as pd
# ML
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

import cv2
import torch
import torch.nn as nn 
import torch.nn.functional as F
import torchvision

# Plots
import matplotlib.pyplot as plt

# Utils
from tqdm import tqdm, trange
import os

## Utils

# Pytorch API

Package| 	Description|
-|-|
`torch`| 	The top-level PyTorch package and tensor library.
`torch.nn`| 	A subpackage that contains modules and extensible classes for building neural networks.
`torch.autograd`| 	A subpackage that supports all the differentiable Tensor operations in PyTorch.
`torch.nn.functional`| 	A functional interface that contains typical operations used for building neural networks like loss functions, activation functions, and convolution operations.
`torch.optim`| 	A subpackage that contains standard optimization operations like SGD and Adam.
`torch.utils`| 	A subpackage that contains utility classes like data sets and data loaders that make data preprocessing easier.
`torchvision`| 	A package that provides access to popular datasets, model architectures, and image transformations for computer vision. 

# Tensors

## Tensor introduction

Each of these examples is a specific instance of the more general concept of a tensor:
- number / scalar
- array / vector
- 2d-array / matrix

Tensors are generalizations: $n$d-arrays

In [None]:
x = torch.tensor([1, 2, 3])
x

In [None]:
# Dtypes
x = torch.tensor([1, 2, 3], dtype = torch.float32)
x

In [None]:
x = torch.tensor(2)
x

In [None]:
x.item(), type(x.item())

### Get information about a tensor

In [None]:
print(f"Shape of tensor: {x.shape}")
print(f"Datatype of tensor: {x.dtype}")
print(f"Device tensor is stored on: {x.device}")

### Tensor operations

#### Matrix Operations

In [None]:
x = torch.tensor([[1, 2, 3],
                 [4, 5, 6]], dtype = torch.float32)

In [None]:
print(x.sum())
print(x.sum(axis = 0))
print(x.sum(axis = 1))

In [None]:
y = 2 * torch.ones((3, 2))
y

- Matrix multiplication
![image.png](attachment:image.png)

In [None]:

print(x.matmul(y))
print(x @ y)

In [None]:
x + 1

In [None]:
x * y.T

#### Reshape, squeeze, flatten

In [None]:
print(x)
print(x.reshape(3, 2))
print(x.flatten())

In [None]:
print(x, x.shape)
print()
print(x.unsqueeze(0), x.unsqueeze(0).shape)

print(x.unsqueeze(-1), x.unsqueeze(-1).shape)

print(x.unsqueeze(1), x.unsqueeze(1).shape)


### Concatenation

In [None]:
x = torch.tensor([[1, 2, 3],
                 [4, 5, 6]])
y = torch.tensor([[10, 20, 30],
                 [40, 50, 60]])

torch.cat([x, y], dim = 0)

In [None]:
torch.cat([x, y], dim = 1)

## Usual tensor dimensions used in practice

1. 2D - Numerical data (Vector data):
- `(num_samples, num_features)`
2. 3D - Time series: 
- `(num_samples, num_timesteps, num_features)`
3. 4D - Image data: 
- `(num_samples, height, width, channels)` (channels-last) or `(num_samples, channels, height, width)` (channels-first, the layout PyTorch convolution layers expect)
4. 5D - Video:
- `(num_samples, frames, height, width, channels)`



# Cuda

## Device 

In [None]:
torch.cuda.is_available()

In [None]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

In [None]:
torch.cuda.get_device_properties(0)

In [None]:
torch.cuda.device_count()

## Memory

In [None]:
print(torch.cuda.memory_summary(abbreviated=True))

In [None]:
def print_memory_stats(mb = True, device = 0):
    """Print memory statistics of a CUDA device.

    Reports the device's total memory, the memory reserved by PyTorch's
    caching allocator, the memory currently allocated to tensors, and the
    difference between the last two (reserved but not allocated).

    Args:
        mb: If truthy, byte counts are divided by 1024 * 1024 and labelled
            "MB"; otherwise raw byte counts are printed and labelled "B".
        device: Index of the CUDA device to query (defaults to device 0).

    Returns:
        None. The statistics are only printed.
    """
    if not torch.cuda.is_available():
        # The torch.cuda queries below raise on machines without a GPU.
        print("CUDA is not available: no GPU memory statistics to report")
        return

    # Keep the printed unit in sync with the divisor that is actually applied.
    divisor, unit = (1024 * 1024, "MB") if mb else (1, "B")

    t = torch.cuda.get_device_properties(device).total_memory / divisor
    r = torch.cuda.memory_reserved(device) / divisor
    a = torch.cuda.memory_allocated(device) / divisor
    f = r - a  # free inside reserved
    print(f"total memory: {t} {unit}")
    print(f"reserved memory: {r} {unit}")
    print(f"allocated memory: {a} {unit}")
    print(f"free memory inside reserved : {f} {unit}")

In [None]:
# Look at memory
print_memory_stats()

In [None]:
x = torch.rand(1000000)
x, x.device

In [None]:
# Put a tensor into gpu memory
x = x.cuda()
print(x.device)
print_memory_stats()

In [None]:
# Try to empty cache. It's the same since we have a reference to `x`
torch.cuda.empty_cache()
print_memory_stats()

In [None]:
# Delete x and look at memory. 
del x
print_memory_stats()

In [None]:
# Indeed, we can't access `x` anymore: referencing a deleted name raises NameError.
# The error is caught and printed so that "Run All" does not stop at this cell.
try:
    x
except NameError as err:
    print(f"NameError: {err}")

In [None]:
# Try to empty cache. 
torch.cuda.empty_cache()
print_memory_stats()

In [None]:
torch.tensor([1, 2, 3]).cuda()
print_memory_stats()

In [None]:
def pretty_size(size):
    """Format a torch.Size as its dimensions joined by " × " (e.g. "2 × 3")."""
    assert isinstance(size, torch.Size)
    return " × ".join(map(str, size))


def dump_tensors(gpu_only=True):
    """Print the tensors currently tracked by the garbage collector.

    Every object returned by ``gc.get_objects()`` is inspected. Tensors are
    printed directly; objects that merely expose a tensor through a ``data``
    attribute are printed as ``Wrapper → Tensor``. The last line printed is
    the total number of elements (``numel``) over all listed tensors.

    Args:
        gpu_only: If True (default), only tensors stored on a CUDA device
            are listed; if False, CPU tensors are listed as well.
    """
    import gc

    total_size = 0
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                tensor, wrapper = obj, None
            elif hasattr(obj, "data") and torch.is_tensor(obj.data):
                tensor, wrapper = obj.data, obj
            else:
                continue

            if gpu_only and not tensor.is_cuda:
                continue

            gpu = " GPU" if tensor.is_cuda else ""
            # is_pinned is a method: it has to be called, the bound method
            # object itself is always truthy.
            pinned = " pinned" if tensor.is_pinned() else ""

            if wrapper is None:
                print(f"{type(tensor).__name__}: {gpu}{pinned} {pretty_size(tensor.size())}")
            else:
                # The wrapper is not guaranteed to expose requires_grad.
                grad = " grad" if getattr(wrapper, "requires_grad", False) else ""
                print(f"{type(wrapper).__name__} → {type(tensor).__name__}:"
                      f"{gpu}{pinned}{grad} {pretty_size(tensor.size())}")
            total_size += tensor.numel()
        except Exception:
            # Best effort: inspecting arbitrary live objects (hasattr on
            # proxies, half-initialised objects, ...) may raise anything;
            # such objects are skipped rather than aborting the dump.
            continue
    print("Total size:", total_size)

In [None]:
x = torch.rand((100, 100, 100)).cuda()
y = torch.rand((2134, 4444)).cuda()
dump_tensors()

In [None]:
del x, y

In [None]:
dump_tensors()

## GPU to CPU

In [None]:
x = torch.rand((100, 100, 100)).cuda()
dump_tensors()

In [None]:
x = x.cpu()

In [None]:
dump_tensors()

In [None]:
del x

In [None]:
dump_tensors()

In [None]:
print_memory_stats()

In [None]:
torch.cuda.empty_cache()
print_memory_stats()

# Data

## Dataset, DataLoader

- https://pytorch.org/tutorials/beginner/basics/data_tutorial.html

In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
class MyDataset(Dataset):
    """Toy dataset whose samples are the integers 0, 1, ..., 99."""

    def __init__(self):
        # The whole "dataset" is a single NumPy array kept in memory.
        self.data = np.arange(0, 100)

    def __len__(self):
        """Return the number of samples."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, index):
        """Return the sample stored at position ``index``."""
        sample = self.data[index]
        return sample

In [None]:
dataset = MyDataset()
print(len(dataset))
print(dataset.data)

In [None]:
dataloader = DataLoader(dataset, batch_size = 10, shuffle = True)
for i, batch in enumerate(dataloader):
    print(i, batch)

# Training

![image.png](attachment:image.png)

## Data

In [None]:
# Directory holding the MNIST CSV files (pandas is already imported in the import cell at the top)
MNIST_DIR = '../lab5/mnist'

df_train = pd.read_csv(f'{MNIST_DIR}/mnist_train.csv')
df_test = pd.read_csv(f'{MNIST_DIR}/mnist_test.csv')

# Every column except 'label' is a feature; 'label' is the target
X_train, y_train = df_train.drop('label', axis = 1).to_numpy(), df_train['label'].to_numpy()
X_test, y_test = df_test.drop('label', axis = 1).to_numpy(), df_test['label'].to_numpy()

# Rescale the features — assumes 8-bit pixel intensities in 0..255 (TODO confirm against the CSV)
X_train = X_train / 255
X_test = X_test / 255

In [None]:
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train, y_test])
X.shape, y.shape

In [None]:
# Show the first G * G samples as one G x G mosaic
G = 16
X_sel = X[:G * G]

# Regroup the rows into G strips of shape (28 * G, 28), i.e. G images of
# 28 x 28 stacked vertically, then place the strips side by side.
strips = X_sel.reshape(G, 28 * G, 28)
mosaic = np.concatenate(strips, axis = 1)

plt.figure(figsize = (10, 10))
plt.imshow(mosaic)

In [None]:
class DigitsDataset(Dataset):
    """Dataset of feature vectors and integer class labels.

    Attributes:
        X: float32 tensor of shape (num_samples, num_features).
        y: int64 tensor of shape (num_samples,) holding the class labels.
        num_features: Number of columns of ``X``.
        num_classes: Largest label + 1 (labels are assumed to be 0-based
            class indices), or 0 for an empty dataset.
    """

    def __init__(self, X, y):
        """Store the samples as tensors.

        Args:
            X: Array-like of shape (num_samples, num_features).
            y: Array-like of shape (num_samples,) with integer labels.

        Raises:
            ValueError: If ``X`` and ``y`` do not have the same length.
        """
        # float32 inputs match the default dtype of nn.Linear weights;
        # int64 targets are what nn.CrossEntropyLoss expects for class indices.
        self.X = torch.as_tensor(X, dtype = torch.float32)
        self.y = torch.as_tensor(y, dtype = torch.long)
        if len(self.X) != len(self.y):
            raise ValueError(f"X and y have different lengths: {len(self.X)} != {len(self.y)}")

        self.num_features = self.X.shape[1]
        self.num_classes = int(self.y.max()) + 1 if len(self.y) > 0 else 0

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

In [None]:
dataset = DigitsDataset(X, y)
#dataloader = DataLoader(dataset, batch_size = 64, shuffle = True)

In [None]:
from torch.utils.data import random_split

# 80 % of the samples go to training, the remainder to testing
train_len = int(.8 * len(dataset))
test_len = len(dataset) - train_len

# A seeded generator makes the split identical on every "Restart & Run All"
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(dataset, [train_len, test_len], generator = split_generator)

## Models

In [None]:
class Model(nn.Module):
    """Linear classifier: a single fully connected layer producing class scores.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of classes (size of the output vector).
    """

    def __init__(self, input_dim, output_dim):
        super(Model, self).__init__()
        # Hidden layers
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch.

        Args:
            x: Tensor of shape (batch_size, input_dim).

        Returns:
            Tensor of shape (batch_size, output_dim) with unnormalized scores.
            No softmax is applied here: nn.CrossEntropyLoss applies
            log-softmax internally, so feeding it probabilities would
            normalize twice and flatten the gradients. Use
            ``F.softmax(logits, dim = 1)`` when probabilities are needed;
            ``argmax`` gives the same prediction either way.
        """
        return self.fc(x)
    
# Make and use a stronger model


In [None]:
input_shape = dataset.num_features
output_shape = dataset.num_classes
model = Model(input_shape, output_shape)
#model.cuda()

In [None]:
model

In [None]:
for p in model.parameters():
    print(p.shape)
    #print(p)

## Optimizers, loss

In [None]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

loss_function = nn.CrossEntropyLoss()

## Training

In [None]:
# Fetch one batch explicitly: X_batch is otherwise only defined by the training
# loop further down, so this cell would fail on a fresh kernel.
X_batch, y_batch = next(iter(DataLoader(train_dataset, batch_size = 64)))
torch.argmax(model(X_batch), axis = 1)

In [None]:
X_batch.shape

In [None]:
batch_size = 64
# Shuffled mini-batches for training
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The whole held-out split as a single batch (must wrap test_dataset, not train_dataset)
testloader = DataLoader(test_dataset, batch_size=len(test_dataset))

In [None]:
epochs = 5
accs = []    # mean training accuracy of each epoch
losses = []  # mean training loss of each epoch
model.train() # Set the model to training mode
for epoch in (t:= trange(epochs)):

    loss_epoch = 0.
    acc = 0.
    num_batches = 0
    for X_batch, y_batch in trainloader:
        num_batches += 1
        y_pred = model(X_batch)

        loss = loss_function(y_pred, y_batch)
        loss_epoch += loss.item()

        # Batch accuracy computed with torch, so it also works for tensors on the GPU
        acc += (torch.argmax(y_pred, dim = 1) == y_batch).float().mean().item()

        optimizer.zero_grad() # gradients accumulate across backward() calls, so reset them first
        loss.backward()
        optimizer.step()

    # Average over the batches; max() avoids a division by zero for an empty loader
    acc /= max(num_batches, 1)
    loss_epoch /= max(num_batches, 1)
    losses.append(loss_epoch)
    accs.append(acc)
    t.set_description(f"Loss: {loss_epoch:.2f}, Acc: {acc:.2f}")

## Evaluate

In [None]:
fig, axs = plt.subplots(1, 2, figsize = (20, 5))

# One point per epoch; titles and labels let the figure stand on its own
axs[0].plot(losses)
axs[0].set(title = "Training loss", xlabel = "epoch", ylabel = "loss")
axs[1].plot(accs)
axs[1].set(title = "Training accuracy", xlabel = "epoch", ylabel = "accuracy");

In [None]:
model.eval()
# test_dataset comes from random_split, so test_dataset.dataset is the FULL
# dataset (training samples included). Select only the held-out rows.
held_out = test_dataset.indices
X_eval = test_dataset.dataset.X[held_out]
y_eval = test_dataset.dataset.y[held_out]

# No gradients are needed for evaluation
with torch.no_grad():
    y_pred = model(X_eval)

# accuracy_score expects (y_true, y_pred)
print(accuracy_score(y_eval, torch.argmax(y_pred, axis = 1)))