# Pytorch packages:
- torch: the top level packages
- torch.nn: contains layers, weights...
- torch.autograd: handles the derivative calculations
- torch.nn.functional: loss functions/activation functions/convolution operations
- torch.optim: optimization algorithms: SGD/Adam
- torch.utils: datasets/dataloaders

# CUDA:
- GPUs are better than CPUs when the computation can be done in parallel (CPUs typically have 4/8/16 cores, whereas GPUs can have thousands of cores; high-end GPUs have ≈3000 cores)
- NNs are embarrassingly parallel (they can easily be broken into smaller independent computations, e.g. the convolution operation)
- CUDA is a SW platform that pairs with the GPU platform
- CUDA is the SW layer that provides an API to the developers
- Don't use CUDA for simple tasks

In [None]:
import torch

t = torch.tensor([[1, 2, 3],[4, 5, 6]])
# tensor([[1, 2, 3],[4, 5, 6]]) is on CPU, by default
if torch.cuda.is_available():  # .cuda() raises on machines without a usable GPU
    t = t.cuda()
    # tensor([[1, 2, 3],[4, 5, 6]], device='cuda:0') is on GPU (the first GPU)

# Tensors:

- number = scalar
- array = vector
- 2d-array = matrix
- nd-tensor = nd-array

- Rank of a tensor: the number of dimensions (matrix has rank 2)


# Shape of a tensor:

In [None]:
import torch

t = torch.tensor([[1, 2, 3],[4, 5, 6]])

t.shape 
# torch.Size([2, 3])

t.reshape(1, 6).shape
# torch.Size([1, 6])

# Methods

In [None]:
import torch
import numpy as np

t = torch.Tensor()
print(t.dtype)
# torch.float
print(t.device)
# cpu
print(t.layout)
# torch.strided : how our tensors data is laid out in memory
device = torch.device('cuda:0')
# device(type='cuda', index=0)

# ERRORS

In [None]:
t1 = torch.tensor([1, 2, 3])
t2 = torch.tensor([1., 2., 3.])
t1 + t2 # ERROR on old PyTorch releases (mixed dtypes); current releases promote the result to float: tensor([2., 4., 6.])

t1 = torch.tensor([1, 2, 3])
t2= t1.cuda()
t1 + t2 # ERROR: the two operands live on different devices (cpu vs cuda:0)

# Creation of tensors

In [None]:
import torch
import numpy as np

data = np.array([1, 2, 3])

torch.Tensor(data)
# tensor([1., 2., 3.])

torch.tensor(data)
# tensor([1, 2, 3], dtype=torch.int32)
# (the dtype follows the NumPy array; when the array is int64 the dtype suffix is not printed)

torch.as_tensor(data)
# tensor([1, 2, 3], dtype=torch.int32)

torch.from_numpy(data)
# tensor([1, 2, 3], dtype=torch.int32)

torch.eye(2) # identity tensor (the number of rows is a required argument)
# 1 0
# 0 1
torch.zeros(2, 2)
# 0 0
# 0 0
torch.ones(2, 2)
# 1 1
# 1 1
torch.rand(2, 2) # uniform random values in [0, 1), different on every call
# 0.312 0.652
# 0.452 0.912

# Differences

In [None]:
import torch
import numpy as np

data = np.array([1, 2, 3])

t1 = torch.Tensor(data)
# tensor([1., 2., 3.]) # Class constructor (converts to the default float dtype)

t2 = torch.tensor(data)
# tensor([1, 2, 3], dtype=torch.int32) # Factory function (also as_tensor, from_numpy) -> Preferred

t3 = torch.as_tensor(data)
# tensor([1, 2, 3], dtype=torch.int32)

t4 = torch.from_numpy(data)
# tensor([1, 2, 3], dtype=torch.int32)

# Set the data type
torch.tensor(np.array([1, 2, 3]), dtype=torch.float64) # tensor([1., 2., 3.], dtype=torch.float64)

# Change the array IN PLACE. Rebinding the name (data = np.array([0, 0, 0])) would
# create a new, unrelated array and would not show which tensors share memory.
data[:] = 0

t1
# tensor([1., 2., 3.]) -> Unchanged/Create an additional copy of the data in memory

t2
# tensor([1, 2, 3], dtype=torch.int32) -> Unchanged/Create an additional copy of the data in memory --> Most used

t3
# tensor([0, 0, 0], dtype=torch.int32) -> Changed/Share data --> Accepts any array-like
# as_tensor also accepts built-in structures like lists, but a list is always copied
# (memory can only be shared with NumPy arrays and other tensors).

t4
# tensor([0, 0, 0], dtype=torch.int32) -> Changed/Share data --> Accepts only numpy arrays

In [None]:
import torch

t = torch.tensor([
  [1, 1, 1, 1],
  [2, 2, 2, 2],
  [3, 3, 3, 3]
], dtype=torch.float32)

# To find the shape:
t.size()
# torch.Size([3, 4])
t.shape
# torch.Size([3, 4])

# To see the number of elements
t.numel()
# 12

# Squeezing and unsqueezing a tensor
t.reshape(1, 12).squeeze()
# tensor([1., 1., 1., 2., 2... 3.])
t.reshape(1, 12).squeeze().unsqueeze(dim=0) # tensor([[1., 1., 1., 2., 2... 3.]])

# Flattening function:
def flatten(my_tensor):
    """Return a rank-1 tensor holding every element of `my_tensor` in row-major order.

    `reshape(-1)` is used instead of `reshape(1, -1).squeeze()`: squeeze drops
    *every* axis of length 1, so a single-element input would come back as a
    0-d scalar instead of a 1-element vector.
    """
    return my_tensor.reshape(-1)

t = torch.tensor([
  [1, 1, 1, 1],
  [2, 2, 2, 2]
], dtype=torch.float32)

flatten(t) # tensor([1, 1, 1, 1, 2, 2, 2, 2])

In [None]:
import torch

t1 = torch.tensor([
    [1, 1, 1, 1],
    [1, 1, 1, 1],
    [1, 1, 1, 1],
    [1, 1, 1, 1]
])

t2 = torch.tensor([
    [2, 2, 2, 2],
    [2, 2, 2, 2],
    [2, 2, 2, 2],
    [2, 2, 2, 2]
])

t3 = torch.tensor([
    [3, 3, 3, 3],
    [3, 3, 3, 3],
    [3, 3, 3, 3],
    [3, 3, 3, 3]
])

# Stack: joins the tensors along a NEW leading (batch) axis
t = torch.stack((t1, t2, t3))
t.shape
# torch.Size([3, 4, 4]) # batch of 3 tensors with the height and width of 4

# In order for a CNN to understand the input (it also expects a color channel), we need to reshape the tensor:
t = t.reshape(3, 1, 4, 4)
# 3 - images; 1 - color channel; 4 - rows of pixels; 4 - pixels per row
# When working with CNNs, flattening is required. Flattening examples:
t = torch.tensor([
    [[1, 1],
     [2, 2]],
    [[3, 3],
     [4, 4]]])

t.reshape(1, -1)[0]
t.reshape(-1)
t.view(t.numel())
t.flatten() # Flattens the whole batch (both images) into one vector (we don't want that)
# tensor([1, 1, 2, 2, 3, 3, 4, 4])

t.flatten(start_dim=1) # Flattens each image on its own and keeps the batch axis (this is what we want)
# tensor([[1, 1, 2, 2],
#         [3, 3, 4, 4]])

# Element wise operations:
The two tensors need to have the same shape (or shapes that can be broadcast to a common shape) to perform an element-wise operation


In [None]:
t1 = torch.tensor([
    [1, 2],
    [3, 4]])

t2 = torch.tensor([
    [9, 8],
    [7, 6]])

t1 + t2 # or t1.add(t2)
# tensor([[10, 10],
#         [10, 10]])

# Broadcasting:
# t1 + 2 means that the 2 is broadcasted:
np.broadcast_to(2, t1.shape)
# array([[2, 2],
#        [2, 2]])

a = np.array([10, 5, -1])
print(a > 0)
# [ True  True False]

t1 = torch.tensor([
    [1, 2],
    [3, 4]])
t2 = torch.tensor([9, 8])
np.broadcast_to(t2.numpy(), t1.shape) # the rank-1 tensor is repeated along the missing axis
# array([[9, 8],
#        [9, 8]])

# Reduction operations: operations that reduce the number of elements

In [None]:
import torch
import numpy as np

t = torch.tensor([
    [0, 1, 0],
    [2, 0, 2],
    [0, 3, 0]
], dtype=torch.float32)

# Sum
t.sum() # tensor(8.)
t.numel() # 9
t.sum().numel() # 1 -> the reduction collapsed 9 elements into a single one

# Product, mean, std
t.prod() # tensor(0.)
t.mean() # tensor(0.8889)
t.std() # tensor(1.1667)

# Reduce a specific axis

t = torch.tensor([
	[1, 1, 1, 1],
	[2, 2, 2, 2],
	[3, 3, 3, 3]
], dtype=torch.float32)

# Sum
t.sum(dim=0) # tensor([6., 6., 6., 6.])
t.sum(dim=1) # tensor([4., 8., 12.]) 

# Argmax function: Tells the index location of the maximum value inside a tensor
t = torch.tensor([
	[1, 1, 1, 2],
	[3, 2, 2, 2],
	[4, 3, 1, 5]
], dtype=torch.float32)

t.max() # tensor(5.)
t.argmax() # tensor(11) -> position of the maximum in the flattened tensor
t.flatten() # tensor([1., 1., 1., 2., 3., 2., 2., 2., 4., 3., 1., 5.])
t.max(dim=0) # the max values followed by the indexes: (tensor([4., 3., 2., 5.]), tensor([2, 2, 1, 2]))
t.argmax(dim=0) # Only the indexes (integers): tensor([2, 2, 1, 2])

# What we do when we need the value:

t.max() # tensor(5.)
t.max().item() # 5.0

# What we do when we need the values:

t.mean(dim=0) # tensor([2.6, 2., 1.3, 3.])
t.mean(dim=0).tolist() # [2.6, 2., 1.3, 3.]
t.mean(dim=0).numpy() # array([2.6, 2., 1.3, 3.], dtype=float32)

# MNIST data set transformations:
- PNG
- trimming
- resizing
- sharpening
- extending
- negating
- grayscaling

# 4 steps of AI implementation
- Prepare the data : ETL process (Extract; Transform ; Load)
- Build the model
- Train the model
- Analyze the model's results

# Loading the data

In [None]:
import torch # top level pytorch package
import torchvision # acces to popular datasets and image transformation
import torchvision.transforms as transforms # gives acces to common transformation to image processing

train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST', # the location on disk where the data is located
    train=True, # the train set
    download=True, # tells the class to download the data if it's not present at the location we specified
    transform=transforms.Compose([ # Compose class allows us to chain more than one transformation
        transforms.ToTensor() # we need tensors
    ])
)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=10) # we can also shuffle and pick the batch size (querying)

# Visualize the data

In [None]:
import numpy as np
import matplotlib.pyplot as plt

torch.set_printoptions(linewidth=120)
train_set.targets # tensor([9, 0, 0 ... 5])
train_set.targets.bincount() # tensor([6000, 6000, ...., 6000]) the frequency of each class

# One sample
sample = next(iter(train_set))
len(sample) # 2
type(sample) # tuple
image, label = sample
image.shape # torch.Size([1, 28, 28])
torch.as_tensor(label).shape # torch.Size([]) ; scalar (the label may come back as a plain int, which has no .shape)
plt.imshow(image.squeeze(), cmap='gray')

# One batch
batch = next(iter(train_loader))
len(batch) # 2
type(batch) # list
images, labels = batch
images.shape # torch.Size([10, 1, 28, 28])
labels.shape # torch.Size([10]) # rank one tensor
grid = torchvision.utils.make_grid(images, nrow=10) # nrow = number of images per row of the grid

plt.figure(figsize=(15, 15))
plt.imshow(np.transpose(grid, (1, 2, 0))) # channels-first (C, H, W) -> channels-last (H, W, C) for matplotlib

# Build the model: with torch.nn
- In CNNs: kernel = filter

In [None]:
import torch.nn as nn

class Network(nn.Module):
    """Skeleton network: only the layers are declared, the forward pass is still a stub.

    Layers: two 5x5 convolutions (1 -> 6 -> 12 channels) followed by three
    linear layers (12*4*4 -> 120 -> 60 -> 10).
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120) # fully connected / Dense / Linear layers
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Placeholder forward pass: returns the input unchanged.

        The previous body called `self.layer`, an attribute that is never
        defined, so any call to the network raised AttributeError.
        """
        # TODO: implement the forward pass with the layers declared in __init__
        return t

    def __repr__(self):
        # Demonstrates that the string representation inherited from nn.Module can be overridden
        return "overriten above nn.Module"

# Learnable parameters in nn

In [None]:
network = Network()
print(network)
# Network(
#   (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1)) # the stride is the sliding of the filter after each computation
#   (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
#   (fc1): Linear(in_features=192, out_features=120, bias=True) # the bias
#   (fc2): Linear(in_features=120, out_features=60, bias=True)
#   (out): Linear(in_features=60, out_features=10, bias=True)
# )
# or, for a single layer:
print(network.conv1)
# Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))


# To check the weights

In [None]:
print(network.conv1.weight)
# tensor([[[[...................]]]])

In [None]:
network.conv1.weight.shape

In [None]:
for param in network.parameters():
    print(param.shape)
# torch.Size([x, x, x, x])

# For each layer, we have a bias tensor and a weight tensor 
for name, param in  network.named_parameters():
    print(name, '\t\t', param.shape)
# conv1.weight        torch.Size([6, 1, 5, 5])
# conv1.bias          torch.Size([6])
# conv2.weight        .....

# Callable nn

In [None]:
# Matrix multiplication:
in_features = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
weight_matrix = torch.tensor([
    [1, 2, 3, 4],
    [2, 3, 4, 5],
    [3, 4, 5, 6]
], dtype=torch.float32)

weight_matrix.matmul(in_features) # (3 x 4) matrix times a 4-vector -> 3-vector
# tensor([30., 40., 50.])

In [None]:
# or
fc = nn.Linear(in_features=4, out_features=3)
fc.weight = nn.Parameter(weight_matrix) # replace the randomly initialised weights with our matrix

fc(in_features)
# tensor([30.213, 40.213, 50.213], grad_fn=<AddBackward0>)
# (not exactly 30/40/50 because the randomly initialised bias is added)

# or
fc = nn.Linear(in_features=4, out_features=3, bias=False)
fc.weight = nn.Parameter(weight_matrix)

fc(in_features) # without a bias the layer is exactly the matrix multiplication
# tensor([30., 40., 50.], grad_fn=<...>)


$y = Ax + b$
- A: Weight matrix tensor
- x: Input tensor
- b: Bias tensor
- y: Output tensor

# Callable nn:

In [None]:
import torch
import torch.nn as nn

fc = nn.Linear(in_features=4, out_features=3)
t = torch.tensor([1, 2, 3, 4], dtype=torch.float32)

output = fc(t) # calling the layer object runs nn.Module.__call__, which invokes forward()
print(output)

# Forward Method

In [None]:
import torch

def forward(self, t):
    """Forward pass of the CNN: two conv/ReLU/max-pool stages, then three linear layers.

    `self` must provide `conv1`, `conv2`, `fc1`, `fc2` and `out`.
    `t` is a batch of images; the `12 * 4 * 4` flatten below matches
    28x28 single-channel inputs.
    Returns the raw output-layer scores, one row per image (no softmax applied).
    """
    # (1) input layer: the identity, t is used as-is

    # (2) hidden conv layer
    t = self.conv1(t) # application of the convolution
    t = torch.nn.functional.relu(t) # the activation function
    t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2) # max pooling operation

    # (3) hidden conv layer
    t = self.conv2(t)
    t = torch.nn.functional.relu(t)
    t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2)

    # (4) hidden linear layer
    t = t.reshape(-1, 12 * 4 * 4) # flattening the input
    t = self.fc1(t) # application of the linear layer
    t = torch.nn.functional.relu(t)

    # (5) hidden linear layer
    t = self.fc2(t)
    t = torch.nn.functional.relu(t)

    # (6) output layer
    t = self.out(t)
    # t = torch.nn.functional.softmax(t, dim=1) # activation function for the output layer: not used here,
    # because training uses cross_entropy, which applies the softmax implicitly

    return t


# Whole program

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)

train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([ # the keyword is `transform` (singular)
        transforms.ToTensor()
    ])
)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=10) # we can also shuffle and pick the batch size (querying)


class Network(nn.Module):
    """CNN classifier: two conv/ReLU/max-pool stages followed by three linear layers.

    The `12 * 4 * 4` flatten in `forward` matches 28x28 single-channel inputs;
    the network outputs 10 raw scores per image.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120) # fully connected / Dense / Linear layers
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return the raw output scores (no softmax) for the image batch `t`."""
        # (1) input layer: the identity, t is used as-is

        # (2) hidden conv layer
        t = self.conv1(t) # application of the convolution
        t = torch.nn.functional.relu(t) # the activation function
        t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2) # max pooling operation

        # (3) hidden conv layer
        t = self.conv2(t)
        t = torch.nn.functional.relu(t)
        t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        t = t.reshape(-1, 12 * 4 * 4) # flattening the input
        t = self.fc1(t) # application of the linear layer
        t = torch.nn.functional.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = torch.nn.functional.relu(t)

        # (6) output layer
        t = self.out(t)

        return t

network = Network()

sample = next(iter(train_set))
image, label = sample  
image.unsqueeze(0).shape # torch.Size([1, 1, 28, 28]) : batch_size, in_channels, H, W 
 
pred = network(image.unsqueeze(0))
print(pred) # tensor([[-0.313, 0.0123, 0.312, ......... -0.0023]])
print(label) # 9
print(pred.argmax(dim=1)) # tensor([2]) -> predicted class index (the network has not been trained yet)
torch.nn.functional.softmax(pred, dim=1) # values between 0 and 1
torch.nn.functional.softmax(pred, dim=1).sum() # =1

# Batch processing

In [None]:
data_loader = torch.utils.data.DataLoader(train_set, batch_size=10)
batch = next(iter(data_loader))
images, labels = batch
images.shape # torch.Size([10, 1, 28, 28])
labels.shape # torch.Size([10])
preds = network(images)
preds.shape # torch.Size([10, 10])
print(preds.argmax(dim=1)) # tensor([3, 3, 2, 4 .... , 4])
print(labels) # tensor([2, 5, 7, 1 .... , 6])

# making comparison (eq = element-wise equality):
preds.argmax(dim=1).eq(labels) # tensor([1, 0, 0, 0 .... 1], dtype=uint8)
preds.argmax(dim=1).eq(labels).sum() # tensor(3)

def get_num_correct(preds, labels):
    """Count the predictions whose highest-scoring class equals the label.

    `preds` holds one row of class scores per sample, `labels` one class
    index per sample. Returns the count as a 0-d tensor.
    """
    # Tensor.eq is the element-wise equality test (there is no `Tensor.ex`)
    return preds.argmax(dim=1).eq(labels).sum()

-  VS Code for debugging

# Training a CNN

The training process can be broken down into 7 distinct steps:
1. Get a batch from the training set
2. Pass the batch through the network
3. Calculate the loss ( difference between predicted and real) --> We use a loss function
4. Calculate the gradient of the loss function with respect to the network's weights --> We use backpropagation
5. Update the weights using the gradient to reduce the loss. --> We use an optimization algorithm
6. Repeat steps 1-5 until one epoch is completed (all the images from the training set are passed through the network)
7. Repeat steps 1-6 for as many epochs until we obtain the desired level of accuracy

In [None]:
import torch.optim as optim # for optimizer

# For gradient tracking feature:
torch.set_grad_enabled(True)

# Calculate the loss

In [None]:
preds = network(images)
loss = torch.nn.functional.cross_entropy(preds, labels)
loss.item() # 2.324214

# Calculate the gradients


In [None]:
print(network.conv1.weight.grad) # None

loss.backward()
print(network.conv1.weight.grad.shape) # torch.Size([6, 1, 5, 5])

# Update the weights

- SGD
- Adam

In [None]:
optimizer = torch.optim.Adam(network.parameters(), lr=0.01) # lr = learning rate (hyper-parameter: you have to test and tune)
# the network.parameters are the network's weights
# lr=0.01 : how far to step in the direction that reduces the loss

loss.item() # 2.31241
get_num_correct(preds, labels) # 10

optimizer.step() # Updating the weights: We want to step in the direction of the loss function's minimum

# Re-run the same batch to see the effect of the single update
preds = network(images)
loss = torch.nn.functional.cross_entropy(preds, labels)

loss.item() # 2.29321
get_num_correct(preds, labels) # 19


# Shortly: the training process

In [None]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = torch.optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(train_loader)) # 1. Get a batch
images, labels = batch

preds = network(images) # 2. Pass the batch
loss = torch.nn.functional.cross_entropy(preds, labels) # 3. Calculate loss

loss.backward() # 4. Calculate Gradients
optimizer.step() # 5. Update the Weights
# __________________________________________
print('loss1:', loss.item())
preds = network(images) # same batch again, now with the updated weights
loss = torch.nn.functional.cross_entropy(preds, labels)
print('loss2:', loss.item())

# Training loop (complete)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

In [None]:
def get_num_correct(preds, labels):
    """Return, as a Python number, how many rows of `preds` have their arg-max equal to the matching entry of `labels`."""
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()

In [None]:
class Network(torch.nn.Module):
    """CNN classifier: two conv/ReLU/max-pool stages followed by three linear layers.

    The `12 * 4 * 4` flatten in `forward` matches 28x28 single-channel inputs;
    the network outputs 10 raw scores per image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = torch.nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = torch.nn.Linear(in_features=120, out_features=60)

        self.out = torch.nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return the raw output scores (no softmax) for the image batch `t`."""
        t = self.conv1(t)
        t = torch.nn.functional.relu(t)
        t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2)

        t = self.conv2(t)
        t = torch.nn.functional.relu(t)
        t = torch.nn.functional.max_pool2d(t, kernel_size=2, stride=2)

        t = t.reshape(-1, 12 * 4 * 4)
        t = self.fc1(t)
        t = torch.nn.functional.relu(t)

        t = self.fc2(t)
        t = torch.nn.functional.relu(t)

        t = self.out(t)
        # No softmax here: cross_entropy, used for training, applies it implicitly.
        return t

train_set = torchvision.datasets.FashionMNIST(
root = './data',
train=True,
download=True,
transform=torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])
)

network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = torch.optim.Adam(network.parameters(), lr=0.01)

for epoch in range(5):

    # Running totals for the current epoch
    total_loss = 0
    total_correct = 0

    for batch in train_loader:
        images, labels = batch

        preds = network(images)
        loss = torch.nn.functional.cross_entropy(preds, labels)

        optimizer.zero_grad() # to reset the gradient (it adds it up by default)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print("epoch:", epoch, "total_correct:", total_correct, "loss:", total_loss)

In [None]:
print(total_correct / len(train_set)) # 0.7798375

# Confusion matrix

In [None]:
def get_all_preds(model, loader):
    """Run `model` on every batch yielded by `loader` and join the outputs along dim 0.

    Each batch is unpacked as an (images, labels) pair; the labels are ignored.
    An empty tensor is returned when the loader yields no batches.
    """
    collected = torch.tensor([])
    for images, _labels in loader:
        batch_preds = model(images)
        collected = torch.cat((collected, batch_preds), dim=0)
    return collected

In [None]:
prediction_loader = torch.utils.data.DataLoader(train_set, batch_size=10000)
train_preds = get_all_preds(network, prediction_loader)

In [None]:
print(train_preds.shape) # torch.Size([60000, 10])

In [None]:
print(train_preds.requires_grad) # True (gradient tracking was on while the predictions were computed)
train_preds.grad # (nothing)

In [None]:
with torch.no_grad(): # no gradient tracking is needed for inference
    prediction_loader = torch.utils.data.DataLoader(train_set, batch_size=10000)
    train_preds = get_all_preds(network, prediction_loader)

In [None]:
print(train_preds.requires_grad) # False

In [None]:
train_set.targets # tensor([9, 0, 0, ....])
train_preds.argmax(dim=1) # tensor([9, 0, 0, ....])

In [None]:
# Pair every true label with its predicted label: one (true, predicted) row per sample
stacked = torch.stack(
    (train_set.targets, train_preds.argmax(dim=1)),
    dim=1
)
stacked.shape # torch.Size([60000, 2])
stacked
# tensor([[9, 9],
#         [0, 0],
#         .
#         .])

In [None]:
stacked[0].tolist() # [9, 9]

In [None]:
cmt = torch.zeros(10, 10, dtype=torch.int64)

for p in stacked:
    true_label, predicted_label = p.tolist()
    cmt[true_label, predicted_label] = cmt[true_label, predicted_label] + 1

In [None]:
print(cmt)
# tensor([[5324, 21, 42, 5 ,1 ....]])

# OR

In [None]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(train_set.targets, train_preds.argmax(dim=1))

# Plotting a confusion matrix

In [None]:
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
from resources.plotcm import plot_confusion_matrix # locally

In [None]:
cm = confusion_matrix(train_set.targets, train_preds.argmax(dim=1))
names = ('T-Shirt/top', 'Trousers', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot')
plt.figure(figsize=(10, 10))
plot_confusion_matrix(cm, names)

# Visualizing with tensorboard

- make an alias for tensorboard:
- alias tensorboard='python3 -m tensorboard.main'

- Start tensorboard server:
- $ tensorboard --logdir=data/dasd/sfa/

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120) # display options for output
torch.set_grad_enabled(True) # Already on by default

from torch.utils.tensorboard import SummaryWriter