# PyTorch

## Librerias

In [1]:

# generales
import torch
import numpy as np
import os
import pandas as pd


# visualizacion
import matplotlib.pyplot as plt


# bases de datos 
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets, transforms


# preprocesamiento
from torchvision.transforms import ToTensor, Lambda
from torchvision.io import read_image


# modelos deep learning
from torch import nn


# modelos pre-entrenados


# interpretacion de modelo




## Intro

In [2]:
# Load the FashionMNIST splits.
# Both splits share the storage root, the download flag and the ToTensor
# transform; only the `train` flag differs between them.
fashion_mnist_args = {"root": "data", "download": True, "transform": ToTensor()}

# training split
training_data = datasets.FashionMNIST(train=True, **fashion_mnist_args)

# test split
test_data = datasets.FashionMNIST(train=False, **fashion_mnist_args)

training_data, test_data

(Dataset FashionMNIST
     Number of datapoints: 60000
     Root location: data
     Split: Train
     StandardTransform
 Transform: ToTensor(),
 Dataset FashionMNIST
     Number of datapoints: 10000
     Root location: data
     Split: Test
     StandardTransform
 Transform: ToTensor())

In [3]:
# Build the data loaders.

batch_size = 64
# The training loader feeds SGD, so reshuffle the samples every epoch instead
# of always presenting the batches in the same fixed order.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
# Evaluation does not depend on sample order, so the test loader stays sequential.
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Peek at the first test batch to confirm tensor shapes and the label dtype.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break


Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [4]:
# Pick the compute backend: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")
device

Using cpu device


'cpu'

In [5]:
# crear modelo NeuralNetwork

# capa de entrada: capa plana 1xN

# capas conectadas: 
# capa lineal(densa): 28x28, unidades=512, activacion=relu
# capa lineal(densa): 512x512, unidades=512, activacion=relu

# capa de salida: 512x10

class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> two hidden ReLU layers -> logits.

    The defaults reproduce the original architecture (784 -> 512 -> 512 -> 10).
    Sub-module names are unchanged, so ``state_dict`` keys stay the same.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_units: Width of each of the two hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=28 * 28, hidden_units=512, num_classes=10):
        super().__init__()
        # Flattens every dimension except the first one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Return the raw (un-normalised) class logits for the batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# Instantiate the network, move its parameters to the selected device and
# print the module structure.
model = NeuralNetwork().to(device)
print(model)


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
# Optimisation setup.

# Loss function: CrossEntropyLoss()
# Optimizer: SGD over the model's parameters, learning rate 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [7]:

# entrenar modelo
# parametros: dataloader, modelo, funcion perdidas, optimizador


def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``, updating ``model`` in place.

    Every batch is moved to the device that holds the model's parameters, so
    the function does not rely on a notebook-level ``device`` variable.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing ``.dataset``
            (used only to report the total number of samples).
        model: ``nn.Module`` to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer built over ``model``'s parameters.

    Returns:
        None. The loss is printed for every 100th batch.
    """
    size = len(dataloader.dataset)

    # Follow the model: inputs must live where its parameters live.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(target_device), y.to(target_device)

        # Forward pass and prediction error.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation, parameter update, then clear the gradients so they
        # do not accumulate into the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            current = (batch + 1) * len(X)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")

In [8]:
# evaluacion de modelo

# evaluar modelo
# parametros: dataloader, modelo, funcion perdidas

def test(dataloader, model, loss_fn):
    
    size = len(dataloader.dataset) # extraer tamaño de datos
    num_batches = len(dataloader) # extraer numero de batches
    
    # evaluar modelo
    model.eval()
    
    # inicializar test_loss y correct
    test_loss, correct = 0, 0
    
    
    with torch.no_grad():
        for X, y in dataloader: # iterar sobre los datos de entrenamiento
            X, y = X.to(device), y.to(device)
            
            # predicciones
            pred = model(X)
            
            # funcion perdidas (test)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [9]:
# Train and evaluate the model: one training pass followed by one evaluation
# pass per epoch.
epochs = 5
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.301743  [   64/60000]


loss: 2.288517  [ 6464/60000]
loss: 2.276055  [12864/60000]
loss: 2.282048  [19264/60000]
loss: 2.249070  [25664/60000]
loss: 2.227305  [32064/60000]
loss: 2.238309  [38464/60000]
loss: 2.200297  [44864/60000]
loss: 2.207671  [51264/60000]
loss: 2.172919  [57664/60000]
Test Error: 
 Accuracy: 40.8%, Avg loss: 2.168171 

Epoch 2
-------------------------------
loss: 2.177305  [   64/60000]
loss: 2.172227  [ 6464/60000]
loss: 2.114214  [12864/60000]
loss: 2.143045  [19264/60000]
loss: 2.093748  [25664/60000]
loss: 2.028236  [32064/60000]
loss: 2.070398  [38464/60000]
loss: 1.984049  [44864/60000]
loss: 1.999266  [51264/60000]
loss: 1.937078  [57664/60000]
Test Error: 
 Accuracy: 54.6%, Avg loss: 1.926371 

Epoch 3
-------------------------------
loss: 1.951425  [   64/60000]
loss: 1.933151  [ 6464/60000]
loss: 1.806949  [12864/60000]
loss: 1.865690  [19264/60000]
loss: 1.769208  [25664/60000]
loss: 1.696826  [32064/60000]
loss: 1.740330  [38464/60000]
loss: 1.618227  [44864/60000]
loss: 

In [10]:
# Persist the trained weights (state_dict only, not the whole module).
model_path = "data/model.pth"
torch.save(model.state_dict(), model_path)
# Report the location that was actually written.
print(f"Saved PyTorch Model State to {model_path}")


Saved PyTorch Model State to model.pth


In [11]:
# Reload the model: rebuild the architecture, then restore the saved weights.
model = NeuralNetwork().to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on an accelerator also loads on a CPU-only machine.
# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(
    torch.load("data/model.pth", map_location=device, weights_only=True)
)

model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [12]:
# Single-sample prediction on the first test image.

# Class names, looked up by integer label.
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[0]  # (image tensor, integer label)
with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    # Index of the largest logit of the only row -> predicted class name.
    predicted = classes[pred[0].argmax(0)]
    actual = classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"


## Tensores

In [13]:
# Create a tensor directly from a nested Python list.
data = [[1, 2],[3, 4]]
x_data = torch.tensor(data)

# Display the source list.
data

[[1, 2], [3, 4]]

### Crear Tensores

In [14]:
# Create a tensor from a NumPy array built out of the same nested list.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
x_np

tensor([[1, 2],
        [3, 4]], dtype=torch.int32)

In [15]:
# Create tensors from another tensor (same shape as x_data).
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.5519, 0.9591],
        [0.9058, 0.5021]]) 



In [16]:
# Create tensors from an explicit shape tuple (2 rows, 3 columns):
# random values, all ones, and all zeros.
shape = (2,3,)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.2681, 0.1491, 0.6284],
        [0.1170, 0.2301, 0.7552]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [17]:

# Tensor attributes: shape, element dtype and the device holding the data.
tensor = torch.rand(3,4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### Operaciones

#### Indexing y Slicing

In [18]:
# Indexing and slicing (NumPy-style).
# NOTE: `tensor` is rebound here and reused by the following cells.
tensor = torch.ones(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Last column: {tensor[..., -1]}")
# Overwrite the second column (index 1) with zeros, in place.
tensor[:,1] = 0
print(tensor)

First row: tensor([1., 1., 1., 1.])
First column: tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [19]:
# Concatenate tensors along an existing dimension.
# dim=0 joins along rows, dim=1 joins along columns (three 4x4 blocks -> 4x12).
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


#### Operaciones Aritmeticas



In [20]:
# Three equivalent spellings of the matrix product of `tensor` with its
# transpose: the @ operator, the method form, and the function form writing
# into a pre-allocated output tensor. y1, y2, y3 hold the same values.
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)

y3 = torch.rand_like(y1)
torch.matmul(tensor, tensor.T, out=y3)


# This computes the element-wise product. z1, z2, z3 will have the same value
z1 = tensor * tensor
z2 = tensor.mul(tensor)

z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

#### Single-element tensors 

In [21]:
# Reduce the tensor to a single-element tensor, then extract its value as a
# plain Python number with .item().
agg = tensor.sum()
agg_item = agg.item()
print(agg_item, type(agg_item))

12.0 <class 'float'>


#### Operaciones In-Place

In [22]:
# In-place operation: add_ (trailing underscore) modifies `tensor` itself.
# NOTE: re-running this cell adds 5 again, so it is not idempotent.
print(f"{tensor} \n")
tensor.add_(5)
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])


#### Conversiones Tensor - Numpy

In [23]:
# Tensor -> NumPy array.
t = torch.ones(5)
print(f"t: {t}")
n = t.numpy()
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


In [24]:
# An in-place change to the tensor is also visible through the NumPy array
# obtained from it in the previous cell (both print the updated values).
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.])
n: [2. 2. 2. 2. 2.]


In [25]:
# NumPy array -> tensor.
n = np.ones(5)
t = torch.from_numpy(n)

# Modifying the array in place (out=n) is reflected in the tensor as well.
np.add(n, 1, out=n)
print(f"t: {t}")
print(f"n: {n}")


t: tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
n: [2. 2. 2. 2. 2.]


## Bases de datos y Dataloaders


* root: path de base de datos 
* train: flag para datos de entrenamiento
* download: flag para descargar datos de Internet
* transform: feature transformations 
* target_transform: label transformations

In [26]:
# Load the training split (train=True); images are converted with ToTensor.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

# Load the test split (train=False) with the same root and transform.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

training_data, test_data

(Dataset FashionMNIST
     Number of datapoints: 60000
     Root location: data
     Split: Train
     StandardTransform
 Transform: ToTensor(),
 Dataset FashionMNIST
     Number of datapoints: 10000
     Root location: data
     Split: Test
     StandardTransform
 Transform: ToTensor())

### Crear Bases de Datos Personalizadas

In [27]:


# NOTE: the whole body of this cell is a string literal, so none of the
# plotting code inside it is executed; the cell only evaluates to that string.
""" 
# mapeao de labels 
labels_map = {
    0: "T-Shirt",
    1: "Trouser",
    2: "Pullover",
    3: "Dress",
    4: "Coat",
    5: "Sandal",
    6: "Shirt",
    7: "Sneaker",
    8: "Bag",
    9: "Ankle Boot",
}

# Plot imagenes
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, cols * rows + 1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    figure.add_subplot(rows, cols, i)
    plt.title(labels_map[label])
    plt.axis("off")
    plt.imshow(img.squeeze(), cmap="gray")
plt.show()
"""

' \n# mapeao de labels \nlabels_map = {\n    0: "T-Shirt",\n    1: "Trouser",\n    2: "Pullover",\n    3: "Dress",\n    4: "Coat",\n    5: "Sandal",\n    6: "Shirt",\n    7: "Sneaker",\n    8: "Bag",\n    9: "Ankle Boot",\n}\n\n# Plot imagenes\nfigure = plt.figure(figsize=(8, 8))\ncols, rows = 3, 3\nfor i in range(1, cols * rows + 1):\n    sample_idx = torch.randint(len(training_data), size=(1,)).item()\n    img, label = training_data[sample_idx]\n    figure.add_subplot(rows, cols, i)\n    plt.title(labels_map[label])\n    plt.axis("off")\n    plt.imshow(img.squeeze(), cmap="gray")\nplt.show()\n'

In [28]:
# crear clase base de datos personalizada
# atributos: labels, dir(path), transform, target_transform
# metodos: inicializador, len, getitem
class CustomImageDataset(Dataset):
    """Image dataset described by an annotations CSV.

    Column 0 of the CSV is read as the image file name (relative to
    ``img_dir``) and column 1 as the label.

    Args:
        annotations_file: Path of the CSV file, read with ``pd.read_csv``.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded image.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir, self.transform, self.target_transform = (
            img_dir,
            transform,
            target_transform,
        )

    def __len__(self):
        """Number of samples, i.e. rows in the annotations table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at row ``idx``."""
        file_name = self.img_labels.iloc[idx, 0]
        sample = read_image(os.path.join(self.img_dir, file_name))
        target = self.img_labels.iloc[idx, 1]
        # Transforms are optional: a falsy value means "leave untouched".
        sample = self.transform(sample) if self.transform else sample
        target = self.target_transform(target) if self.target_transform else target
        return sample, target

### Dataloaders

In [29]:
# Create the data loaders for the training and test splits.
# Arguments: dataset, batch_size, shuffle.
# NOTE: this rebinds train_dataloader / test_dataloader created earlier in the
# notebook, here with shuffling enabled for both splits.
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [30]:
# NOTE: the whole body of this cell is a string literal, so the batch
# inspection code inside it is not executed; the cell only evaluates to that
# string.
""" 
# extraer x variable, y labels
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = train_labels[0]
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {label}")
 """


' \n# extraer x variable, y labels\ntrain_features, train_labels = next(iter(train_dataloader))\nprint(f"Feature batch shape: {train_features.size()}")\nprint(f"Labels batch shape: {train_labels.size()}")\nimg = train_features[0].squeeze()\nlabel = train_labels[0]\nplt.imshow(img, cmap="gray")\nplt.show()\nprint(f"Label: {label}")\n '

## Transformadores

In [31]:
# Load the dataset with both a feature transform and a label transform.
# transform: ToTensor() converts each image to a tensor.
# target_transform: one-hot encodes the integer label.

# One-hot encoding: start from a zero vector of length 10 and scatter a 1 at
# the position given by the label y. Defined once and passed to the dataset.
target_transform = Lambda(lambda y: torch.zeros(
    10, dtype=torch.float).scatter_(dim=0, index=torch.tensor(y), value=1))

data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
    target_transform=target_transform,
)

data

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
Target transform: Lambda()

## Modelos - Redes Neuronales

### Hardware  Accelerator 

In [32]:
# Pick the compute backend: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")


Using cpu device


###  Crear Clase de Modelo

In [33]:
# crear clase Module
# metodos: inicializar, forward

# capa de entrada: capa plana 1xN
# capas conectadas:linear_relu_stack, Sequential
# capa lineal(densa), 28x28 , unidades 512
# capa de activacion: relu
# capa lineal(densa), 512x512 , unidades 512
# capa de activacion: relu 

# capa de salida: capa lineal(densa), 512x10 unidades


class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> two hidden ReLU layers -> logits.

    The defaults reproduce the original architecture (784 -> 512 -> 512 -> 10).
    Sub-module names are unchanged, so ``state_dict`` keys stay the same.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_units: Width of each of the two hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=28 * 28, hidden_units=512, num_classes=10):
        super().__init__()
        # Flattens every dimension except the first one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Return the raw (un-normalised) class logits for the batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
# Create the model and move its parameters to the selected device.
model = NeuralNetwork().to(device)
print(model)  # show the module structure

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [34]:
# One random 28x28 input; the leading dimension of size 1 acts as the batch.
X = torch.rand(1, 28, 28, device=device)

# Forward pass: raw class scores (logits).
logits = model(X)
# Class probabilities: softmax over dimension 1 of the logits.
pred_probab = nn.Softmax(dim=1)(logits)

# Predicted class = index of the highest probability.
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

logits

Predicted class: tensor([8])


tensor([[-0.1421, -0.0468,  0.0053, -0.0709,  0.0531, -0.0302,  0.0310,  0.0116,
          0.0682, -0.1025]], grad_fn=<AddmmBackward0>)

### Capas

* Flatten: 1xN
* Linear (densa): # unidades
* Sequential: contenedor de modelos por capas


* relu: funcion de activacion no lineal
* softmax: funcion de activacion no lineal (multi-clase)

In [3]:
# Sample mini-batch: 3 random images of 28x28 values each.
input_image = torch.rand(3,28,28)
print(input_image.size())



torch.Size([3, 28, 28])


#### Flatten: Capa plana

Convertir cada entrada (n×m) en un vector 1-D de n·m píxeles, conservando la dimensión 0 (batch): por ejemplo, `[3, 28, 28]` → `[3, 784]`.

In [4]:
# Flatten every dimension except the first: [3, 28, 28] -> [3, 784].
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())
flatten

torch.Size([3, 784])


Flatten(start_dim=1, end_dim=-1)

#### Linear (Densa): Capa Densa

Aplicar combinación lineal de la entrada con pesos y bias de cada neurona


In [5]:
# Linear layer mapping the 784 flattened values to 20 outputs per sample.
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

layer1

torch.Size([3, 20])


Linear(in_features=784, out_features=20, bias=True)

#### Capa de Activación

Funcion de activación no lineal

##### Relu

In [6]:
# Apply ReLU: negative activations become 0, positive ones are unchanged.
# NOTE: `hidden1` is overwritten, so re-running only this cell prints
# already-rectified values under "Before ReLU".
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.2851, -0.5149, -0.2989,  0.2719, -0.3299,  0.1551, -0.1265, -0.4536,
         -0.5093,  0.3027,  0.2896,  0.0717,  0.0934, -0.6869, -0.0346, -0.6649,
          0.1292, -0.1964, -0.0211, -0.5308],
        [-0.1319, -0.3941, -0.1507,  0.0980, -0.0961,  0.0532, -0.0933, -0.7224,
         -0.3430,  0.0238,  0.3405,  0.3760, -0.1988, -0.6249,  0.0837, -0.7921,
         -0.0655,  0.0244,  0.0909, -0.0384],
        [-0.0852, -0.3432, -0.0841, -0.1929, -0.2452,  0.0634, -0.1065, -0.7610,
         -0.4048,  0.1384,  0.2660, -0.2356, -0.0993, -0.7169,  0.1065, -0.3137,
          0.2573,  0.0872, -0.1384, -0.4193]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.2851, 0.0000, 0.0000, 0.2719, 0.0000, 0.1551, 0.0000, 0.0000, 0.0000,
         0.3027, 0.2896, 0.0717, 0.0934, 0.0000, 0.0000, 0.0000, 0.1292, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0980, 0.0000, 0.0532, 0.0000, 0.0000, 0.0000,
         0.0238, 0.3405, 0.3760, 0.0000, 0.0000, 0.08

##### Softmax

Capa de salida (multi-clase). Escalar los logits de salida en [0,1] que representa la probabilidad de pertenencia de cada clase.

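`dim` = dimensión del tensor a lo largo de la cual se aplica softmax (no el número de clases). Con `dim=1` se normaliza sobre la dimensión de clases, de modo que las probabilidades de cada fila suman 1.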

In [10]:
# Softmax over dimension 1 of the logits.
# NOTE: `logits` must already exist from a previously executed cell.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

softmax

Softmax(dim=1)

#### Sequential

Bloque (container) de capas

In [9]:
# Chain the layers in order: flatten -> layer1 -> ReLU -> Linear(20, 10).
# `flatten` and `layer1` are the module objects created in earlier cells.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
# Run a fresh random mini-batch of 3 images through the container.
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)

seq_modules


Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=20, bias=True)
  (2): ReLU()
  (3): Linear(in_features=20, out_features=10, bias=True)
)

In [13]:
# Inspect the learnable parameters of the container.
print(f"Model structure: {seq_modules}\n\n")

# For each named parameter, print its size and its first two entries.
for name, param in seq_modules.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=20, bias=True)
  (2): ReLU()
  (3): Linear(in_features=20, out_features=10, bias=True)
)


Layer: 1.weight | Size: torch.Size([20, 784]) | Values : tensor([[ 0.0119, -0.0236,  0.0088,  ..., -0.0200, -0.0262,  0.0137],
        [-0.0357,  0.0069,  0.0288,  ..., -0.0017, -0.0276, -0.0245]],
       grad_fn=<SliceBackward0>) 

Layer: 1.bias | Size: torch.Size([20]) | Values : tensor([ 0.0080, -0.0216], grad_fn=<SliceBackward0>) 

Layer: 3.weight | Size: torch.Size([10, 20]) | Values : tensor([[ 0.1685, -0.1955,  0.1910, -0.0173, -0.1203, -0.0149,  0.1271,  0.1559,
         -0.1917,  0.1514, -0.1506,  0.0066, -0.0822,  0.0743, -0.1624, -0.1522,
         -0.2195,  0.1612,  0.2169,  0.1822],
        [-0.0127,  0.0581,  0.0744,  0.1196,  0.0005, -0.1235,  0.1883,  0.1358,
         -0.0134, -0.1527,  0.1903,  0.0482,  0.1544,  0.1214,  0.1745, -0.0665,
          0.0929, -0.1491,  0.02

## GPU

In [36]:
# Move the tensor to the GPU only when CUDA is available; otherwise it is
# left where it is.
if torch.cuda.is_available():
    tensor = tensor.to("cuda")

# Device that currently holds the tensor.
tensor.device

device(type='cpu')