<a href="https://colab.research.google.com/github/eisbetterthanpi/pytorch/blob/main/lucid_perceiverio_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor #, Lambda, Compose
import matplotlib.pyplot as plt
# https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html

# Download FashionMNIST into ./data (if not already present) and convert each image with ToTensor().
training_data = datasets.FashionMNIST(root="data", train=True, download=True,transform=ToTensor(),)
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor(),)
# print(training_data)

# Wrap both splits in DataLoaders that yield (image, label) batches of `batch_size`.
# NOTE(review): no `shuffle` argument is passed, so the training loader uses the
# DataLoader default ordering — confirm this is intended for SGD training.
batch_size = 64
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
# test_dataloader: #image, label

# Global device string used by every later cell when creating tensors and models.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
!pip install perceiver-pytorch


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Using cuda device
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting perceiver-pytorch
  Downloading perceiver_pytorch-0.8.3-py3-none-any.whl (12 kB)
Collecting einops>=0.3
  Downloading einops-0.4.1-py3-none-any.whl (28 kB)
Installing collected packages: einops, perceiver-pytorch
Successfully installed einops-0.4.1 perceiver-pytorch-0.8.3


#### nn

In [None]:
# nn
# https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html
class NeuralNetwork(nn.Module):
    """Three-layer fully connected classifier: 784 -> 512 -> 512 -> 10 with ReLU in between.

    The input is flattened with nn.Flatten() before entering the stack, so any
    input whose non-batch dimensions multiply to 28*28 is accepted.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Build the layer list first so the architecture reads top to bottom.
        stack_layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*stack_layers)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for `x`."""
        return self.linear_relu_stack(self.flatten(x))

# Smoke-test the baseline network on random input.
# NOTE: the global name `model` is rebound to a PerceiverIO instance in the PerceiverIO cell below.
model = NeuralNetwork().to(device) # instantiate the network and move its parameters to `device`
print(model)
X = torch.rand(5,1, 28, 28, device=device)  # random batch of 5 single-channel 28x28 inputs
logits = model(X)
print(logits.shape)  # recorded output: torch.Size([5, 10])
print(logits)
# Softmax over the class axis, then take the most probable class per sample.
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
torch.Size([5, 10])
tensor([[-0.0013,  0.0932, -0.0915, -0.0348,  0.0589,  0.0487, -0.0154, -0.0363,
          0.0464,  0.0953],
        [ 0.0333,  0.0832, -0.0489,  0.0007,  0.0307,  0.0498, -0.0343, -0.0187,
          0.0242,  0.0179],
        [ 0.0280,  0.1095, -0.1007, -0.0135,  0.0722,  0.0440, -0.0340, -0.0639,
          0.0205,  0.0609],
        [ 0.0351,  0.1493, -0.0528, -0.0630,  0.0378, -0.0335, -0.0442, -0.0830,
          0.0865,  0.0337],
        [ 0.0054,  0.1307, -0.1338, -0.0194,  0.0781,  0.0551, -0.0652, -0.0623,
          0.0231,  0.0658]], device='cuda:0', grad_fn=<AddmmBackward0>)
Predicted class: tensor([9, 1, 1, 1, 1], device='cuda:0')


#### PerceiverIO

In [None]:

import torch
from perceiver_pytorch import PerceiverIO
# https://github.com/lucidrains/perceiver-pytorch
# Build the PerceiverIO model. This rebinds the global `model`, replacing the
# NeuralNetwork instance created in the earlier cell.
model = PerceiverIO(
    dim = 28*28,                    # feature size of each input element (here one whole flattened 28x28 image)
    queries_dim = 10,            # dimension of decoder queries
    logits_dim = None,            # dimension of final logits; with None the recorded output's last dim is 10 (= queries_dim)
    depth = 6,                   # depth of net
    num_latents = 128,           # number of latents, or induced set points, or centroids. different papers giving it different names
    latent_dim = 128,            # latent dimension
    cross_heads = 1,             # number of heads for cross attention. paper said 1
    latent_heads = 8,            # number of heads for latent self attention, 8
    cross_dim_head = 64,         # number of dimensions per cross attention head
    latent_dim_head = 64,        # number of dimensions per latent self attention head
    weight_tie_layers = False    # whether to weight tie layers (optional, as indicated in the diagram)
).to(device)

# Alternative (disabled) minimal configuration:
# model = PerceiverIO( #https://github.com/lucidrains/perceiver-pytorch/blob/main/perceiver_pytorch/perceiver_io.py
#     dim = 28*28,                    # dimension of sequence to be encoded
#     queries_dim = 32,            # dimension of decoder queries
#     # logits_dim = 10,         None   # dimension of final logits?
#     depth = 6,                   # depth of net
# )


# Smoke test: 5 samples, each a length-1 sequence holding one 784-dim vector.
# seq = torch.randn(1, 512, 32)
seq = torch.randn(5, 1, 28*28, device=device)
# A single random decoder query of size queries_dim.
# queries = torch.randn(128, 32)
queries = torch.randn(1, 10, device=device)
logits = model(seq, queries = queries) # (batch, decoder seq, logits dim); recorded output shape is (5, 1, 10)
print(logits.shape)
# Drop the length-1 decoder-sequence axis so the class scores sit on dim 1.
if logits.dim()==3:
    logits=logits.squeeze(dim=1)
    # logits=logits.squeeze()
print(logits.shape)
print(logits)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")


# Spare random tensor; in the visible notebook it is referenced only by a commented-out line.
rand = torch.randn(1, 32, device=device)


torch.Size([5, 1, 10])
torch.Size([5, 10])
tensor([[-0.1030, -0.0022,  0.0743, -0.1157, -0.0273,  0.3297,  0.0989, -0.2940,
          0.0563, -0.2379],
        [-0.0372,  0.0368, -0.0437, -0.0644, -0.0031,  0.1651,  0.3213,  0.1422,
         -0.1121,  0.2000],
        [-0.0401, -0.1313, -0.1995, -0.0261,  0.0365,  0.1482,  0.2943,  0.0804,
         -0.0692, -0.0888],
        [ 0.0567,  0.1501, -0.0955, -0.3301, -0.2079,  0.1874, -0.0091, -0.1696,
         -0.1224, -0.2160],
        [-0.1451, -0.1224,  0.0519,  0.0072, -0.3229,  0.1852,  0.1805, -0.2019,
         -0.0330, -0.1617]], device='cuda:0', grad_fn=<SqueezeBackward1>)
Predicted class: tensor([5, 6, 6, 5, 5], device='cuda:0')


In [None]:
# Re-run the forward pass with an all-zero decoder query.
# In top-to-bottom execution order this is the last assignment to the global
# `queries`, which the train/test functions and the inference cell read.
# seq = torch.randn(64, 1, 28*28)
# seq = torch.randn(1, 1, 28*28)
# queries = rand
# queries = torch.ones(1, 32, device=device)
queries = torch.zeros(1, 10, device=device)

logits = model(seq, queries = queries) # (batch, decoder seq, logits dim); `seq` comes from the previous cell
print(logits.shape)
print(logits)

In [None]:
# Manual walk-through of the reshaping that `preprocess` (defined below) performs.
X = torch.rand(1,1, 28, 28, device=device)
# X = torch.rand(1,1,28*28, device=device)
if X.dim()==1:
    # A bare feature vector gets a leading batch axis.
    print("inn")
    X=X.unsqueeze(dim=0)
elif X.dim() not in [1,2]: print("erm ",X.dim())  # debug trace; prints "erm  4" for the 4-D input above
X=X.flatten(start_dim=1, end_dim=-1) # collapse every non-batch dimension into one
X=X.unsqueeze(dim=1)  # insert a length-1 sequence axis
print(X.shape)  # recorded output: torch.Size([1, 1, 784])

def preprocess(X):
    """Reshape a batch of inputs to (batch, 1, features) for the PerceiverIO model.

    Args:
        X: Tensor whose first dimension is the batch, e.g. (batch, 1, 28, 28).
            A 1-D tensor is treated as a single sample.

    Returns:
        Tensor of shape (batch, 1, features): all non-batch dimensions are
        flattened into one feature vector and a length-1 sequence axis is
        inserted at dim 1.
    """
    if X.dim() == 1:
        # Promote a bare feature vector to a batch of one (no debug print:
        # this runs on every call and should stay silent).
        X = X.unsqueeze(dim=0)
    # Collapse every non-batch dimension, then add the sequence axis.
    return X.flatten(start_dim=1, end_dim=-1).unsqueeze(dim=1)

def postprocess(logits):
    """Squeeze dim 1 of a 3-D logits tensor; tensors of any other rank pass through unchanged.

    For the (batch, 1, classes) output of the model this yields (batch, classes).
    The squeeze is a no-op when dim 1 is not of size 1.
    """
    is_sequence_output = logits.dim() == 3
    return logits.squeeze(dim=1) if is_sequence_output else logits

# Forward the preprocessed sample and drop the length-1 decoder axis so the
# class scores sit on dim 1. Without postprocess the logits stay (1, 1, 10),
# Softmax(dim=1)/argmax(1) operate on the singleton axis, and every position
# reports class 0.
logits = postprocess(model(X, queries=queries))
print(logits.shape)
print(logits)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

erm  4
torch.Size([1, 1, 784])
torch.Size([1, 1, 10])
tensor([[[ 0.1180, -0.2423,  0.0957,  0.0012, -0.2611, -0.0442,  0.4973,
          -0.0043, -0.2439, -0.1756]]], device='cuda:0',
       grad_fn=<AddBackward0>)
Predicted class: tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0')


#### Train and test functions

In [None]:
# train test function
def train(dataloader, model, loss_fn, optimizer):
    """Run one training pass over `dataloader`, printing the loss every 100 batches.

    Relies on the notebook globals `device` and `queries` and on the
    `preprocess` / `postprocess` helpers.

    Args:
        dataloader: Iterable of (inputs, labels) batches; `.dataset` must support len().
        model: Module called as `model(inputs, queries=queries)`.
        loss_fn: Callable mapping (predictions, labels) to a scalar loss tensor.
        optimizer: Optimizer over the model's parameters.
    """
    total_samples = len(dataloader.dataset)
    model.train()
    for step, (images, labels) in enumerate(dataloader):
        # Batches arrive as [64, 1, 28, 28]; preprocess reshapes them for the model.
        inputs = preprocess(images.to(device))
        targets = labels.to(device)

        # Forward pass; postprocess yields [64, 10] class scores.
        scores = postprocess(model(inputs, queries=queries))
        batch_loss = loss_fn(scores, targets)

        # Backpropagation and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        loss_value = batch_loss.item()
        seen = step * len(inputs)
        print(f"loss: {loss_value:>7f}  [{seen:>5d}/{total_samples:>5d}]")

def test(dataloader, model, loss_fn):
    """Evaluate `model` on `dataloader` and print accuracy and mean per-batch loss.

    Relies on the notebook globals `device` and `queries` and on the
    `preprocess` / `postprocess` helpers. Gradients are disabled during evaluation.

    Args:
        dataloader: Iterable of (inputs, labels) batches; `.dataset` must support len().
        model: Module called as `model(inputs, queries=queries)`.
        loss_fn: Callable mapping (predictions, labels) to a scalar loss tensor.
    """
    total_samples = len(dataloader.dataset)
    total_batches = len(dataloader)
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for images, labels in dataloader:
            targets = labels.to(device)
            inputs = preprocess(images.to(device))
            scores = postprocess(model(inputs, queries=queries))
            loss_sum += loss_fn(scores, targets).item()
            # Count samples whose highest-scoring class matches the label.
            hits += (scores.argmax(1) == targets).type(torch.float).sum().item()
    mean_loss = loss_sum / total_batches
    accuracy = hits / total_samples
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {mean_loss:>8f} \n")


#### Train and evaluate

In [None]:
# Training driver: optimise the current global `model` with cross-entropy loss and plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Alternate one training pass and one evaluation pass per epoch.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")
# Persist only the learned parameters (state_dict), not the module object itself.
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
# To restore later (disabled):
# model = NeuralNetwork()
# model.load_state_dict(torch.load("model.pth"))
# perceiverio 79.7  (presumably a test accuracy observed in an earlier run — TODO confirm)


Epoch 1
-------------------------------
loss: 2.067780  [    0/60000]
loss: 1.879863  [ 6400/60000]
loss: 1.677340  [12800/60000]
loss: 1.591202  [19200/60000]
loss: 1.443469  [25600/60000]
loss: 1.365009  [32000/60000]
loss: 1.273728  [38400/60000]
loss: 1.163322  [44800/60000]
loss: 1.147443  [51200/60000]
loss: 1.012249  [57600/60000]
Test Error: 
 Accuracy: 66.1%, Avg loss: 1.058853 

Epoch 2
-------------------------------
loss: 1.066996  [    0/60000]
loss: 1.024284  [ 6400/60000]
loss: 0.865718  [12800/60000]
loss: 1.004418  [19200/60000]
loss: 0.944259  [25600/60000]
loss: 0.898780  [32000/60000]
loss: 0.926899  [38400/60000]
loss: 0.849277  [44800/60000]
loss: 0.855602  [51200/60000]
loss: 0.736381  [57600/60000]
Test Error: 
 Accuracy: 72.9%, Avg loss: 0.799016 

Epoch 3
-------------------------------
loss: 0.787006  [    0/60000]
loss: 0.778557  [ 6400/60000]
loss: 0.612722  [12800/60000]
loss: 0.802568  [19200/60000]
loss: 0.774580  [25600/60000]
loss: 0.717348  [32000/600

RuntimeError: ignored

In [None]:
# Classify one randomly chosen test image and compare the prediction with its label.
classes = ["T-shirt/top","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle boot",]
model.eval()
import random
n=random.randint(0,1000)
print(n)
# test_data[n] already yields (image tensor, int label). Move the image with
# .to(device) rather than re-wrapping it in torch.tensor(...), which
# copy-constructs from an existing tensor and emits a UserWarning. The label is
# kept as a plain int so it can index `classes` directly.
image, label = test_data[n]
x, y = image.to(device), label

with torch.no_grad():
    # A single (1, 28, 28) image becomes (1, 1, 784) after preprocess.
    x=preprocess(x)
    logits = model(x,queries=queries)
    pred = postprocess(logits)
    predicted, actual = classes[pred[0].argmax(0).item()], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')


5
Predicted: "Trouser", Actual: "Trouser"


  import sys
