<a href="https://colab.research.google.com/github/amrutadeo-22/resnet/blob/main/PatternNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets


Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.2-py3-none-any.whl (485 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 485.4/485.4 kB 13.1 MB/s eta 0:00:00
Downloading dill-0.3.8-py3-none-any.whl (116 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 116.3/116.3 kB 9.1 MB/s eta 0:00:00
Downloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 143.5/143.5 kB 9.8 MB/s eta 0:00:00
Downloading x

In [None]:
import torch
import os
from torch import nn, optim
from torch.utils.data import DataLoader, IterableDataset
from torchvision import transforms
import torch.nn.functional as F
from einops import repeat
from torch.amp import autocast, GradScaler
from datasets import load_dataset
from PIL import Image

# Stream PatternNet from the Hugging Face Hub instead of downloading it up front.
dataset = load_dataset("blanchon/PatternNet", split="train", streaming=True)
# A streamed dataset is read in stored order and the DataLoader cannot shuffle an
# IterableDataset, so shuffle at the source: shard order is randomised and samples
# are drawn from a rolling buffer of `buffer_size` items.
# NOTE(review): assumes the stored order groups samples by label (the unstable
# per-epoch accuracy suggests it) — confirm. The order repeats every epoch unless
# `dataset.set_epoch(epoch)` is called.
dataset = dataset.shuffle(seed=42, buffer_size=1_000)

# Preprocessing applied to every image: resize to 128x128, convert to a tensor,
# then normalise each channel with mean 0.5 / std 0.5 (maps [0, 1] to [-1, 1]).
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


class PatternNetDataset(IterableDataset):
    """Iterable wrapper that turns records into ``(image, label)`` training pairs.

    Each record of the wrapped iterable must expose an ``"image"`` and a
    ``"label"`` entry. The optional ``transform`` is applied to the image; the
    label is returned as an int64 tensor.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __iter__(self):
        for record in self.dataset:
            picture, target = record["image"], record["label"]
            # A falsy transform (e.g. None) leaves the image untouched.
            picture = self.transform(picture) if self.transform else picture
            yield picture, torch.tensor(target, dtype=torch.long)


# Batches of 64 streamed samples, prepared by two worker processes and placed in
# pinned host memory; the DataLoader itself does no shuffling.
train_loader = DataLoader(
    dataset=PatternNetDataset(dataset, transform=transform),
    batch_size=64,
    num_workers=2,
    pin_memory=True,
    shuffle=False,
)


class Perceiver(nn.Module):
    """Latent-array classifier operating on flattened inputs.

    The input is flattened and projected to ``latent_dim``, passed through a small
    MLP and broadcast-added to a learned array of ``num_latents`` latent vectors.
    The latents are then refined ``depth`` times by two residual MLP blocks
    (weights shared across iterations), mean-pooled, and mapped to logits.

    Note: ``cross_attn`` and ``self_attn`` are Linear-ReLU-Linear stacks applied
    per latent vector; despite the names they contain no attention operation.

    Args:
        input_dim: Number of features per sample after flattening.
        num_classes: Size of the output logit vector.
        latent_dim: Width of each latent vector.
        num_latents: Number of learned latent vectors.
        depth: Number of refinement iterations.
        dropout: Dropout probability at the end of the feed-forward block.
    """

    def __init__(self, input_dim, num_classes, latent_dim=256, num_latents=64, depth=3, dropout=0.1):
        super().__init__()
        self.depth = depth
        self.latents = nn.Parameter(torch.randn(1, num_latents, latent_dim))
        self.data_proj = nn.Linear(input_dim, latent_dim)

        # Hidden width of the two mixer blocks: 8 "heads" x 32 dims per head.
        hidden_width = 8 * 32

        def make_mixer():
            layers = [
                nn.Linear(latent_dim, hidden_width),
                nn.ReLU(),
                nn.Linear(hidden_width, latent_dim),
            ]
            return nn.Sequential(*layers)

        self.cross_attn = make_mixer()
        self.self_attn = make_mixer()

        ff_width = latent_dim * 2
        self.feed_forward = nn.Sequential(
            nn.Linear(latent_dim, ff_width),
            nn.ReLU(),
            nn.Linear(ff_width, latent_dim),
            nn.Dropout(dropout),
        )
        self.to_logits = nn.Sequential(
            nn.LayerNorm(latent_dim),
            nn.Linear(latent_dim, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for input ``x``."""
        batch, *_rest = x.shape
        # One projected token per sample: (batch, latent_dim) -> (batch, 1, latent_dim).
        token = repeat(self.data_proj(x.flatten(1)), "b d -> b 1 d")

        # Give every sample its own copy of the learned latents, then inject the
        # token (broadcast over the latent axis).
        state = repeat(self.latents, "1 n d -> b n d", b=batch)
        state = state + self.cross_attn(token)

        for _step in range(self.depth):
            state = state + self.self_attn(state)
            state = state + self.feed_forward(state)

        pooled = state.mean(dim=1)
        return self.to_logits(pooled)


def train_patternnet(num_epochs=10):
    """Train a ``Perceiver`` on the module-level ``train_loader``.

    Prints the mean batch loss and the training accuracy after every epoch.

    Args:
        num_epochs: Number of passes over ``train_loader``. Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        The trained model, so it can be evaluated or saved by the caller.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Mixed precision and loss scaling are only used on CUDA. Requesting "cuda"
    # autocast / GradScaler on the CPU fallback merely produces warnings.
    use_amp = device.type == "cuda"

    model = Perceiver(input_dim=128*128*3, num_classes=38).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=3e-4)
    scaler = GradScaler(device.type, enabled=use_amp)

    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total_samples, num_batches = 0.0, 0, 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            images = images.flatten(1)

            optimizer.zero_grad()
            with autocast(device_type=device.type, enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            # With AMP disabled the scaler is a pass-through around backward/step.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()
            total_samples += labels.size(0)
            num_batches += 1

        # An empty loader would otherwise fail on the averages below.
        if num_batches == 0:
            print(f"Epoch {epoch+1}/{num_epochs}: no batches received from train_loader")
            continue

        print(f"Epoch {epoch+1}/{num_epochs}: Loss: {total_loss / num_batches:.4f}, "
              f"Accuracy: {correct / total_samples * 100:.2f}%")

    return model

# Run training when this code is executed as the main program (as it is inside a
# notebook kernel), but not when it is imported as a module.
if __name__ == "__main__":
    train_patternnet()




Epoch 1/10: Loss: 3.2877, Accuracy: 30.40%
Epoch 2/10: Loss: 3.6458, Accuracy: 3.02%
Epoch 3/10: Loss: 3.1236, Accuracy: 2.89%
Epoch 4/10: Loss: 2.5790, Accuracy: 10.87%
Epoch 5/10: Loss: 2.4332, Accuracy: 10.50%
Epoch 6/10: Loss: 2.2772, Accuracy: 13.42%
Epoch 7/10: Loss: 2.5782, Accuracy: 18.10%
Epoch 8/10: Loss: 2.0879, Accuracy: 17.99%
Epoch 9/10: Loss: 2.0936, Accuracy: 18.62%
Epoch 10/10: Loss: 1.9522, Accuracy: 20.88%


In [None]:
import torch
import os
from torch import nn, optim
from torch.utils.data import DataLoader, IterableDataset
from torchvision import transforms
import torch.nn.functional as F
from einops import repeat
from torch.amp import autocast, GradScaler
from datasets import load_dataset


# Stream PatternNet from the Hugging Face Hub instead of downloading it up front.
dataset = load_dataset("blanchon/PatternNet", split="train", streaming=True)
# A streamed dataset is read in stored order and the DataLoader cannot shuffle an
# IterableDataset, so shuffle at the source: shard order is randomised and samples
# are drawn from a rolling buffer of `buffer_size` items.
# NOTE(review): assumes the stored order groups samples by label (the unstable
# per-epoch accuracy suggests it) — confirm. The order repeats every epoch unless
# `dataset.set_epoch(epoch)` is called.
dataset = dataset.shuffle(seed=42, buffer_size=1_000)


# Preprocessing applied to every image before it reaches the CNN.
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize for CNN
    transforms.ToTensor(),
    # Mean 0.5 / std 0.5 per channel maps [0, 1] to [-1, 1].
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


class PatternNetDataset(IterableDataset):
    """Stream ``(image, label)`` pairs out of an iterable of records.

    Every record is indexed with ``"image"`` and ``"label"``. When a truthy
    ``transform`` is supplied it is applied to the image; the label is wrapped
    in an int64 tensor.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __iter__(self):
        for sample in self.dataset:
            img, cls = sample["image"], sample["label"]
            if self.transform:
                yield self.transform(img), torch.tensor(cls, dtype=torch.long)
            else:
                yield img, torch.tensor(cls, dtype=torch.long)


# Batches of 32 streamed samples, prepared by two worker processes and placed in
# pinned host memory; the DataLoader itself does no shuffling.
train_loader = DataLoader(
    dataset=PatternNetDataset(dataset, transform=transform),
    batch_size=32,
    num_workers=2,
    pin_memory=True,
    shuffle=False,
)


class HybridPerceiver(nn.Module):
    """Small CNN feature extractor followed by a latent-array classifier head.

    Three Conv-ReLU-MaxPool stages reduce the image to a 128-channel feature map,
    which is flattened and projected to ``latent_dim``. The projected vector is
    passed through an MLP and broadcast-added to ``num_latents`` learned latent
    vectors. The latents are refined ``depth`` times by two residual MLP blocks
    (weights shared across iterations), mean-pooled and mapped to logits.

    Note: ``cross_attn`` and ``self_attn`` are Linear-ReLU-Linear stacks; despite
    the names they contain no attention operation.

    Args:
        num_classes: Size of the output logit vector.
        latent_dim: Width of each latent vector.
        num_latents: Number of learned latent vectors.
        depth: Number of refinement iterations. This argument used to be ignored
            (the loop was hard-coded to 3); the default keeps that behaviour.
        image_size: Side length of the square input images. Defaults to 128, the
            size that used to be hard-coded into ``flat_dim``.
    """

    def __init__(self, num_classes, latent_dim=256, num_latents=64, depth=3, image_size=128):
        super().__init__()
        self.depth = depth
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # The padded 3x3 convolutions keep the spatial size; the three 2x2
        # max-pools shrink each side by a factor of 8 (rounding down).
        side = image_size // 8
        self.flat_dim = 128 * side * side
        self.latents = nn.Parameter(torch.randn(1, num_latents, latent_dim))
        self.data_proj = nn.Linear(self.flat_dim, latent_dim)

        def get_attention(dim_in, dim_out, heads=8, dim_head=32):
            inner_dim = heads * dim_head
            return nn.Sequential(
                nn.Linear(dim_in, inner_dim),
                nn.ReLU(),
                nn.Linear(inner_dim, dim_out),
            )

        self.cross_attn = get_attention(latent_dim, latent_dim)
        self.self_attn = get_attention(latent_dim, latent_dim)
        self.feed_forward = nn.Sequential(
            nn.Linear(latent_dim, latent_dim * 2),
            nn.ReLU(),
            nn.Linear(latent_dim * 2, latent_dim),
        )
        self.to_logits = nn.Sequential(
            nn.LayerNorm(latent_dim),
            nn.Linear(latent_dim, num_classes),
        )

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for a 4-D image batch."""
        b, _, _, _ = x.shape
        x = self.cnn(x).flatten(1)  # CNN feature extractor
        # (b, latent_dim) -> (b, 1, latent_dim); same result as the einops
        # pattern "b d -> b 1 d", using a torch-native op.
        x = self.data_proj(x).unsqueeze(1)

        # One copy of the learned latents per sample: (b, num_latents, latent_dim).
        latents = self.latents.expand(b, -1, -1)
        latents = latents + self.cross_attn(x)

        # Honour the configured depth instead of a hard-coded 3.
        for _ in range(self.depth):
            latents = latents + self.self_attn(latents)
            latents = latents + self.feed_forward(latents)

        return self.to_logits(latents.mean(dim=1))


def train_patternnet(num_epochs=25):
    """Train a ``HybridPerceiver`` on the module-level ``train_loader``.

    Prints the mean batch loss and the training accuracy after every epoch. The
    learning rate is multiplied by 0.8 every 5 epochs.

    Args:
        num_epochs: Number of passes over ``train_loader``. Defaults to 25, the
            value that used to be hard-coded.

    Returns:
        The trained model, so it can be evaluated or saved by the caller.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Mixed precision and loss scaling are only used on CUDA. Requesting "cuda"
    # autocast / GradScaler on the CPU fallback merely produces warnings.
    use_amp = device.type == "cuda"

    model = HybridPerceiver(num_classes=38).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)  # Reduce LR over time
    scaler = GradScaler(device.type, enabled=use_amp)

    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total_samples, num_batches = 0.0, 0, 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            with autocast(device_type=device.type, enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            # With AMP disabled the scaler is a pass-through around backward/step.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()
            total_samples += labels.size(0)
            num_batches += 1

        # An empty loader would otherwise fail on the averages below; no optimizer
        # step happened either, so the scheduler is not advanced.
        if num_batches == 0:
            print(f"Epoch {epoch+1}/{num_epochs}: no batches received from train_loader")
            continue

        scheduler.step()
        print(f"Epoch {epoch+1}/{num_epochs}: Loss: {total_loss / num_batches:.4f}, "
              f"Accuracy: {correct / total_samples * 100:.2f}%")

    return model

# Run training when this code is executed as the main program (as it is inside a
# notebook kernel), but not when it is imported as a module.
if __name__ == "__main__":
    train_patternnet()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/4.11k [00:00<?, ?B/s]



Epoch 1/25: Loss: 2.1511, Accuracy: 48.67%
Epoch 2/25: Loss: 1.6537, Accuracy: 48.35%
Epoch 3/25: Loss: 1.4479, Accuracy: 50.08%
Epoch 4/25: Loss: 1.3862, Accuracy: 50.57%
Epoch 5/25: Loss: 1.6580, Accuracy: 43.10%
Epoch 6/25: Loss: 1.7370, Accuracy: 40.41%
Epoch 7/25: Loss: 1.7888, Accuracy: 39.57%
Epoch 8/25: Loss: 1.4653, Accuracy: 45.43%
Epoch 9/25: Loss: 1.7358, Accuracy: 38.17%
Epoch 10/25: Loss: 1.3708, Accuracy: 47.05%


In [None]:
import torch
import os
from torch import nn, optim
from torch.utils.data import DataLoader, IterableDataset
from torchvision import transforms
import torch.nn.functional as F
from einops import repeat
from torch.amp import autocast, GradScaler
from datasets import load_dataset


# Stream PatternNet from the Hugging Face Hub instead of downloading it up front.
dataset = load_dataset("blanchon/PatternNet", split="train", streaming=True)
# A streamed dataset is read in stored order and the DataLoader cannot shuffle an
# IterableDataset, so shuffle at the source: shard order is randomised and samples
# are drawn from a rolling buffer of `buffer_size` items.
# NOTE(review): assumes the stored order groups samples by label (the unstable
# per-epoch accuracy suggests it) — confirm. The order repeats every epoch unless
# `dataset.set_epoch(epoch)` is called.
dataset = dataset.shuffle(seed=42, buffer_size=1_000)


# Preprocessing applied to every image before it reaches the CNN.
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize for CNN
    transforms.ToTensor(),
    # Mean 0.5 / std 0.5 per channel maps [0, 1] to [-1, 1].
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


class PatternNetDataset(IterableDataset):
    """Adapter exposing an iterable of records as ``(image, label)`` pairs.

    Records are indexed with ``"image"`` and ``"label"``. A truthy ``transform``
    is applied to the image; the label is emitted as an int64 tensor.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __iter__(self):
        for entry in self.dataset:
            pixels = entry["image"]
            class_id = entry["label"]
            # Skip preprocessing entirely when no transform was configured.
            processed = self.transform(pixels) if self.transform else pixels
            yield processed, torch.tensor(class_id, dtype=torch.long)


# Batches of 32 streamed samples, prepared by two worker processes and placed in
# pinned host memory; the DataLoader itself does no shuffling.
train_loader = DataLoader(
    dataset=PatternNetDataset(dataset, transform=transform),
    batch_size=32,
    num_workers=2,
    pin_memory=True,
    shuffle=False,
)


class HybridPerceiver(nn.Module):
    """Small CNN feature extractor followed by a latent-array classifier head.

    Three Conv-ReLU-MaxPool stages reduce the image to a 128-channel feature map,
    which is flattened and projected to ``latent_dim``. The projected vector is
    passed through an MLP and broadcast-added to ``num_latents`` learned latent
    vectors. The latents are refined ``depth`` times by two residual MLP blocks
    (weights shared across iterations), mean-pooled and mapped to logits.

    Note: ``cross_attn`` and ``self_attn`` are Linear-ReLU-Linear stacks; despite
    the names they contain no attention operation.

    Args:
        num_classes: Size of the output logit vector.
        latent_dim: Width of each latent vector.
        num_latents: Number of learned latent vectors.
        depth: Number of refinement iterations. This argument used to be ignored
            (the loop was hard-coded to 3); the default keeps that behaviour.
        image_size: Side length of the square input images. Defaults to 128, the
            size that used to be hard-coded into ``flat_dim``.
    """

    def __init__(self, num_classes, latent_dim=256, num_latents=64, depth=3, image_size=128):
        super().__init__()
        self.depth = depth
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # The padded 3x3 convolutions keep the spatial size; the three 2x2
        # max-pools shrink each side by a factor of 8 (rounding down).
        side = image_size // 8
        self.flat_dim = 128 * side * side
        self.latents = nn.Parameter(torch.randn(1, num_latents, latent_dim))
        self.data_proj = nn.Linear(self.flat_dim, latent_dim)

        def get_attention(dim_in, dim_out, heads=8, dim_head=32):
            inner_dim = heads * dim_head
            return nn.Sequential(
                nn.Linear(dim_in, inner_dim),
                nn.ReLU(),
                nn.Linear(inner_dim, dim_out),
            )

        self.cross_attn = get_attention(latent_dim, latent_dim)
        self.self_attn = get_attention(latent_dim, latent_dim)
        self.feed_forward = nn.Sequential(
            nn.Linear(latent_dim, latent_dim * 2),
            nn.ReLU(),
            nn.Linear(latent_dim * 2, latent_dim),
        )
        self.to_logits = nn.Sequential(
            nn.LayerNorm(latent_dim),
            nn.Linear(latent_dim, num_classes),
        )

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for a 4-D image batch."""
        b, _, _, _ = x.shape
        x = self.cnn(x).flatten(1)
        # (b, latent_dim) -> (b, 1, latent_dim); same result as the einops
        # pattern "b d -> b 1 d", using a torch-native op.
        x = self.data_proj(x).unsqueeze(1)

        # One copy of the learned latents per sample: (b, num_latents, latent_dim).
        latents = self.latents.expand(b, -1, -1)
        latents = latents + self.cross_attn(x)

        # Honour the configured depth instead of a hard-coded 3.
        for _ in range(self.depth):
            latents = latents + self.self_attn(latents)
            latents = latents + self.feed_forward(latents)

        return self.to_logits(latents.mean(dim=1))

# Train function
def train_patternnet(num_epochs=20):
    """Train a ``HybridPerceiver`` on the module-level ``train_loader``.

    Prints the mean batch loss and the training accuracy after every epoch. The
    learning rate is multiplied by 0.8 every 5 epochs.

    Args:
        num_epochs: Number of passes over ``train_loader``. Defaults to 20, the
            number of epochs the loop actually ran; the progress message used
            to report a total of 25.

    Returns:
        The trained model, so it can be evaluated or saved by the caller.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Mixed precision and loss scaling are only used on CUDA. Requesting "cuda"
    # autocast / GradScaler on the CPU fallback merely produces warnings.
    use_amp = device.type == "cuda"

    model = HybridPerceiver(num_classes=38).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)
    scaler = GradScaler(device.type, enabled=use_amp)

    for epoch in range(num_epochs):
        model.train()
        total_loss, correct, total_samples, num_batches = 0.0, 0, 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            with autocast(device_type=device.type, enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            # With AMP disabled the scaler is a pass-through around backward/step.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()
            total_samples += labels.size(0)
            num_batches += 1

        # An empty loader would otherwise fail on the averages below; no optimizer
        # step happened either, so the scheduler is not advanced.
        if num_batches == 0:
            print(f"Epoch {epoch+1}/{num_epochs}: no batches received from train_loader")
            continue

        scheduler.step()
        # Report the real epoch total rather than a separately hard-coded number.
        print(f"Epoch {epoch+1}/{num_epochs}: Loss: {total_loss / num_batches:.4f}, "
              f"Accuracy: {correct / total_samples * 100:.2f}%")

    return model

# Run training when this code is executed as the main program (as it is inside a
# notebook kernel), but not when it is imported as a module.
if __name__ == "__main__":
    train_patternnet()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/4.11k [00:00<?, ?B/s]



Epoch 1/25: Loss: 2.3217, Accuracy: 44.63%


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 4d406444-4f7e-4c57-95db-8b972cabb356)')' thrown while requesting GET https://huggingface.co/datasets/blanchon/PatternNet/resolve/ccaf401549ac1f74824f809564db3332ab580309/data/train-00000-of-00003.parquet
Retrying in 1s [Retry 1/5].
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 5b1fbde8-c49e-4342-b92a-a11746fb962c)')' thrown while requesting GET https://huggingface.co/datasets/blanchon/PatternNet/resolve/ccaf401549ac1f74824f809564db3332ab580309/data/train-00002-of-00003.parquet
Retrying in 1s [Retry 1/5].


Epoch 2/25: Loss: 1.9568, Accuracy: 42.93%
Epoch 3/25: Loss: 1.5443, Accuracy: 49.34%


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 5151dd50-f4ff-4263-aed6-b8888c777b54)')' thrown while requesting GET https://huggingface.co/datasets/blanchon/PatternNet/resolve/ccaf401549ac1f74824f809564db3332ab580309/data/train-00002-of-00003.parquet
Retrying in 1s [Retry 1/5].


Epoch 4/25: Loss: 1.3549, Accuracy: 49.95%


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 40be05bc-72c8-444c-b737-9fe44c2838a9)')' thrown while requesting GET https://huggingface.co/datasets/blanchon/PatternNet/resolve/ccaf401549ac1f74824f809564db3332ab580309/data/train-00001-of-00003.parquet
Retrying in 1s [Retry 1/5].


Epoch 5/25: Loss: 1.8003, Accuracy: 41.22%
Epoch 6/25: Loss: 2.6133, Accuracy: 23.21%
Epoch 7/25: Loss: 2.0683, Accuracy: 31.11%
Epoch 8/25: Loss: 1.8144, Accuracy: 36.14%
Epoch 9/25: Loss: 1.6049, Accuracy: 41.15%


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: d6047ef0-52db-46a2-b3bd-e5f8010c0ab6)')' thrown while requesting GET https://huggingface.co/datasets/blanchon/PatternNet/resolve/ccaf401549ac1f74824f809564db3332ab580309/data/train-00001-of-00003.parquet
Retrying in 1s [Retry 1/5].


Epoch 10/25: Loss: 1.6702, Accuracy: 40.41%


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: d8c576ba-d252-435b-ab93-44d728ce6814)')' thrown while requesting GET https://huggingface.co/datasets/blanchon/PatternNet/resolve/ccaf401549ac1f74824f809564db3332ab580309/data/train-00001-of-00003.parquet
Retrying in 1s [Retry 1/5].
