# ConvKAN + CIFAR-10
Purpose: Fit a Convolutional Kolmogorov–Arnold Network (ConvKAN) to the CIFAR-10 dataset in order to benchmark KAN performance.

The PyTorch Lightning library is used for convenience (training loop, logging, and seeding).

I've copied some parts from the KAN experimentation and MNISTMLP, due to there being some degree of overlap between the two.

In [1]:
# Imports
import numpy as np
import pandas as pd
import torch
from torch import nn
import lightning as L
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from lightning.pytorch.loggers import CSVLogger

import sys
sys.path.append('../convkans/kan_convolutional')
from KANConv import KAN_Convolutional_Layer

# Setup Device: prefer the GPU when one is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Setup Randomness -- https://lightning.ai/docs/pytorch/stable/common/trainer.html
# (workers=True asks Lightning to derive seeds for dataloader workers as well)
L.seed_everything(seed=42, workers=True)

# CUDA Efficiency: relax float32 matmul precision from the default to 'high'
torch.set_float32_matmul_precision('high')

Seed set to 42


In [2]:
# Dataset Setup: both splits live under the same root and are fetched on demand
cifar_kwargs = dict(root="./temp/", download=True)
train_dataset = CIFAR10(train=True, **cifar_kwargs)
test_dataset = CIFAR10(train=False, **cifar_kwargs)

class LCDataset(Dataset): # Lightning Compatible Dataset
    """In-memory dataset yielding ``(image, one_hot_target)`` float32 tensor pairs.

    Parameters
    ----------
    dataset
        Object exposing ``data`` (array-like of images laid out as
        ``(N, H, W, C)``) and ``targets`` (sequence of integer class ids).
        NOTE(review): this matches what torchvision's ``CIFAR10`` provides --
        confirm for any other dataset passed in.
    num_classes
        Width of the one-hot target vectors. Fixed up front so the target shape
        does not depend on which labels happen to be present in ``dataset``.

    Notes
    -----
    Pixel values are only cast to float32; no rescaling or normalisation is
    applied here.
    """

    def __init__(self, dataset, num_classes=10):
        # Channels-last -> channels-first: (N, H, W, C) -> (N, C, H, W).
        self.data = torch.tensor(dataset.data).type(torch.float32).permute(0, 3, 1, 2)
        # Explicit long dtype keeps this working for an empty target list too.
        labels = torch.tensor(dataset.targets, dtype=torch.long)
        # One-hot float targets, always ``num_classes`` wide.
        self.target = F.one_hot(labels, num_classes=num_classes).type(torch.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, one_hot_target)`` pair stored at ``idx``."""
        return self.data[idx], self.target[idx]

# Training batches are small and reshuffled each epoch.
train_split = LCDataset(train_dataset)
train_loader = DataLoader(dataset=train_split, batch_size=64, shuffle=True, num_workers=10)

# Evaluation uses one large batch (10000 samples) in a fixed order.
test_split = LCDataset(test_dataset)
test_loader = DataLoader(dataset=test_split, batch_size=10000, num_workers=10)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Model Declaration
class CNNKan(L.LightningModule):
    """Convolutional KAN classifier: three KAN convolution stages and a linear head.

    Each stage is a ``KAN_Convolutional_Layer`` followed by batch normalisation
    and ReLU; the first two stages also max-pool by 2. The flattened features
    feed a bias-free linear layer that produces 10 logits.

    Parameters
    ----------
    learning_rate
        Step size handed to the Adam optimiser in ``configure_optimizers``.
    """

    def __init__(self, learning_rate=1e-4):
        super().__init__()
        self.learning_rate = learning_rate
        self.net = nn.Sequential(
            KAN_Convolutional_Layer(in_channels=3, out_channels=24, kernel_size=(2, 2), grid_size=2, spline_order=2, device=device),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(24),
            nn.ReLU(),
            
            KAN_Convolutional_Layer(in_channels=24, out_channels=32, kernel_size=(3, 3), grid_size=2, spline_order=2, device=device),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            KAN_Convolutional_Layer(in_channels=32, out_channels=32, kernel_size=(3, 3), grid_size=2, spline_order=2, device=device),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            
            nn.Flatten(1),
            # 512 input features: presumably 32 channels x 4 x 4 for 3x32x32
            # inputs with unpadded, stride-1 KAN convolutions -- TODO confirm
            # against KAN_Convolutional_Layer's defaults.
            nn.Linear(512, 10, bias=False)
        )
    
    def forward(self, x):
        """Run ``x`` through the network and return the raw class logits."""
        return self.net(x)
    
    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one ``(x, y)`` training batch."""
        x, y = batch
        y_pred = self(x)
        loss = F.cross_entropy(y_pred, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Log cross-entropy loss and top-1 accuracy for one ``(x, y)`` test batch.

        ``y`` holds per-class scores along dim 1 (e.g. one-hot rows); the true
        class is recovered with ``argmax`` before comparing to the prediction.
        """
        x, y = batch
        y_pred = self(x)
        loss = F.cross_entropy(y_pred, y)
        v1 = torch.argmax(y_pred, dim=1)
        v2 = torch.argmax(y, dim=1)
        accuracy = torch.sum(torch.eq(v1, v2)) / len(y)
        self.log("test loss (cross entropy)", loss)
        self.log("accuracy", accuracy)

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return logits for a prediction batch.

        Accepts either a bare input tensor or an ``(x, y, ...)`` list/tuple as
        produced by the labelled loaders; in the latter case only the first
        element is fed to the network.
        """
        inputs = batch[0] if isinstance(batch, (list, tuple)) else batch
        return self(inputs)

    def configure_optimizers(self):
        """Build the Adam optimiser over all parameters using ``self.learning_rate``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

In [4]:
# Train + Test + Results
model = CNNKan()

# Metrics are written as CSV under logs/CIFAR10ConvKAN.
csv_logger = CSVLogger("logs", name="CIFAR10ConvKAN")

# NOTE: despite its name, `trained_model` is the Lightning Trainer, not the network.
trained_model = L.Trainer(logger=csv_logger, max_epochs=3, deterministic=True)
trained_model.fit(model, train_dataloaders=train_loader)
trained_model.test(model, dataloaders=test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params | Mode 
--------------------------------------------
0 | net  | Sequential | 103 K  | train
--------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.415     Total estimated model params size (MB)
5609      Modules in train mode
0         Modules in eval mode


Training: |                                               | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |                                                | 0/? [00:00<?, ?it/s]

[{'test loss (cross entropy)': 1.3914238214492798,
  'accuracy': 0.4991999864578247}]