In [1]:
#cloning the CKAN git repository
!git clone https://github.com/AntonioTepsich/Convolutional-KANs.git
%cd Convolutional-KANs

Cloning into 'Convolutional-KANs'...
remote: Enumerating objects: 2379, done.[K
remote: Counting objects: 100% (409/409), done.[K
remote: Compressing objects: 100% (209/209), done.[K
remote: Total 2379 (delta 266), reused 311 (delta 196), pack-reused 1970 (from 1)[K
Receiving objects: 100% (2379/2379), 45.65 MiB | 35.33 MiB/s, done.
Resolving deltas: 100% (1286/1286), done.
/kaggle/working/Convolutional-KANs


In [2]:
#loading necessary libraries
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from architectures_28x28.CKAN_BN import CKAN_BN
from architectures_28x28.SimpleModels import *
from architectures_28x28.ConvNet import ConvNet
from architectures_28x28.KANConvs_MLP import KANC_MLP
from architectures_28x28.KKAN import KKAN_Convolutional_Network
from architectures_28x28.conv_and_kan import NormalConvsKAN
from kan_convolutional.KANConv import KAN_Convolutional_Layer

In [3]:
#defining transformations for the MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    #normalizing to [-1, 1]
    transforms.Normalize((0.5,), (0.5,))
])

#loading the MNIST dataset
train_dataset = MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = MNIST(root='./data', train=False, download=True, transform=transform)

#creating data loaders for training and testing
#dataLoader (refer: https://pytorch.org/tutorials/beginner/basics/data_tutorial.html)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 15.3MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 455kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 4.22MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 6.66MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [4]:
class KANC_MLP(nn.Module):
    def __init__(self,grid_size: int = 5):
        super().__init__()
        self.conv1 = KAN_Convolutional_Layer(in_channels=1,
            out_channels= 5,
            kernel_size= (3,3),
            grid_size = grid_size
        )

        self.conv2 = KAN_Convolutional_Layer(in_channels=5,
            out_channels= 5,
            kernel_size = (3,3),
            grid_size = grid_size
        )

        self.pool1 = nn.MaxPool2d(
            kernel_size=(2, 2)
        )
        
        self.flat = nn.Flatten() 
        
        self.linear1 = nn.Linear(125, 10)
        self.name = f"KANC MLP (Small) (gs = {grid_size})"


    def forward(self, x):
        x = self.conv1(x)

        x = self.pool1(x)

        x = self.conv2(x)
        x = self.pool1(x)
        x = self.flat(x)
        #print(x.shape)
        x = self.linear1(x)
        x = F.log_softmax(x, dim=1)
        return x

In [5]:
# Checking if GPU is available and use it if possible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initializing the model and moving it to the appropriate device
model_kanc = KANC_MLP().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_kanc.parameters(), lr=0.001)

In [6]:
#setting epochs for training
epochs = 1

#training the model
for epoch in range(epochs):
    model_kanc.train()
    total_loss = 0.0
    for images, labels in train_loader:
        #moving labels and images to the device (GPU or CPU)
        images, labels = images.to(device), labels.to(device)
        #zero the parameter gradients
        optimizer.zero_grad()
        #forward pass
        outputs = model_kanc(images)
        #calculating the loss
        loss = criterion(outputs, labels)
        #backward pass and optimize
        loss.backward()
        optimizer.step()
        #accumulating the loss for reporting
        total_loss += loss.item()

    #printing the average loss for the epoch
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(train_loader):.4f}")

Epoch [1/1], Loss: 0.2144


In [7]:
#evaluating the model
model_kanc.eval()
test_loss = 0
correct = 0
all_targets = []
all_predictions = []

with torch.no_grad():
  for images, labels in test_loader:
    images, labels = images.to(device), labels.to(device)
    #getting the predicted classes for this batch
    output = model_kanc(images)
    #calculating the loss for this batch
    test_loss += criterion(output, labels).item()
    #calculating the accuracy for this batch
    _, predicted = torch.max(output.data, 1)
    correct += (labels == predicted).sum().item()
    #collecting all targets and predictions for metric calculations
    all_targets.extend(labels.view_as(predicted).cpu().numpy())
    all_predictions.extend(predicted.cpu().numpy())

#normalizing test loss
test_loss /= len(test_loader.dataset)
#calculating accuracy
accuracy = correct / len(test_loader.dataset)
#calculating overall metrics
precision = precision_score(all_targets, all_predictions, average='macro')
recall = recall_score(all_targets, all_predictions, average='macro')
f1 = f1_score(all_targets, all_predictions, average='macro')

print('\nTest set:\n Accuracy: {:.2f}%, \n Precision: {:.2f}, \n Recall: {:.2f}, \n F1 Score: {:.2f}\n'.format(accuracy, precision, recall, f1))


Test set:
 Accuracy: 0.98%, 
 Precision: 0.98, 
 Recall: 0.98, 
 F1 Score: 0.98

