In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so the following cells can reach files stored there.
# This import only exists inside a Google Colab runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os

# Project folder on the mounted Google Drive; edit this path to point at your own copy.
# Presumably this is where the local `spline` module imported in the setup cell lives - TODO confirm.
path = '/content/drive/MyDrive/CKAN-drive'
os.chdir(path)

# Print the resulting working directory to confirm the change took effect
print("Current Working Directory is changed to:", os.getcwd())

Current Working Directory is changed to: /content/drive/.shortcut-targets-by-id/1ZmxbZ-k3A2Y5JI2lhHxbV7Wj0OLLdRAj/CKAN-drive


In [3]:
# Setup
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from torchsummary import summary
from PIL import Image
import spline
from torch.utils.data import TensorDataset, DataLoader
import os
from torchvision.datasets import MNIST

import torch.nn.functional as F


In [None]:
# Main implementation of CKAN
class CKAN(nn.Module):
    """Convolution-style layer whose kernel entries are learnable 1-D functions.

    Every sliding window of the input is flattened into
    ``in_channels * kernel_size * kernel_size`` values ``x_j``.  For output
    channel ``c`` the response at one window is

        sum_j base_weights[c, j] * silu(x_j)
          + sum_j spline_weights[c, j] * curve_{c, j}(x_j)

    where ``curve_{c, j}`` is whatever ``spline.coef2curve`` evaluates from
    ``knots[c]`` and ``coeff[c]`` (presumably a B-spline of order ``degree``;
    confirm against the local ``spline`` module).

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the layer.
        kernel_size: Side length of the (square) sliding window.
        grid: Number of grid intervals; ``grid + 1`` knots are placed evenly
            over ``grid_range``.
        stride: Step between consecutive windows.
        padding: Zero padding added to all four sides of the input.
        degree: Order handed to ``spline.coef2curve``; also fixes the number
            of coefficients per curve to ``grid + degree``.
        grid_range: ``[low, high]`` interval covered by the knots.
        device: Device on which all parameters are created.
    """

    def __init__(self, in_channels, out_channels, kernel_size, grid, stride=1, padding=0, degree=3, grid_range=[-1, 1], device='cuda:0'):
        super(CKAN, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.degree = degree
        # Store a copy so instances never share (or mutate) the default-argument list.
        self.grid_range = list(grid_range)
        self.device = device

        # Number of scalar inputs seen by one output unit (one per kernel entry).
        n_inputs = in_channels * kernel_size * kernel_size

        # Fixed, evenly spaced knots replicated for every (output channel, kernel entry) pair.
        knots = torch.linspace(grid_range[0], grid_range[1], steps=grid + 1, device=device).view(1, 1, -1)
        knots = knots.repeat(out_channels, n_inputs, 1)
        self.knots = nn.Parameter(knots, requires_grad=False)

        # Trainable curve coefficients, small random initialisation.
        self.coeff = nn.Parameter(0.1 * torch.randn(out_channels, n_inputs, grid + degree, device=device), requires_grad=True)

        # Base-branch weights: Xavier-uniform, created directly on `device` so that
        # every parameter of the layer lives on the same device from the start.
        base_weights = torch.empty(out_channels, n_inputs, device=device)
        nn.init.xavier_uniform_(base_weights)
        self.base_weights = nn.Parameter(base_weights, requires_grad=True)

        # Spline-branch weights start at one.
        self.spline_weights = nn.Parameter(torch.ones(out_channels, n_inputs, device=device), requires_grad=True)

    def forward(self, x):
        """Apply the layer to ``x`` of shape ``(N, in_channels, H, W)``.

        Returns:
            Tensor of shape ``(N, out_channels, Hp, Wp)`` on the same device and
            with the same dtype as ``x``, where
            ``Hp = (H + 2 * padding - kernel_size) // stride + 1`` (same for ``Wp``).
        """
        N, _, H, W = x.shape
        n_inputs = self.in_channels * self.kernel_size * self.kernel_size
        x_padded = F.pad(x, [self.padding] * 4)

        # All sliding windows: (N, C*K*K, L) with L the number of windows per sample.
        unfolded = F.unfold(x_padded, kernel_size=self.kernel_size, stride=self.stride, padding=0)

        # One row per window: (N*L, C*K*K); the transposed view (C*K*K, N*L) feeds coef2curve.
        windows = unfolded.transpose(1, 2).reshape(-1, n_inputs)
        spline_input = windows.t()

        Hp = (H + 2 * self.padding - self.kernel_size) // self.stride + 1
        Wp = (W + 2 * self.padding - self.kernel_size) // self.stride + 1
        # Follow the input's device/dtype rather than the constructor-time `self.device`,
        # which goes stale once the module has been moved with `.to(...)`.
        output = x.new_zeros((N, self.out_channels, Hp, Wp))

        # Base branch w_b * silu(x) does not depend on the channel loop: compute it
        # once for all output channels -> (N*L, out_channels).
        base_values = F.linear(F.silu(windows), self.base_weights)

        for c in range(self.out_channels):
            # Curves evaluated per kernel entry: expected shape (C*K*K, N*L).
            curves = spline.coef2curve(spline_input, self.knots[c], self.coeff[c], self.degree, device=x.device)
            # Weighted sum over kernel entries -> (N*L,)
            spline_values = F.linear(curves.t(), self.spline_weights[c])
            output[:, c, :, :] = (base_values[:, c] + spline_values).view(N, Hp, Wp)

        return output

In [5]:

# CNN architecture
class CNNNet(nn.Module):
    """Plain CNN baseline: two 3x3 conv + ReLU + 2x2 max-pool stages, then a linear classifier.

    Args:
        in_channels: Number of channels of the input images.
        hidden_channels: Sequence with the output channels of the two conv layers.
        out_features: Number of outputs of the final linear layer.
        device: Not used by this class; kept so the signature matches CKANNet.
        input_size: ``(height, width)`` of the input images.  Only used to size
            the linear layer; the default matches 28x28 MNIST digits.
    """

    def __init__(self, in_channels, hidden_channels, out_features, device='cuda:0', input_size=(28, 28)):
        super(CNNNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, hidden_channels[0],
                               kernel_size=3,
                               padding=1)

        self.conv2 = nn.Conv2d(hidden_channels[0], hidden_channels[1],
                               kernel_size=3,
                               padding=1)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(2)

        # The convolutions keep the spatial size (kernel 3, padding 1); each of the
        # two poolings halves it (rounding down).  For 28x28 inputs and 5 channels
        # this gives 5 * 7 * 7 = 245, the value that used to be hard-coded.
        height, width = input_size
        flat_features = hidden_channels[1] * (height // 2 // 2) * (width // 2 // 2)
        self.linear1 = nn.Linear(flat_features, out_features)

    def forward(self, x):
        """Return class scores of shape ``(batch, out_features)`` for image batch ``x``."""
        x = self.max_pool(self.relu(self.conv1(x)))
        x = self.max_pool(self.relu(self.conv2(x)))
        # Flatten everything except the batch dimension before the classifier.
        x = x.flatten(1)
        return self.linear1(x)

# CKAN architecture
class CKANNet(nn.Module):
    """Two CKAN layers, each followed by 2x2 max-pooling, then a linear classifier.

    No separate activation is applied between the layers.

    Args:
        in_channels: Number of channels of the input images.
        hidden_channels: Sequence with the output channels of the two CKAN layers.
        out_features: Number of outputs of the final linear layer.
        device: Device handed to the CKAN layers for parameter creation.
        input_size: ``(height, width)`` of the input images.  Only used to size
            the linear layer; the default matches 28x28 MNIST digits.
    """

    def __init__(self, in_channels, hidden_channels, out_features, device='cuda:0', input_size=(28, 28)):
        super(CKANNet, self).__init__()
        self.conv1 = CKAN(in_channels, hidden_channels[0],
                          kernel_size=3,
                          padding=1, grid=5, device=device)

        self.conv2 = CKAN(hidden_channels[0], hidden_channels[1],
                          kernel_size=3,
                          padding=1, grid=5, device=device)
        self.max_pool = nn.MaxPool2d(2)

        # Both CKAN layers keep the spatial size (kernel 3, padding 1, stride 1); each
        # of the two poolings halves it (rounding down).  For 28x28 inputs and
        # 5 channels this gives 5 * 7 * 7 = 245, the value that used to be hard-coded.
        height, width = input_size
        flat_features = hidden_channels[1] * (height // 2 // 2) * (width // 2 // 2)
        self.linear1 = nn.Linear(flat_features, out_features)

    def forward(self, x):
        """Return class scores of shape ``(batch, out_features)`` for image batch ``x``."""
        x = self.max_pool(self.conv1(x))
        x = self.max_pool(self.conv2(x))
        # flatten (unlike view) does not require a contiguous tensor.
        x = x.flatten(1)
        return self.linear1(x)


In [6]:
# Transforms: convert images to tensors, then normalize with mean 0.5 and std 0.5
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Load the MNIST training and test splits (downloaded into ./data when missing).
# NOTE(review): the original comment mentioned filtering to two classes, but no filtering is applied here.
mnist_train = MNIST(root='./data', train=True, download=True, transform=transform)

mnist_test = MNIST(root='./data', train=False, download=True, transform=transform)

# DataLoaders: batches of 64; only the training set is shuffled
train_loader = DataLoader(mnist_train, batch_size=64, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=64, shuffle=False)

In [7]:
def evaluate_accuracy(data_loader, net, device=None):
    """Evaluate the classification accuracy of a model on a data set.

    Args:
        data_loader: Iterable yielding ``(X, y)`` batches of inputs and integer labels.
        net: Model to evaluate; it is switched to evaluation mode and left in it.
        device: Device the batches are copied to.  When omitted, the device of the
            model's first parameter is used (CPU for a parameter-free model), so
            the function also works on machines without CUDA.

    Returns:
        Fraction of samples whose arg-max prediction equals the label, as a float in [0, 1].

    Raises:
        ZeroDivisionError: If ``data_loader`` yields no samples.
    """
    net.eval()  # make sure network is in evaluation mode

    if device is None:
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    # Keep the running count on the device to avoid a host sync on every batch.
    correct = torch.zeros((), dtype=torch.long, device=device)
    n = 0

    with torch.no_grad():
        for X, y in data_loader:
            # Copy the data to device.
            X, y = X.to(device), y.to(device).long()
            correct += (torch.argmax(net(X), dim=1) == y).sum()
            n += y.shape[0]  # increases with the number of samples in the batch
    return correct.item() / n

In [8]:
def try_gpu():
    """
    Pick the compute device: torch.device 'cuda:0' when CUDA is available,
    otherwise torch.device 'cpu'.
    """
    return torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


In [None]:
in_channels = 1 # Black-white images in MNIST digits
hidden_channels = [5, 5]
out_features = 10

# Training parameters
learning_rate = 0.0035
epochs = 5

# Use the GPU when one is available, otherwise fall back to the CPU
device = try_gpu()

# Uncomment other line if you want to test plain CNN
net = CKANNet(in_channels, hidden_channels, out_features, device=device)
# net = CNNNet(in_channels, hidden_channels, out_features, device=device)
net.to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Per-iteration training losses and per-epoch accuracies
train_losses = []
train_accs = []
test_accs = []

for epoch in range(epochs):
    # evaluate_accuracy leaves the network in eval mode, so re-enable training mode every epoch
    net.train()

    # Training loop
    for x_batch, y_batch in train_loader:
        # Move the batch to the same device as the model
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        # Set the gradients to zero
        optimizer.zero_grad()
        # Perform forward pass
        y_pred = net(x_batch)
        # Compute the loss
        loss = criterion(y_pred, y_batch)
        # Record a detached copy so the stored history does not keep the autograd graph alive
        train_losses.append(loss.detach())

        # Backward computation and update
        loss.backward()
        optimizer.step()

    # Compute train and test accuracy on the device actually in use
    # (the function's own default may point at CUDA even on a CPU-only runtime)
    train_acc = 100*evaluate_accuracy(train_loader, net, device)
    test_acc = 100*evaluate_accuracy(test_loader, net, device)

    # Development of performance
    train_accs.append(train_acc)
    test_accs.append(test_acc)

    # Print performance
    print('Epoch: {:.1f}'.format(epoch+1))
    print('Accuracy of train set: {:.1f}%'.format(train_acc))
    print('Accuracy of test set: {:.1f}%'.format(test_acc))
    print('')

Parameter containing:
tensor([[[ 0.0817, -0.0694, -0.0736, -0.0309, -0.0528, -0.0167, -0.0357,
           0.0601],
         [ 0.1712, -0.0725, -0.0225, -0.0823,  0.0817,  0.0295, -0.0901,
          -0.0193],
         [-0.0561, -0.0516, -0.0464, -0.0664, -0.0049, -0.1110, -0.0903,
          -0.1307],
         [-0.0015, -0.1170,  0.0311, -0.0150, -0.0228,  0.1965,  0.0443,
           0.0197],
         [ 0.1254,  0.1458,  0.1078, -0.0165,  0.1965,  0.0780, -0.0595,
           0.1188],
         [ 0.0187,  0.0243,  0.0594, -0.0110,  0.0525, -0.0600, -0.0272,
          -0.0782],
         [ 0.0246, -0.1081,  0.1039,  0.0103,  0.0060, -0.0085, -0.1193,
           0.1684],
         [ 0.1391, -0.1264, -0.0781,  0.0476,  0.1408, -0.0210,  0.1048,
          -0.0095],
         [-0.0306, -0.1341,  0.0400, -0.0680, -0.0499,  0.0086, -0.1219,
          -0.0209]]], requires_grad=True)
tensor([[-0.1037,  0.1317,  0.3308, -0.1956, -0.2440,  0.1058,  0.1266, -0.2597,
          0.2150, -0.3116],
        [-

KeyboardInterrupt: 

In [None]:
# Report whether PyTorch can see a CUDA device in this runtime
print(torch.cuda.is_available())


In [None]:
import matplotlib.pyplot as plt

# Turn every recorded loss tensor into a NumPy value that matplotlib can plot
train_losses_detached = list(map(lambda loss: loss.detach().cpu().numpy(), train_losses))

plt.figure(figsize=(10, 5))

# Left panel: training loss, one point per optimisation step
plt.subplot(1, 2, 1)
plt.plot(train_losses_detached)
plt.ylabel('Loss')
plt.xlabel('Iterations')
plt.grid()

# Right panel: train and test accuracy, one point per epoch
plt.subplot(1, 2, 2)
plt.plot(train_accs, label = 'train')
plt.plot(test_accs, label = 'test')
plt.ylabel('Accuracy (%)')
plt.xlabel('Epochs')
plt.legend()
plt.grid()
