In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
!git clone https://github.com/KrisnaPinasthika/SparseSwin.git

Cloning into 'SparseSwin'...
remote: Enumerating objects: 657, done.[K
remote: Counting objects: 100% (61/61), done.[K
remote: Compressing objects: 100% (49/49), done.[K
remote: Total 657 (delta 14), reused 54 (delta 11), pack-reused 596 (from 1)[K
Receiving objects: 100% (657/657), 1.11 GiB | 22.64 MiB/s, done.
Resolving deltas: 100% (50/50), done.
Updating files: 100% (548/548), done.


In [2]:
%cd SparseSwin


/kaggle/working/SparseSwin


In [4]:
%ls

 additional_experiments.ipynb   [0m[01;34mModels[0m/          train_cifar_args.py
 build_model.py                 README.md        train_cifar.py
 [01;34mdatasets[0m/                      [01;34mSavedModel[0m/      train_imagenet100.py
[01;34m'gradcam illustration'[0m/         [01;34mSources[0m/         traintest.py
 LICENSE                        [01;34mSparseSwinDet[0m/


In [22]:
# %load build_model.py
import torch
from Models.SparseSwin import SparseSwin

def buildSparseSwin(image_resolution, swin_type, num_classes, 
                    ltoken_num, ltoken_dims, num_heads, 
                    qkv_bias, lf, attn_drop_prob, lin_drop_prob, 
                    freeze_12, device):
    """
    image_resolution : input image resolution (h x w x 3), input MUST be a squared image and divisible by 16
    swin_type : Swin Transformer model type Tiny, Small, Base 
    num_classes : number of classes 
    """
    dims = {
        'tiny': 96, 
        'small': 96,
        'base': 128
    }
    dim_init = dims.get(swin_type.lower())
    
    if (dim_init == None) or ((image_resolution%16) != 0):
        print('Check your swin type OR your image resolutions are not divisible by 16')
        print('Remember.. it must be a squared image')
        return None 
    
    model = SparseSwin(
        swin_type=swin_type, 
        num_classes=num_classes, 
        c_dim_3rd=dim_init*4, 
        hw_size_3rd=int(image_resolution/16), 
        ltoken_num=ltoken_num, 
        ltoken_dims=ltoken_dims, 
        num_heads=num_heads, 
        qkv_bias=qkv_bias, 
        lf=lf, 
        attn_drop_prob=attn_drop_prob, 
        lin_drop_prob=lin_drop_prob, 
        freeze_12=freeze_12,
        device=device, 
    ).to(device)
    
    return model 

if __name__ == '__main__': 
    swin_type = 'tiny'
    device = 'cuda'
    image_resolution = 224
    
    model = buildSparseSwin(
        image_resolution=image_resolution,
        swin_type=swin_type, 
        num_classes=100, 
        ltoken_num=49, 
        ltoken_dims=512, 
        num_heads=16, 
        qkv_bias=True,
        lf=2, 
        attn_drop_prob=.0, 
        lin_drop_prob=.0, 
        freeze_12=False,
        device=device
    )
    

In [34]:
%%writefile train_cifar.py
import torch
import torchvision.transforms as transforms 
from torchvision import datasets
import numpy as np 
from traintest import train
import build_model as build

torch.random.manual_seed(1)

# Dataset Config -------------------------------------------
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

data_transform = {
        'train': transforms.Compose([
                    transforms.ToTensor(),
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.Normalize(mean, std)
                ]), 
        'val': transforms.Compose([
                    transforms.ToTensor(), 
                    transforms.Resize((224, 224), antialias=None),
                    transforms.Normalize(mean, std)
                ])
    }

status = True
# Todo: Train on CIFAR10
train_dataset = datasets.CIFAR10(
                root='./datasets/torch_cifar10/', 
                train=True, 
                transform=data_transform['train'], 
                download=status)
val_dataset = datasets.CIFAR10(
                root='./datasets/torch_cifar10/', 
                train=False, 
                transform=data_transform['val'], 
                download=status)

# Todo: Train on CIFAR100
# train_dataset = datasets.CIFAR100(
#                 root='./datasets/torch_cifar100/', 
#                 train=True, 
#                 transform=data_transform['train'], 
#                 download=status)
# val_dataset = datasets.CIFAR100(
#                 root='./datasets/torch_cifar100/', 
#                 train=False, 
#                 transform=data_transform['val'], 
#                 download=status)

batch_size = 64
train_loader = torch.utils.data.DataLoader(
                train_dataset, 
                batch_size=batch_size, 
                shuffle=True, 
                num_workers=2, 
                pin_memory=True)

val_loader = torch.utils.data.DataLoader(
                val_dataset, 
                batch_size=batch_size, 
                shuffle=True,
                num_workers=2, 
                pin_memory=True)


if __name__ == '__main__':
    dataset = 'cifar10'
    swin_type = 'tiny'
    reg_type, reg_lambda = 'l1', 1e-5
    device = torch.device('cuda')
    epochs = 100
    show_per = 200
    ltoken_num, ltoken_dims = 49, 256
    lf = 2
    
    model = build.buildSparseSwin(
        image_resolution=224,
        swin_type=swin_type, 
        num_classes=10, 
        ltoken_num=ltoken_num, 
        ltoken_dims=ltoken_dims, 
        num_heads=16, 
        qkv_bias=True,
        lf=lf, 
        attn_drop_prob=.0, 
        lin_drop_prob=.0, 
        freeze_12=False,
        device=device)
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.01)
    criterion = torch.nn.CrossEntropyLoss()
    
    train(
        train_loader, 
        swin_type, 
        dataset, 
        epochs, 
        model, 
        lf, 
        ltoken_num,
        optimizer, 
        criterion, 
        device, 
        show_per=show_per,
        reg_type=None, 
        reg_lambda=0.0, 
        validation=val_loader)


Overwriting train_cifar.py


In [35]:
!python train_cifar.py

[TRAIN] Total : 782 | type : tiny | Regularization : None with lamda : 0.0
Epoch 1/100
  [200/782] Loss: 1.2707 Acc : 0.5519
  [400/782] Loss: 0.9870 Acc : 0.6568
  [600/782] Loss: 0.8680 Acc : 0.6999
  [782/782] Loss: 0.8052 Acc : 0.7221
Loss: 0.8052 Acc : 0.7221
[TEST] Total : 157 | type : tiny
[Model : tiny] Loss: 0.2206 Acc : 0.9279

Epoch 2/100
  [200/782] Loss: 0.4596 Acc : 0.8403
  [400/782] Loss: 0.4585 Acc : 0.8407
  [600/782] Loss: 0.4541 Acc : 0.8436
  [782/782] Loss: 0.4531 Acc : 0.8432
Loss: 0.4531 Acc : 0.8432
[TEST] Total : 157 | type : tiny
[Model : tiny] Loss: 0.1824 Acc : 0.9394

Epoch 3/100
  [200/782] Loss: 0.4120 Acc : 0.8580
  [400/782] Loss: 0.4051 Acc : 0.8598
  [600/782] Loss: 0.4041 Acc : 0.8600
  [782/782] Loss: 0.3998 Acc : 0.8607
Loss: 0.3998 Acc : 0.8607
[TEST] Total : 157 | type : tiny
[Model : tiny] Loss: 0.1481 Acc : 0.9487

Epoch 4/100
  [200/782] Loss: 0.3842 Acc : 0.8666
  [400/782] Loss: 0.3713 Acc : 0.8709
  [600/782] Loss: 0.3656 Acc : 0.8732
  [7

In [28]:
%%writefile traintest.py
import torch
import os 
import numpy as np 
import pandas as pd

def train(train_loader, swin_type, dataset, epochs, model, lf, token_num,
                optimizer, criterion, device, show_per,  
                reg_type=None, reg_lambda=0., validation=None):
    model.train()
    total_batch = train_loader.__len__()
    train_test_hist = []
    best_test_acc = -99
    
    specific_dir = f'./SavedModel/{dataset}/SparseSwin_reg_{reg_type}_lbd_{reg_lambda}_lf_{lf}_{token_num}'
    if f'SparseSwin_reg_{reg_type}_lbd_{reg_lambda}_lf_{lf}_{token_num}' not in os.listdir(f'./SavedModel/{dataset}/'): 
        os.mkdir(specific_dir)
    
    print(f"[TRAIN] Total : {total_batch} | type : {swin_type} | Regularization : {reg_type} with lamda : {reg_lambda}")
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        running_loss, n_correct, n_sample = 0.0, 0.0, 0.0

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass with flexible unpacking
            result = model(inputs)
            if isinstance(result, tuple):
                outputs = result[0]  # First element is the prediction tensor
                attn_weights = result[1] if len(result) > 1 else None  # Second element is attention weights (list of tensors)
            else:
                outputs = result
                attn_weights = None

            
            reg = 0
            if reg_type == 'l1':                
                for attn_w in attn_weights: 
                    reg += torch.sum(torch.abs(attn_w))
                    
            elif reg_type == 'l2':
                for attn_w in attn_weights: 
                    reg += torch.sum(attn_w**2)
                        
            reg = reg_lambda * reg
            
            loss = criterion(outputs, labels) + reg
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            with torch.no_grad(): 
                n_correct_per_batch = torch.sum(torch.argmax(outputs, dim=1) == labels)
                n_correct += n_correct_per_batch
                n_sample += labels.shape[0]
                acc = n_correct / n_sample

            if ((i + 1) % show_per == 0) or ((i + 1) == total_batch):
                print(f'  [{i + 1}/{total_batch}] Loss: {(running_loss / (i + 1)):.4f} Acc : {acc:.4f}')

        print(f'Loss: {(running_loss / total_batch):.4f} Acc : {(n_correct / n_sample):.4f}')
        
        # Save model
        test_loss, test_acc = test(validation, swin_type=swin_type, model=model, criterion=criterion, device=device)
        train_loss, train_acc = (running_loss / total_batch), (n_correct / n_sample)

        test_loss, train_loss = round(test_loss, 4), round(train_loss, 4)
        train_test_hist.append([train_loss, round(train_acc.item(), 4), test_loss, round(test_acc.item(), 4)])
        
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(), f'{specific_dir}/model_{epoch+1}.pt')
    
    train_test_hist = np.array(train_test_hist)
    df = pd.DataFrame()
    df['train_loss'] = train_test_hist[:, 0]
    df['train_acc'] = train_test_hist[:, 1]
    df['test_loss'] = train_test_hist[:, 2]
    df['test_acc'] = train_test_hist[:, 3]
    df.to_csv(f'{specific_dir}/hist.csv', index=None)
    
    # save state for last epoch
    # torch.save({'epoch': epoch,
    #                     'model_state_dict': model.state_dict(),
    #                     'optimizer_state_dict': optimizer.state_dict(),
    #                     'loss': loss}, 
    #                     f'./TrainingState/{dataset}/SparseSwin_{reg_type}_{reg_lambda}_lf_{lf}_{epoch+1}')
    # print('Finished Training, saved training state :D')
    # print("Train Loss, Train Acc, Test Loss, Test Acc")
    # print(train_test_hist)

def test(val_loader, swin_type, model, criterion, device):
    model.eval()

    with torch.no_grad():
        total_batch = val_loader.__len__()
        print(f"[TEST] Total : {total_batch} | type : {swin_type}")
        running_loss, n_correct, n_sample = 0.0, 0.0, 0.0

        for i, data in enumerate(val_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass with flexible unpacking
            result = model(inputs)
            if isinstance(result, tuple):
                outputs = result[0]  # Only need the prediction tensor
            else:
                outputs = result

            
            loss = criterion(outputs, labels)

            running_loss += loss.item()

            n_correct_per_batch = torch.sum(torch.argmax(outputs, dim=1) == labels)
            n_correct += n_correct_per_batch
            n_sample += labels.shape[0]
            acc = n_correct / n_sample

    print(f'[Model : {swin_type}] Loss: {(running_loss / total_batch):.4f} Acc : {(n_correct / n_sample):.4f}')
    print()
    return (running_loss / total_batch), (n_correct / n_sample)


Overwriting traintest.py


In [47]:
%%writefile train_cifar_args.py
import torch
import torchvision.transforms as transforms 
from torchvision import datasets
import numpy as np 
from traintest import train
import build_model as build
import argparse

torch.random.manual_seed(1)

"""
Parser: 
python train_cifar.py -dataset cifar10 -batchsize=24 -reg_type=None -sparseswin_type tiny -device cuda -epochs 1 -freeze_12 False
"""

parser = argparse.ArgumentParser()
parser.add_argument('-dataset', help='cifar10 or cifar100', type=str, choices=['cifar10', 'cifar100'])
parser.add_argument('-batchsize', help='the number of batch', type=int)
parser.add_argument('-reg_type', help='the type of regularization', type=str, default='None', choices=['None', 'l1', 'l2'])
parser.add_argument('-reg_lambda', help='the lambda for regualrization\nIf regularization None then you dont need to specify this', type=float, default=0)
parser.add_argument('-sparseswin_type', help='Type of the model', type=str, choices=['tiny', 'small', 'base'])
parser.add_argument('-device', help='the computing device [cpu/cuda/etc]', type=str)
parser.add_argument('-epochs', help='the number of epoch', type=int, default=100)
parser.add_argument('-show_per', help='Displaying verbose per batch for each epoch', type=int, default=300)
parser.add_argument('-lf', help='number of lf', type=int, default=2)
parser.add_argument('-freeze_12', help='freeze? false / true', type=str, choices=['False', 'True'])

args = parser.parse_args()
list_of_models = {
    'tiny': {'ltoken_num': 49, 'ltoken_dims':512},
    'small': {'ltoken_num': 64, 'ltoken_dims':768},
    'base': {'ltoken_num': 81, 'ltoken_dims':1024},
}
model_type = list_of_models.get(args.sparseswin_type)

dataset = args.dataset.lower()
if dataset == 'cifar10':
    num_classes = 10 
else: 
    num_classes = 100
    
swin_type = 'tiny'
reg_type, reg_lambda = args.reg_type, args.reg_lambda
device = torch.device(args.device)
epochs = args.epochs
show_per = args.show_per
ltoken_num, ltoken_dims = model_type['ltoken_num'], model_type['ltoken_dims']
batch_size = args.batchsize
lf = 2
freeze_12 = False if args.freeze_12 == 'False' else True

# Dataset Config -------------------------------------------
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

data_transform = {
        'train': transforms.Compose([
                    transforms.ToTensor(),
                    transforms.RandomResizedCrop(224, antialias=None),
                    transforms.RandomHorizontalFlip(),
                    transforms.Normalize(mean, std)
                ]), 
        'val': transforms.Compose([
                    transforms.ToTensor(), 
                    transforms.Resize((224, 224), antialias=None),
                    transforms.Normalize(mean, std)
                ])
    }

status = True
if dataset == 'cifar10':
    # Todo: Train on CIFAR10
    train_dataset = datasets.CIFAR10(
                    root='./datasets/torch_cifar10/', 
                    train=True, 
                    transform=data_transform['train'], 
                    download=status)
    val_dataset = datasets.CIFAR10(
                    root='./datasets/torch_cifar10/', 
                    train=False, 
                    transform=data_transform['val'], 
                    download=status)
elif dataset == 'cifar100':
    # Todo: Train on CIFAR100
    train_dataset = datasets.CIFAR100(
                    root='./datasets/torch_cifar100/', 
                    train=True, 
                    transform=data_transform['train'], 
                    download=status)
    val_dataset = datasets.CIFAR100(
                    root='./datasets/torch_cifar100/', 
                    train=False, 
                    transform=data_transform['val'], 
                    download=status)
else:
    print('Dataset is not availabel')


train_loader = torch.utils.data.DataLoader(
                train_dataset, 
                batch_size=batch_size, 
                shuffle=True, 
                num_workers=2, 
                pin_memory=True)

val_loader = torch.utils.data.DataLoader(
                val_dataset, 
                batch_size=batch_size, 
                shuffle=True,
                num_workers=2, 
                pin_memory=True)


if __name__ == '__main__':
    print(f"Training process will begin..")
    print(f"SparseSwin Model : {args.sparseswin_type} | ltoken_num : {ltoken_num} | ltoken_dims : {ltoken_dims}")
    print(f"dataset : {dataset}")
    print(f"epochs : {epochs} | batch_size : {batch_size} | freeze12? : {freeze_12}")
    print(f"device : {device}")

    model = build.buildSparseSwin(
        image_resolution=224,
        swin_type=swin_type, 
        num_classes=num_classes, 
        ltoken_num=ltoken_num, 
        ltoken_dims=ltoken_dims, 
        num_heads=16, 
        qkv_bias=True,
        lf=lf, 
        attn_drop_prob=.0, 
        lin_drop_prob=.0, 
        freeze_12=freeze_12,
        device=device)
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.01)
    criterion = torch.nn.CrossEntropyLoss()
    
    train(
        train_loader, 
        swin_type, 
        dataset, 
        epochs, 
        model, 
        lf, 
        ltoken_num,
        optimizer, 
        criterion, 
        device, 
        show_per=show_per,
        reg_type=reg_type, 
        reg_lambda=reg_lambda, 
        validation=val_loader)

Overwriting train_cifar_args.py


In [None]:
!python train_cifar_args.py -dataset cifar100 -epochs 100 -batchsize 100 -sparseswin_type tiny -device cuda

100%|████████████████████████████████████████| 169M/169M [00:10<00:00, 16.0MB/s]
Training process will begin..
SparseSwin Model : tiny | ltoken_num : 49 | ltoken_dims : 512
dataset : cifar100
epochs : 100 | batch_size : 100 | freeze12? : True
device : cuda
[TRAIN] Total : 500 | type : tiny | Regularization : None with lamda : 0
Epoch 1/100
  [300/500] Loss: 2.9743 Acc : 0.3103
  [500/500] Loss: 2.5016 Acc : 0.4018
Loss: 2.5016 Acc : 0.4018
[TEST] Total : 100 | type : tiny
[Model : tiny] Loss: 1.0288 Acc : 0.7187

Epoch 2/100
  [300/500] Loss: 1.4054 Acc : 0.6242
  [500/500] Loss: 1.3747 Acc : 0.6307
Loss: 1.3747 Acc : 0.6307
[TEST] Total : 100 | type : tiny
[Model : tiny] Loss: 0.7885 Acc : 0.7751

Epoch 3/100
  [300/500] Loss: 1.1991 Acc : 0.6737
  [500/500] Loss: 1.1940 Acc : 0.6736
Loss: 1.1940 Acc : 0.6736
[TEST] Total : 100 | type : tiny
[Model : tiny] Loss: 0.6973 Acc : 0.7955

Epoch 4/100
  [300/500] Loss: 1.0916 Acc : 0.7014
  [500/500] Loss: 1.0931 Acc : 0.6996
Loss: 1.0931 Ac