In [1]:
from timm import create_model

import torchvision
import torchvision.transforms as transforms

import os
import torch
import torch.nn as nn

from copy import deepcopy

import numpy as np

from glob import glob

import pandas as pd

# Config

In [2]:
class Config:
    datadir = '/datasets/'
    modelname = 'vit_tiny_patch16_224_in21k'
    img_size = 224
    
args = Config()

# DataLoader

In [16]:
def build_loader(args):
    transform_test = transforms.Compose([
        transforms.Resize(args.img_size),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    testset = torchvision.datasets.CIFAR10(
        root=os.path.join(args.datadir, 'CIFAR10'), train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=False, num_workers=2)

    return testloader

def test(net, testloader):
    criterion = nn.CrossEntropyLoss()
    
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    acc = 100.*correct/total
    loss = test_loss / len(testloader)
    
    return acc, loss

# Model Soup

In [11]:
def add_ingradient(soup, modelname, ckp_path):
    ingradient = create_model(modelname)
    ingradient.load_state_dict(torch.load(ckp_path)['net'])
    ingradient.cuda()

    for param1, param2 in zip(soup.parameters(), ingradient.parameters()):
        param1.data = (param1.data + param2.data) / 2
        
    return soup

def greedy_soup(ckp_list, modelname, testloader):
    soup = create_model(modelname)
    soup.load_state_dict(torch.load(ckp_list[0])['net'])
    soup.cuda()
    
    best_acc, loss = test(soup, testloader)
    print(f'first acc: {best_acc:.2f}%')
    
    # cook
    for i, ckp_i in enumerate(ckp_list[1:]):
        soup_next = deepcopy(soup)
        soup_next = add_ingradient(soup_next, modelname, ckp_i)
        acc, loss = test(soup_next, testloader)
        
        print(f'acc of {i} ingradient: {acc:.2f}%')
        
        if acc > best_acc:
            soup = soup_next
    
    return soup

# Results

**ViT-T/16**

In [12]:
testloader = build_loader(args)

Files already downloaded and verified


In [5]:
ckp_list = glob('./pytorch-cifar/checkpoint/*vit*')
acc_list = []
loss_list = []

for ckp_i in ckp_list:
    net = create_model(args.modelname)
    net.cuda()
    net.load_state_dict(torch.load(ckp_i)['net'])

    acc, loss = test(net, testloader)
    
    acc_list.append(acc)
    loss_list.append(loss)
    
# cook
ckp_list = glob('./pytorch-cifar/checkpoint/*vit*')
net_soup = greedy_soup(ckp_list, args.modelname, testloader)

acc, loss = test(net_soup, testloader, criterion)
    
acc_list.append(acc)
loss_list.append(loss)
ckp_list.append('soup')

first acc: 96.53%
acc of 0 ingradient: 96.79%
acc of 1 ingradient: 96.66%
acc of 2 ingradient: 96.70%
acc of 3 ingradient: 96.74%
acc of 4 ingradient: 96.90%


In [7]:
vit_df = pd.DataFrame({'acc':acc_list, 'loss':loss_list, 'ckp_path':ckp_list})
vit_df

Unnamed: 0,acc,loss,ckp_path
0,96.53,0.111938,./pytorch-cifar/checkpoint/vit_tiny_patch16_22...
1,96.5,0.115345,./pytorch-cifar/checkpoint/vit_tiny_patch16_22...
2,96.66,0.109672,./pytorch-cifar/checkpoint/vit_tiny_patch16_22...
3,96.75,0.113079,./pytorch-cifar/checkpoint/vit_tiny_patch16_22...
4,96.83,0.109476,./pytorch-cifar/checkpoint/vit_tiny_patch16_22...
5,96.66,0.109476,./pytorch-cifar/checkpoint/vit_tiny_patch16_22...
6,96.9,0.106054,soup


In [23]:
vit_df.round(4).to_csv('vit-t16-results.csv',index=False)

**ResNet50**

In [17]:
args.modelname = 'resnet50d'
args.img_size = 32

testloader = build_loader(args)

Files already downloaded and verified


In [18]:
ckp_list = glob('./pytorch-cifar/checkpoint/*resnet*')
acc_list = []
loss_list = []

for ckp_i in ckp_list:
    net = create_model(args.modelname)
    net.cuda()
    net.load_state_dict(torch.load(ckp_i)['net'])

    acc, loss = test(net, testloader)
    
    acc_list.append(acc)
    loss_list.append(loss)
    
# cook
ckp_list = glob('./pytorch-cifar/checkpoint/*resnet*')
net_soup = greedy_soup(ckp_list, args.modelname, testloader)

acc, loss = test(net_soup, testloader)
    
acc_list.append(acc)
loss_list.append(loss)
ckp_list.append('soup')

first acc: 91.27%
acc of 0 ingradient: 14.55%
acc of 1 ingradient: 14.07%
acc of 2 ingradient: 9.08%
acc of 3 ingradient: 10.10%
acc of 4 ingradient: 10.01%


In [19]:
resnet_df = pd.DataFrame({'acc':acc_list, 'loss':loss_list, 'ckp_path':ckp_list})
resnet_df

Unnamed: 0,acc,loss,ckp_path
0,91.27,0.306234,./pytorch-cifar/checkpoint/resnet50d_sgd_lr-0....
1,91.35,0.301552,./pytorch-cifar/checkpoint/resnet50d_sgd_lr-0....
2,91.34,0.301978,./pytorch-cifar/checkpoint/resnet50d_sgd_lr-0....
3,91.29,0.301511,./pytorch-cifar/checkpoint/resnet50d_sgd_lr-0....
4,91.35,0.300445,./pytorch-cifar/checkpoint/resnet50d_sgd_lr-0....
5,91.36,0.307123,./pytorch-cifar/checkpoint/resnet50d_sgd_lr-0....
6,91.27,0.306234,soup


In [24]:
resnet_df.round(4).to_csv('resnet50-results.csv',index=False)