# Model soup

In [1]:
# 기본코드
import inspect
import os
import sys
import time
import numpy as np

import torch
from mmcv import Config
from mmseg.datasets import build_dataloader, build_dataset
from mmseg.models import build_segmentor
from mmseg.apis import single_gpu_test
from mmcv.runner import load_checkpoint, load_state_dict
from mmcv.parallel import MMDataParallel

def uniform_soup(cfg, model, checkpoint_paths ,device = "cpu", by_name = False):
    """Average the weights of several checkpoints (uniform soup) and report mIoU.

    Every checkpoint in ``checkpoint_paths`` is loaded, each parameter/buffer is
    averaged element-wise with equal weight, the averaged weights are written
    into ``model`` in place, and the result is evaluated on ``cfg.data.val``.

    Args:
        cfg: mmcv ``Config``; ``cfg.data.val`` and ``cfg.data.workers_per_gpu``
            are read to build the validation dataset and dataloader.
        model: Segmentor whose architecture matches the checkpoints. It is
            modified in place.
        checkpoint_paths: Iterable of checkpoint file paths. Each checkpoint is
            expected to hold its weights under the ``'state_dict'`` key.
        device: Device the model lives on while the weights are merged and when
            it is returned. Evaluation itself always runs on CUDA device 0.
        by_name: Unused; kept only so existing callers keep working.

    Returns:
        tuple: ``(model, checkpoint)`` where ``model`` is the bare (unwrapped)
        model carrying the averaged weights and ``checkpoint`` is the dict
        returned by ``load_checkpoint`` for the last path (``{}`` when
        ``checkpoint_paths`` is empty, in which case the model is evaluated
        unchanged).
    """
    # NOTE: the former ``try: import torch / except: return model`` guard was
    # removed. torch is imported at module level, and the guard's single-value
    # return would have broken callers that unpack two values.
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
            dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)

    model = model.to(device)
    model_dict = model.state_dict()
    soups = {key: [] for key in model_dict}
    skipped_keys = set()
    checkpoint = {}
    for checkpoint_path in checkpoint_paths:
        checkpoint = load_checkpoint(model, checkpoint_path, map_location='cpu')
        for k, v in checkpoint['state_dict'].items():
            if k in soups:
                soups[k].append(v)
            else:
                # Keys the model does not own cannot be loaded anyway; skip
                # them instead of dying with a KeyError.
                skipped_keys.add(k)
    if skipped_keys:
        print(f"Warning: ignored {len(skipped_keys)} checkpoint key(s) that the model does not have")

    # Element-wise mean per key. The cast restores the original dtype because
    # true division turns integer buffers into floats.
    averaged = {
        k: (torch.sum(torch.stack(v), dim=0) / len(v)).type(v[0].dtype)
        for k, v in soups.items() if v
    }
    if averaged:
        model_dict.update(averaged)
        model.load_state_dict(model_dict)

    model.CLASSES = dataset.CLASSES
    # Wrap only for evaluation. The wrapper must not be returned, otherwise the
    # caller's state_dict() keys would all be prefixed with "module.".
    eval_model = MMDataParallel(model.cuda(), device_ids=[0])
    output = single_gpu_test(eval_model, data_loader)
    metric = dataset.evaluate(output, metric=['mIoU'])
    print(f"mIoU: {metric['mIoU']}")

    # .cuda() moved the underlying module; put it back on the requested device.
    model = model.to(device)
    return model, checkpoint

def greedy_soup(cfg, model_ori, checkpoint_paths, device = "cpu"):
    """Build a greedy model soup: add checkpoints only while validation mIoU holds up.

    Procedure:
      1. Evaluate every checkpoint on ``cfg.data.val`` and sort by mIoU,
         best first.
      2. Start the soup from the best checkpoint. For each remaining
         checkpoint, form the uniform average of all accepted ingredients plus
         the candidate, evaluate it, and keep the candidate only if mIoU does
         not drop (``>=``).

    Args:
        cfg: mmcv ``Config``; ``cfg.data.val`` and ``cfg.data.workers_per_gpu``
            are read to build the validation dataset and dataloader.
        model_ori: Segmentor whose architecture matches the checkpoints. It is
            modified in place.
        checkpoint_paths: Iterable of checkpoint file paths. Each checkpoint is
            expected to hold its weights under the ``'state_dict'`` key.
        device: Device the model lives on while weights are merged and when it
            is returned. Evaluation itself always runs on CUDA device 0.

    Returns:
        tuple: ``(model, checkpoint)`` where ``model`` is ``model_ori`` loaded
        with the final soup weights and ``checkpoint`` is the dict returned by
        ``load_checkpoint`` for the last checkpoint processed (``{}`` when
        ``checkpoint_paths`` is empty).
    """
    # NOTE: the former ``try: import torch / except: return model_ori`` guard
    # was removed. torch is imported at module level, and the guard's
    # single-value return would have broken callers that unpack two values.
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
            dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)

    def _evaluate_miou(net):
        """Evaluate ``net`` on the validation set (CUDA device 0) and return its mIoU."""
        net.CLASSES = dataset.CLASSES
        wrapped = MMDataParallel(net.cuda(), device_ids=[0])
        output = single_gpu_test(wrapped, data_loader)
        return dataset.evaluate(output, metric=['mIoU'])['mIoU']

    # Pass 1: score every checkpoint on its own.
    result = []
    checkpoint = {}
    for i, checkpoint_path in enumerate(checkpoint_paths):
        model = model_ori.to(device)
        checkpoint = load_checkpoint(model, checkpoint_path, map_location='cpu')
        miou = _evaluate_miou(model)
        result.append((miou, checkpoint_path))
        print(f"리스트에 {i}번째 mIoU {miou}저장")

    result.sort(key = lambda x : x[0], reverse = True)
    print(f"리스트 정렬")
    print(result)

    # Pass 2: greedily grow the soup, best checkpoint first.
    soup_weights = {}      # weights of the currently accepted soup
    num_ingredients = 0    # how many checkpoints soup_weights averages over
    pre_metric_value = 0
    for i, (mIoU, checkpoint_path) in enumerate(result):
        model = model_ori.to(device)
        checkpoint = load_checkpoint(model, checkpoint_path, map_location='cpu')
        weight_dict = checkpoint['state_dict']

        if i == 0:
            # Shallow copy so the soup never aliases the checkpoint's own dict.
            soup_weights = dict(weight_dict)
            num_ingredients = 1
            pre_metric_value = mIoU
            print("soup 모델에 가장 높은 mIou를 가진 checkpoint가 추가되었습니다")
            print(f"추가된 checkpoint_path: {checkpoint_path}")
            print(f"현재 최고 mIoU: {pre_metric_value}")
            continue

        # Build the candidate in a fresh dict. The accepted soup must stay
        # untouched if this candidate is rejected.
        candidate = dict(soup_weights)
        for k, v in weight_dict.items():
            if k in candidate:
                prev = candidate[k]
                # Running mean keeps every accepted ingredient equally
                # weighted: (mean_n * n + new) / (n + 1). The cast restores the
                # dtype after true division (matters for integer buffers).
                candidate[k] = ((prev * num_ingredients + v) / (num_ingredients + 1)).type(prev.dtype)
            else:
                candidate[k] = v

        load_state_dict(model, candidate)
        candidate_miou = _evaluate_miou(model)

        if candidate_miou >= pre_metric_value:
            pre_metric_value = candidate_miou
            soup_weights = candidate
            num_ingredients += 1
            print("soup 모델에 새로운 checkpoint가 추가되었습니다")
            print(f"추가된 checkpoint_path: {checkpoint_path}")
            print(f"현재 최고 mIoU: {pre_metric_value}")
        else:
            print("이번 체크 포인트는 soup 모델에 추가되지 않았습니다")
            print(f"이번 checkpoint_path: {checkpoint_path}")
            print(f"현재 최고 mIoU: {pre_metric_value}, 이번 mIou {candidate_miou}")

    # Evaluation left the module on CUDA holding the last candidate's weights;
    # move it back and load the accepted soup.
    model = model_ori.to(device)
    load_state_dict(model, soup_weights)
    return model, checkpoint



0. 모델 & checkpoint 가져오기

In [2]:
################ Set the model config path ################
cfg= Config.fromfile('/opt/ml/input/code/mmsegmentation/configs/_cv07_/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final2.py')
################ Set the model config path ################
# Build the segmentor from the config; its weights are filled in later from the checkpoints.
model = build_segmentor(cfg.model)

################ List the checkpoint paths to soup ################
checkpoint_paths = [
    '/opt/ml/input/code/mmsegmentation/work_dirs/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final2/epoch_8.pth',
    '/opt/ml/input/code/mmsegmentation/work_dirs/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final2/epoch_9.pth',
    '/opt/ml/input/code/mmsegmentation/work_dirs/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final2/epoch_10.pth',
]
################ List the checkpoint paths to soup ################
# Device passed to the soup functions as their `device` argument.
device = "cpu"



1. Uniform Soup

In [3]:
################ Set the directory to save the soup checkpoints into ################
save_dir_path = '/opt/ml/input/code/mmsegmentation/work_dirs/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final2/'

print("\n[Uniform Soup]")
uniform_model, checkpoint = uniform_soup(cfg, model, checkpoint_paths, device = device)

# If the returned model is still wrapped in MMDataParallel, its state_dict keys
# are prefixed with "module."; unwrap it so the saved keys match the bare model.
if isinstance(uniform_model, MMDataParallel):
    uniform_model = uniform_model.module

# Reuse the last checkpoint's non-weight entries, but work on a shallow copy so
# the dict returned by uniform_soup is not mutated.
uniform_dict = dict(checkpoint)
# Store CPU tensors so the file can be loaded on machines without a GPU.
uniform_dict['state_dict'] = {k: v.cpu() for k, v in uniform_model.state_dict().items()}

torch.save(uniform_dict, os.path.join(save_dir_path, 'uniform_model_soup.pth'))

2023-01-05 06:49:59,861 - mmseg - INFO - Loaded 1 images



[Uniform Soup]
load checkpoint from local path: /opt/ml/input/code/mmsegmentation/work_dirs/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final2/epoch_8.pth
load checkpoint from local path: /opt/ml/input/code/mmsegmentation/work_dirs/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final2/epoch_9.pth
load checkpoint from local path: /opt/ml/input/code/mmsegmentation/work_dirs/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final2/epoch_10.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1/1, 0.8 task/s, elapsed: 1s, ETA:     0sper class results:

+---------------+-------+-------+
|     Class     |  IoU  |  Acc  |
+---------------+-------+-------+
|   Backgroud   |  89.3 | 95.08 |
| General trash |  0.0  |  nan  |
|     Paper     |  0.0  |  nan  |
|   Paper pack  |  nan  |  nan  |
|     Metal     |  nan  |  nan  |
|     Glass     | 67.01 | 99.95 |
|    Plastic    | 41.46 | 41.68 |
|   Styrofoam   |  nan  |  nan  |
|  Plastic bag  | 23.69 | 23.87 |
|    Battery    |  nan  |  nan  |
|    Cloth

2. Greedy Soup (uniform weight update)

In [4]:
print("[Greedy Soup (uniform weight update)]")
greedy_model, checkpoint = greedy_soup(cfg, model, checkpoint_paths, device = device)

# Reuse the last checkpoint's non-weight entries, but work on a shallow copy so
# the dict returned by greedy_soup is not mutated.
greedy_dict = dict(checkpoint)
greedy_dict['state_dict'] = greedy_model.state_dict()

# The previous file name interpolated `name`, which is never defined in this
# notebook and raised NameError on a fresh kernel. Use a fixed name that
# mirrors the uniform-soup output file.
torch.save(greedy_dict, os.path.join(save_dir_path, 'greedy_model_soup.pth'))


2023-01-04 14:23:33,866 - mmseg - INFO - Loaded 324 images


[Greedy Soup (uniform weight update)]
load checkpoint from local path: /opt/ml/input/code/mmsegmentation/work_dirs/upernet_beit-large_fp32_8x1_640x640_160k_ade20k_final/epoch_16.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 324/324, 1.1 task/s, elapsed: 304s, ETA:     0sper class results:

+---------------+-------+-------+
|     Class     |  IoU  |  Acc  |
+---------------+-------+-------+
|   Backgroud   | 96.95 | 98.41 |
| General trash | 43.06 |  50.3 |
|     Paper     | 81.54 | 93.48 |
|   Paper pack  | 71.14 |  74.2 |
|     Metal     | 37.63 | 53.94 |
|     Glass     | 88.14 | 95.21 |
|    Plastic    | 63.27 | 78.59 |
|   Styrofoam   | 82.89 | 87.94 |
|  Plastic bag  | 87.47 | 94.81 |
|    Battery    | 85.37 | 99.97 |
|    Clothing   | 83.05 |  89.4 |
+---------------+-------+-------+
Summary:

+-------+-------+------+
|  aAcc |  mIoU | mAcc |
+-------+-------+------+
| 94.97 | 74.59 | 83.3 |
+-------+-------+------+
리스트에 0번째 mIoU 0.7459저장
load checkpoint from local path: /opt/ml/input/code