# Model Metrics Workshop
어떻게 하면 우리의 모델에서 아래의 내용을 알아낼 수 있는지 살펴본다.

1. The size of a model (in RAM)
2. The size of a model (when stored as a weight file)
3. The inference time
4. The FLOPs, MACs, and number of parameters

추후에 우리 모델이 제대로 최적화 되었는지 확인하기 위한 툴로 생각하면 좋겠다.

In [1]:
import numpy as np
from torch import nn
import torch, os, time

## Get a Model
가장 기본이 되는 모델을 불러온다. resnet18, 34, 50

In [2]:
from torchvision.models import resnet18, resnet34, resnet50

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_list = [resnet18(pretrained=False).to(device), resnet34(pretrained=False).to(device), resnet50(pretrained=False).to(device)]

모델에 넣을 dummy input을 생성한다.

In [4]:
dummy_input = torch.randn(2, 3, 224, 224).to(device)

# Get the Size of Model

저장된 모델이 RAM에 load 되었을 때의 사이즈를 알아본다.

> - Parameters: Weights & Biases
> - Buffers: Additional tensors used to track information such as the mean and standard deviation of a batchnorm.

Buffers는 batchnorm 과정을 통해 생기는 deviation이나 mean 같은 정보를 추적하는데 사용되는 tensor를 의미한다...? __저게 뭐지?__

추가적인 내용 조사 드가자 --> 예를 들어서 일반 변수가 램에 올라갔을 때의 경우를 분석한다. 

In [8]:
def get_model_size(model):
    size_of_parameters = sum([param.element_size() * param.nelement() for param in model.parameters()])
    size_of_buffers = sum([buf.element_size() * buf.nelement() for buf in model.buffers()])
    model_size = size_of_parameters + size_of_buffers #Bytes
    KILOBYTE_TO_BYTE = 1024
    MEGABYTE_TO_KILOBYTE = 1024
    model_size = model_size / (KILOBYTE_TO_BYTE * MEGABYTE_TO_KILOBYTE) #MegaBytes
    return model_size

In [9]:
for model in model_list:
  model_size = get_model_size(model)
  print(f"Size of the model = {round(model_size,3)} Mb")

Size of the model = 44.629 Mb
Size of the model = 83.217 Mb
Size of the model = 97.695 Mb


# Get the inference time
이제 추론 시간을 계산한다. 그냥 계산 전후 시간의 차이를 계산한다.

In [12]:
for model in model_list: 
    model = model.to("cpu")
    dummy_input = dummy_input.to("cpu")
    model.eval()
    with torch.no_grad():
        start_time = time.time()
        _ = model(dummy_input)
        end_time = time.time()
        t = (end_time - start_time)*100
        print(f"Inference time = {t:.4f}ms")

Inference time = 3.5304ms
Inference time = 5.3024ms
Inference time = 7.8869ms


In [16]:
def get_inference_time(model : nn.Module, dummy_input : torch.Tensor, device:torch.device, n_iters = 10) -> float:
    """Function to calculate inference time of model on given input

    Args:
        model (nn.Module): input model
        input (torch.Tensor): sample input of valid shape
        device (torch.device): compute device as in CPU, GPU, TPU]. Defaults to 'cpu'.
        nIters (int, optional): number of iterations over which to find avg inference time. Defaults to 10.

    Returns:
        avg_inference_time (float): Avg inference time for `input` over `nIters` for `model`
    """   
    # initialize default value
    avg_inference_time = np.inf # 무한대의 양수

    # check for GPU availability and user option
    checkForGPU = False
    if torch.cuda.is_available() == True:
        if 'cuda' in str(device):
            checkForGPU = True

    # check for zero input
    if n_iters > 0:
        # move to target device        
        model = model.to(device)
        dummy_input = dummy_input.to(device)

        # change model to inference mode
        model.eval()

        # find the avg time take for forward pass
        with torch.no_grad():
            start_time = time.time()
            for _ in range(n_iters):
                _ = model(dummy_input)

                # wait for cuda to finish (cuda is asynchronous!)
                if checkForGPU == True:
                    torch.cuda.synchronize()
            endTime = time.time()
        
        elapsedTime = endTime - start_time
        batch_size = dummy_input.size()[0]
        avg_inference_time = elapsedTime / (batch_size *  n_iters)
    return avg_inference_time 

In [17]:
for model in model_list:
    inference_time = get_inference_time(model, dummy_input, device) # in seconds
    print(f"Single batch inference Time of model = {round(inference_time,3)} seconds")

Single batch inference Time of model = 0.044 seconds
Single batch inference Time of model = 0.002 seconds
Single batch inference Time of model = 0.004 seconds


# Calculate the size of the model files
그냥 모델의 사이즈 확인하는 방법

In [19]:
def get_model_file_size(model : nn.Module) -> float:
    """function returns size of model state dict in MB 
    Args:
        model (nn.Module): input model
    Returns:
        modelFileSize (float): size of model state dict in MB
    """
    torch.save(model.state_dict(), "model.p")
    model_file_size = os.path.getsize("model.p")/1e6
    os.remove('model.p') # 잠깐 확인하는 용도
    return model_file_size

In [20]:
for model in model_list:
    model_file_size = get_model_file_size(model)
    print(f"Size of the model file = {round(model_file_size,3)} Mb")

Size of the model file = 46.836 Mb
Size of the model file = 87.33 Mb
Size of the model file = 102.545 Mb


# Calculate FLOPs, FLOPS and MACs

- **FLOPs** means Floating Point Operations — it's the number of operations being run.
- **FLOPS** means Floating Point Operations per Second — it's a hardware thing. The better your hardware, the more operations it can do.
- **MACs** means Multiply-Accumulate Computations — it's a combination of an addition and a multiplication (Input*Weight + Bias is a good example of it).
As a rule: 1 MAC = 2 FLOPs

In [21]:
import sys
sys.path.append('thop_library')

In [24]:
from thop import profile

ModuleNotFoundError: No module named 'thop'

In [23]:
def get_metrics(model, input):
    MACs, params = profile(model, inputs=(input,), verbose=False)
    FLOPs = 2*MACs
    return MACs* 1e-6, FLOPs*1e-6, params*1e-6

for model in model_list:
    input = input.to(device)
    MMACs, MFLOPs, Mparams = get_metrics(model, input)
    print(f"{round(MMACs,3)} MMACs, {round(MFLOPs,3)} MFLOPs and {round(Mparams,3)} M parameters")

/home/mint-lab/cjh_ws/ThinkAutonomous/NeuralOpt
