# Análise de modelos pré-treinados disponíveis 

Para visualizar os gráficos, abra o notebook usando este botão:

[![nbviewer](https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.svg)](https://nbviewer.org/github/chcomin/curso-visao-computacional-2024/blob/main/M12_topicos_extra/1%20-%20Modelos%20Pytorch.ipynb)

### Modelos de classificação disponíveis no PyTorch

In [1]:
import re
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Choose the Plotly renderer used when figures are shown.
pio.renderers.default = "notebook_connected"

# Table of pre-trained weights; skipinitialspace drops blanks after the commas.
df = pd.read_csv('models.csv', skipinitialspace=True)

# 'Params' is read as text with an 'M' marker; remove it and convert to float.
params_as_text = df['Params'].str.replace('M', '')
df['Params'] = params_as_text.astype(float)

# The family is the part of the weight name before the first digit or
# underscore (so names such as 'Wide_ResNet50_2_Weights...' become 'Wide').
family = [re.split(r'\d|_', weight_name)[0] for weight_name in df['Weight']]
df['Família'] = family
df

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Unnamed: 0,Weight,Acc@1,Acc@5,Params,GFLOPS,Família
0,AlexNet_Weights.IMAGENET1K_V1,56.522,79.066,61.1,0.71,AlexNet
1,ConvNeXt_Base_Weights.IMAGENET1K_V1,84.062,96.870,88.6,15.36,ConvNeXt
2,ConvNeXt_Large_Weights.IMAGENET1K_V1,84.414,96.976,197.8,34.36,ConvNeXt
3,ConvNeXt_Small_Weights.IMAGENET1K_V1,83.616,96.650,50.2,8.68,ConvNeXt
4,ConvNeXt_Tiny_Weights.IMAGENET1K_V1,82.520,96.146,28.6,4.46,ConvNeXt
...,...,...,...,...,...,...
110,ViT_L_32_Weights.IMAGENET1K_V1,76.972,93.070,306.5,15.38,ViT
111,Wide_ResNet101_2_Weights.IMAGENET1K_V1,78.848,94.284,126.9,22.75,Wide
112,Wide_ResNet101_2_Weights.IMAGENET1K_V2,82.510,96.020,126.9,22.75,Wide
113,Wide_ResNet50_2_Weights.IMAGENET1K_V1,78.468,94.086,68.9,11.40,Wide


In [2]:
# Arguments shared by both scatter plots: top-1 accuracy on the y axis,
# logarithmic x axis and one colour per model family.
shared_scatter_args = dict(y='Acc@1', log_x=True, color='Família')
small_marker = {'size': 4}

# Accuracy as a function of the number of parameters.
fig1 = px.scatter(
    df,
    x='Params',
    hover_data=['Weight', 'GFLOPS'],
    title='Acurácia dos modelos de classificação do Pytorch em função do número de parâmetros',
    **shared_scatter_args,
)
fig1.update_traces(marker=small_marker)

# Accuracy as a function of the computational cost.
fig2 = px.scatter(
    df,
    x='GFLOPS',
    hover_data=['Weight', 'Params'],
    title='Acurácia dos modelos de classificação do Pytorch em função do GFLOPS',
    **shared_scatter_args,
)
fig2.update_traces(marker=small_marker)

fig1.show()
fig2.show()

Um número menor de parâmetros facilita o treinamento do modelo. Um custo computacional menor (GFLOPS) é importante tanto para o treinamento quanto para o uso do modelo em inferência.

### Modelos timm

Modelos da biblioteca PyTorch Image Models (`timm`)

In [3]:
import timm

def create_families(data, modules_to_highlight):
    """Label every row of `data` with the timm module that lists its model.

    The labels are written in place to the 'Família' column of `data`
    (created if missing, overwritten otherwise).

    Args:
        data: DataFrame with a 'model' column containing model names.
        modules_to_highlight: iterable of (module, name_filter) pairs. Each
            pair is forwarded to `timm.list_models` as `module=` and
            `filter=`, together with `pretrained=True, include_tags=True`.

    Returns:
        None. Rows whose model is not listed for any of the given modules
        are labelled 'Outro'.
    """
    model_to_module = {}    # Model name -> module used as its family label
    for module, name_filter in modules_to_highlight:
        listed = timm.list_models(filter=name_filter, module=module,
                                  pretrained=True, include_tags=True)
        # If a model is listed for more than one pair, the last pair wins.
        model_to_module.update(dict.fromkeys(listed, module))

    data['Família'] = [model_to_module.get(model, 'Outro') for model in data['model']]

def plot_data(data, title):
    """Show a scatter plot of top-1 accuracy against parameter count.

    Args:
        data: DataFrame with 'param_count', 'top1', 'Família' and 'model'
            columns; points are coloured by 'Família'.
        title: title of the figure.
    """
    scatter_options = {
        'x': "param_count",
        'y': "top1",
        'color': "Família",
        'hover_name': 'model',
        'hover_data': 'model',
        'log_x': True,   # logarithmic x axis
        'title': title,
    }
    figure = px.scatter(data, **scatter_options)
    figure.update_traces(marker=dict(size=4))
    figure.show()

# Accuracy on ImageNet
data_in = pd.read_csv('results-imagenet.csv', thousands=',')
# Accuracy on a dataset with images of the same classes as ImageNet,
# but collected and labelled independently
data_v2 = pd.read_csv('results-imagenetv2-matched-frequency.csv', thousands=',')
# Accuracy on a dataset containing variations of ImageNet, for example
# hand-drawn images
data_sk = pd.read_csv('results-sketch.csv', thousands=',')
# Inference performance of the models
data_inf = pd.read_csv('benchmark-infer-amp-nhwc-pt210-cu121-rtx3090.csv')

# (module, name filter) pairs passed to create_families; a filter of None
# is forwarded unchanged to timm.list_models.
modules_to_highlight = [
    ('resnet', 'resnet50'),
    ('regnet', None),
    ('convnext', None),
    ('vision_transformer', 'vit*'),
    ('swin_transformer_v2', None),
    ('swin_transformer_v2_cr', None),
]

# Label the families and plot each results table, one after the other.
for results_table, plot_title in (
    (data_in, 'Acurácia dos modelos timm no ImageNet'),
    (data_v2, 'Acurácia dos modelos timm no ImageNetv2'),
    (data_sk, 'Acurácia dos modelos timm no ImageNet sketch'),
):
    create_families(results_table, modules_to_highlight)
    plot_data(results_table, plot_title)

In [4]:
def merge_tables(data_in, data_inf):
    """Merge the accuracy table with the inference benchmark table.

    A row of `data_in` is paired with a row of `data_inf` when the model
    name before the first '.' (lower-cased) equals the benchmark model name
    (lower-cased, with the '_in21ft1k' / '_in22ft1k' suffixes removed).
    Only exact matches are merged, so not every model ends up in the
    result; matching the remaining ones would require string similarity.

    Args:
        data_in: DataFrame with a 'model' column (accuracy results).
        data_inf: DataFrame with 'model' and 'param_count' columns
            (inference benchmark). Both columns are dropped from this side
            of the merge because `data_in` already provides them.

    Returns:
        A new DataFrame with one row per matched pair, holding the columns
        of `data_in` followed by the remaining columns of `data_inf`, with
        a fresh 0..n-1 index. The result is empty when nothing matches.
    """
    # Normalise the benchmark names once and remember, for each name, the
    # row positions where it occurs (in table order).
    positions_by_name = {}
    for pos_inf, model_inf in enumerate(data_inf['model']):
        key = model_inf.lower().replace('_in21ft1k', '').replace('_in22ft1k', '')
        positions_by_name.setdefault(key, []).append(pos_inf)

    # Collect row *positions* (not index labels) so that .iloc below is
    # correct whatever index the input tables carry.
    rows_in = []
    rows_inf = []
    for pos_in, model in enumerate(data_in['model']):
        # Drop the pretrained tag: keep only the text before the first '.'.
        name = model.partition('.')[0].lower()
        for pos_inf in positions_by_name.get(name, ()):
            rows_in.append(pos_in)
            rows_inf.append(pos_inf)

    data_in_c = data_in.iloc[rows_in].reset_index(drop=True)
    data_inf_c = data_inf.iloc[rows_inf].reset_index(drop=True)
    data_inf_c = data_inf_c.drop(['model', 'param_count'], axis=1)

    return pd.concat([data_in_c, data_inf_c], axis=1)

def plot_inference(data_merged, modules_to_highlight):
    """Show a scatter plot of top-1 accuracy against inference throughput.

    Args:
        data_merged: DataFrame with 'infer_img_size', 'infer_batch_size',
            'infer_samples_per_sec', 'top1' and 'model' columns. It is not
            modified.
        modules_to_highlight: (module, name_filter) pairs forwarded to
            `create_families` to colour the points.
    """
    # Use only models where input image has size 224 or 288 (larger sizes
    # lead to larger inference times) and where batch size is 1024
    # (different values trivially change inference time).
    comparable = (data_merged['infer_img_size'].isin([224, 288])
                  & (data_merged['infer_batch_size'] == 1024))
    # Work on an explicit copy: create_families assigns a column in place,
    # and doing that on a filtered selection of another DataFrame triggers
    # pandas' SettingWithCopyWarning.
    data_norm = data_merged[comparable].copy()
    create_families(data_norm, modules_to_highlight)

    fig = px.scatter(data_norm, x="infer_samples_per_sec", y="top1", color="Família", hover_name='model',
                     hover_data='model', log_x=True,
                     title="Número de imagens por segundo em modo inferência")
    fig.update_traces(marker={'size': 4})
    fig.show()

# Pair each accuracy row with its inference benchmark row.
data_merged = merge_tables(data_in=data_in, data_inf=data_inf)

In [5]:
# Alternative selection: uncomment to highlight a different set of models.
#modules_to_highlight = [('convnext', 'convnext_base*'), ('vision_transformer', 'vit_base_patch16_224*'), 
#                        ('vision_transformer', 'vit_base_patch16_clip_224*')]

plot_inference(data_merged=data_merged, modules_to_highlight=modules_to_highlight)

O gráfico acima mostra o número de imagens por segundo que uma RTX-3090 consegue processar, usando batch size 1024.