In [1]:
# Enable the autoreload extension in mode 2, so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline, below the cell that produces them.
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import os
import sys
# Location of the local LAVIS checkout that provides `args_parser`.
# Set the LAVIS_PATH environment variable to point at your own checkout; the
# fallback below is the path this notebook was originally written against.
# TODO: update this path
lavis_path = os.environ.get('LAVIS_PATH', '/nfshomes/vla/low_bit_vision/lavis_clone/lavis')
# guard so that re-running the cell does not stack duplicate sys.path entries
if lavis_path not in sys.path:
    sys.path.append(lavis_path)

# args parser for blip-2 runs
from args_parser import args_parser

import ast
from glob import glob
import re
import mmap

In [3]:
def load_result(file_path):
    """Parse the evaluation metrics and model size out of one run log.

    The log must contain a ``[Model Size]: <number>`` line, and its last
    non-blank line must contain ``[INFO] `` followed by a Python dict literal
    holding the metrics.

    Args:
        file_path: path to the log file.

    Returns:
        dict: the metrics from the final ``[INFO]`` line, plus a
        ``'model_size'`` entry (float) taken from the first
        ``[Model Size]`` line in the file.

    Raises:
        ValueError: if the file has no final ``[INFO]`` metrics line, no
            ``[Model Size]`` line, or the model size is not a number.
    """
    # Read-only text access is all that is needed: no write permission on the
    # log, and no memory map left open after the call.
    with open(file_path, 'r') as f:
        lines = f.readlines()

    # tolerate trailing blank lines after the metrics line
    non_blank = [line for line in lines if line.strip()]
    if not non_blank or '[INFO] ' not in non_blank[-1]:
        raise ValueError(f"no '[INFO] ' metrics line at the end of {file_path!r}")

    # read last line and grab metrics info as a dict
    result = ast.literal_eval(non_blank[-1].split('[INFO] ')[1])

    # grab model size output and add to results ('.' stops at the line end)
    size_match = re.search(r'\[Model Size\]: (.*)', ''.join(lines))
    if size_match is None:
        raise ValueError(f"no '[Model Size]' line found in {file_path!r}")
    result['model_size'] = float(size_match.group(1))

    return result

In [4]:
# baseline result
# metrics of the unquantized baseline run, parsed from its log
baseline_log = os.path.join('..', 'results', 'blip2_flickr', 'blip2_flickr_baseline')
baseline_result = load_result(baseline_log)
baseline_result

{'txt_r1': 97.6,
 'txt_r5': 100.0,
 'txt_r10': 100.0,
 'txt_r_mean': 99.2,
 'img_r1': 89.74,
 'img_r5': 98.18,
 'img_r10': 98.94,
 'img_r_mean': 95.62,
 'r_mean': 97.41,
 'agg_metrics': 99.2,
 'model_size': 4782.180084}

In [5]:
# folder whose sub-directories each hold one batch of quantization jobs
results_dir = os.path.join('..', 'results', 'blip2_flickr', 'uniform_quant')
# parser used to decode the command line recorded for each job
parser = args_parser()
results_dir

'../results/blip2_flickr/uniform_quant'

In [6]:
# Build one frame per job batch (sub-folder of results_dir) and combine them
# once at the end; concatenating onto df_results inside the loop would re-copy
# every previously collected row on each iteration.
batch_frames = []

for folder in os.listdir(results_dir):
    folder_path = os.path.join(results_dir, folder)

    # stray files sitting next to the batch folders have no now.txt inside them
    if not os.path.isdir(folder_path):
        continue

    # now.txt holds one launch command per line;
    # skip the torch.distributed args (the first five tokens)
    with open(os.path.join(folder_path, 'now.txt'), 'r') as f:
        cli_args = [vars(parser.parse_args(line.split()[5:])) for line in f]

    gather = []
    # grab all files starting with a number (results)
    for result_path in glob(os.path.join(folder_path, '[0-9]*_log.txt')):
        file_name = os.path.basename(result_path)
        # The glob guarantees a leading digit, so the match cannot be None.
        # This number is what gets aligned with the row position in now.txt.
        index = int(re.match(r'\d+', file_name).group(0))

        result = load_result(result_path)
        result['index'] = index
        result['job_batch'] = folder

        gather.append(result)

    print(folder_path)

    if not gather:
        # no finished jobs: there is no 'index' column to sort or align on
        print('  no result logs found, skipping this batch')
        continue

    df_args = pd.DataFrame(cli_args)

    # need to sort to match up with args from now.txt
    df_metrics = pd.DataFrame(gather).sort_values(by='index').set_index('index')

    # axis=1 aligns metrics (indexed by job number) with the args rows
    df_result = pd.concat([df_args, df_metrics], axis=1)
    batch_frames.append(df_result)

df_results = pd.concat(batch_frames, axis=0) if batch_frames else pd.DataFrame()
df_results = df_results.drop(['cfg_path', 'options'], axis=1)

../results/blip2_flickr/uniform_quant/nbit_flickr_8_6
../results/blip2_flickr/uniform_quant/nbit_qformer
../results/blip2_flickr/uniform_quant/nbit_flickr_4_2
../results/blip2_flickr/uniform_quant/nbit_vit


In [7]:
# Fold the baseline run into the table, then normalise missing values:
# None becomes np.nan and columns without a single value are removed.
# NOTE(review): no `on=` is given, so the outer merge joins on every column the
# two frames share. A run whose shared columns all equal the baseline's is
# matched with the baseline rather than yielding a separate baseline row —
# confirm this is intended.
baseline_frame = pd.DataFrame([baseline_result])
df_results = (
    baseline_frame
    .merge(df_results, how='outer')
    .fillna(value=np.nan)
    .dropna(axis=1, how='all')
)

# largest models first
df_results.sort_values(by='model_size', ascending=False)

  df_results = df_results.fillna(value=np.nan)


Unnamed: 0,txt_r1,txt_r5,txt_r10,txt_r_mean,img_r1,img_r5,img_r10,img_r_mean,r_mean,agg_metrics,...,qformer_layer_indices,qformer_self_attention_modules,qformer_self_attention_weight_bits,qformer_cross_attention_modules,qformer_cross_attention_weight_bits,qformer_text_ff_modules,qformer_text_ff_weight_bits,qformer_img_ff_modules,qformer_img_ff_weight_bits,job_batch
654,97.6,100.0,100.0,99.200000,89.74,98.18,98.94,95.620000,97.410000,99.200000,...,"[4, 5, 6, 7, 8, 9, 10, 11]",,6.0,,6.0,,6.0,,6.0,nbit_qformer
647,97.6,100.0,100.0,99.200000,89.74,98.18,98.94,95.620000,97.410000,99.200000,...,"[4, 5, 6, 7, 8, 9, 10, 11]",,8.0,,8.0,,8.0,,8.0,nbit_qformer
649,97.6,100.0,100.0,99.200000,89.74,98.18,98.94,95.620000,97.410000,99.200000,...,"[0, 1, 2, 3]",,6.0,,6.0,,6.0,,6.0,nbit_qformer
650,97.6,100.0,100.0,99.200000,89.74,98.18,98.94,95.620000,97.410000,99.200000,...,"[4, 5, 6, 7]",,6.0,,6.0,,6.0,,6.0,nbit_qformer
651,97.6,100.0,100.0,99.200000,89.74,98.18,98.94,95.620000,97.410000,99.200000,...,"[8, 9, 10, 11]",,6.0,,6.0,,6.0,,6.0,nbit_qformer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,18.2,31.7,37.5,29.133333,11.68,24.20,29.52,21.800000,25.466667,29.133333,...,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","[query, key, value, dense]",4.0,"[query, key, value, dense]",4.0,"[intermediate, output]",4.0,"[intermediate_query, output_query]",4.0,nbit_flickr_4_2
64,0.1,0.5,0.9,0.500000,0.16,0.54,0.86,0.520000,0.510000,0.500000,...,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","[query, key, value, dense]",2.0,"[query, key, value, dense]",2.0,,2.0,"[intermediate_query, output_query]",2.0,nbit_flickr_4_2
71,0.1,0.5,1.0,0.533333,0.08,0.56,1.00,0.546667,0.540000,0.533333,...,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","[query, key, value, dense]",2.0,"[query, key, value, dense]",2.0,"[intermediate, output]",2.0,,2.0,nbit_flickr_4_2
12,0.0,0.4,0.8,0.400000,0.14,0.52,0.86,0.506667,0.453333,0.400000,...,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]",,2.0,,2.0,"[intermediate, output]",2.0,"[intermediate_query, output_query]",2.0,nbit_flickr_4_2


In [8]:
# inspect which columns remain after the all-NaN columns were dropped
df_results.columns

Index(['txt_r1', 'txt_r5', 'txt_r10', 'txt_r_mean', 'img_r1', 'img_r5',
       'img_r10', 'img_r_mean', 'r_mean', 'agg_metrics', 'model_size',
       'visual_encoder_block_modules', 'visual_encoder_block_indices',
       'visual_encoder_block_weight_bits', 'qformer_layer_indices',
       'qformer_self_attention_modules', 'qformer_self_attention_weight_bits',
       'qformer_cross_attention_modules',
       'qformer_cross_attention_weight_bits', 'qformer_text_ff_modules',
       'qformer_text_ff_weight_bits', 'qformer_img_ff_modules',
       'qformer_img_ff_weight_bits', 'job_batch'],
      dtype='object')

In [13]:
# determine binary categorical variables for if each portion of blip-2 model is quantized or not
def _is_missing(value):
    """Return True for None or a floating-point NaN (an unset option)."""
    return value is None or (isinstance(value, (float, np.floating)) and np.isnan(value))


def _covers_all(required, selected):
    """Return True when `selected` is a list containing every item of `required`."""
    return isinstance(selected, list) and set(required).issubset(selected)


def _thirds(count):
    """Split range(count) into (front, middle, end) sets; `end` takes any remainder."""
    split = count // 3
    indices = list(range(count))
    return set(indices[:split]), set(indices[split:2 * split]), set(indices[2 * split:])


def _common_weight_bits(values):
    """Return the one bit width shared by all defined entries, or np.nan if none is defined."""
    defined = {value for value in values if not _is_missing(value)}
    if not defined:
        return np.nan
    if len(defined) > 1:
        raise ValueError(f'expected a single Q-Former bit width, got {sorted(defined)}')
    value = defined.pop()
    # hand back a plain Python scalar, as ndarray.item() would
    return value.item() if isinstance(value, np.generic) else value


def tidy_results(row_dict, num_vit_blocks=39, num_qformer_layers=12):
    """Summarise one results row as flags describing what was quantized.

    Args:
        row_dict: one row of the results table as a dict (for example from
            ``DataFrame.to_dict(orient='records')``). Option columns that are
            absent are treated the same as unset (None / NaN) options.
        num_vit_blocks: number of visual-encoder blocks, used to split the
            block indices into front / middle / end thirds.
        num_qformer_layers: number of Q-Former layers, split the same way.

    Returns:
        dict with, in this order:
        - ``vit_attn`` / ``vit_ff``: True when both attention modules
          (qkv, proj) / both feed-forward modules (fc1, fc2) are listed.
        - ``vit_front_blocks`` / ``vit_middle_blocks`` / ``vit_end_blocks``:
          True when every block of that third is listed.
        - ``vit_weight_bits``: the visual-encoder bit width as given.
        - ``qformer_front_blocks`` / ``..._middle_blocks`` / ``..._end_blocks``:
          same as above for the Q-Former layers.
        - ``qformer_self_attn``, ``qformer_cross_attn``, ``qformer_text_ff``,
          ``qformer_img_ff``: True when that option holds a module list.
        - ``qformer_weight_bits``: the bit width shared by the defined
          Q-Former options, or NaN when none is defined.
        - ``Quantized Portion`` and ``weight_bits``: label and bit width
          derived from ``job_batch``; NaN for an unrecognised batch.

    Raises:
        ValueError: if the defined Q-Former bit widths disagree.
    """
    toret = {}

    # ------- Visual Encoder Options -------
    vit_modules = row_dict.get('visual_encoder_block_modules')
    toret['vit_attn'] = _covers_all({'qkv', 'proj'}, vit_modules)
    toret['vit_ff'] = _covers_all({'fc1', 'fc2'}, vit_modules)

    vit_front, vit_middle, vit_end = _thirds(num_vit_blocks)
    vit_indices = row_dict.get('visual_encoder_block_indices')
    toret['vit_front_blocks'] = _covers_all(vit_front, vit_indices)
    toret['vit_middle_blocks'] = _covers_all(vit_middle, vit_indices)
    toret['vit_end_blocks'] = _covers_all(vit_end, vit_indices)

    toret['vit_weight_bits'] = row_dict.get('visual_encoder_block_weight_bits', np.nan)

    # ------- QFormer Options -------
    qf_front, qf_middle, qf_end = _thirds(num_qformer_layers)
    qformer_indices = row_dict.get('qformer_layer_indices')
    toret['qformer_front_blocks'] = _covers_all(qf_front, qformer_indices)
    toret['qformer_middle_blocks'] = _covers_all(qf_middle, qformer_indices)
    toret['qformer_end_blocks'] = _covers_all(qf_end, qformer_indices)

    # a module list means that part is quantized; anything else means it is not
    toret['qformer_self_attn'] = isinstance(row_dict.get('qformer_self_attention_modules'), list)
    # each flag reads its own *_modules column (cross-attention previously read self-attention's)
    toret['qformer_cross_attn'] = isinstance(row_dict.get('qformer_cross_attention_modules'), list)
    toret['qformer_text_ff'] = isinstance(row_dict.get('qformer_text_ff_modules'), list)
    toret['qformer_img_ff'] = isinstance(row_dict.get('qformer_img_ff_modules'), list)

    # these should all be the same, if defined
    toret['qformer_weight_bits'] = _common_weight_bits([
        row_dict.get('qformer_self_attention_weight_bits'),
        row_dict.get('qformer_cross_attention_weight_bits'),
        row_dict.get('qformer_text_ff_weight_bits'),
        row_dict.get('qformer_img_ff_weight_bits'),
    ])

    # NOTE: the weight_bits choice below is only valid for the current set of
    # runs where we are fixing bit width
    job_batch = row_dict.get('job_batch')
    if job_batch == 'nbit_qformer':
        toret['Quantized Portion'] = 'Q-Former'
        toret['weight_bits'] = toret['qformer_weight_bits']
    elif job_batch in ('nbit_flickr_8_6', 'nbit_flickr_4_2'):
        toret['Quantized Portion'] = 'ViT + Q-Former'
        toret['weight_bits'] = toret['vit_weight_bits']
    elif job_batch == 'nbit_vit':
        toret['Quantized Portion'] = 'ViT'
        toret['weight_bits'] = toret['vit_weight_bits']
    else:
        # e.g. the baseline row: keep the same keys so every row has one schema
        toret['Quantized Portion'] = np.nan
        toret['weight_bits'] = np.nan

    return toret

In [14]:
# run tidy_results over every row of the results table, in row order
gather = list(map(tidy_results, df_results.to_dict(orient='records')))

In [15]:
# Attach the tidy columns to the raw results. concat(axis=1) lines rows up by
# index label, so the tidy frame is built on df_results' own index rather than
# relying on both frames happening to carry the same default RangeIndex.
df_tidy = pd.DataFrame(gather, index=df_results.index)
df_flickr = pd.concat([df_results, df_tidy], axis=1)
df_flickr

Unnamed: 0,txt_r1,txt_r5,txt_r10,txt_r_mean,img_r1,img_r5,img_r10,img_r_mean,r_mean,agg_metrics,...,qformer_front_blocks,qformer_middle_blocks,qformer_end_blocks,qformer_self_attn,qformer_cross_attn,qformer_text_ff,qformer_img_ff,qformer_weight_bits,Quantized Portion,weight_bits
0,0.0,0.0,0.4,0.133333,0.10,0.34,0.72,0.386667,0.260000,0.133333,...,True,False,False,False,False,True,False,2.0,ViT + Q-Former,2.0
1,0.0,0.1,0.3,0.133333,0.06,0.22,0.56,0.280000,0.206667,0.133333,...,True,False,False,False,False,True,False,2.0,Q-Former,"[2.0, 2.0, 2.0, 2.0]"
2,0.0,0.2,0.2,0.133333,0.14,0.30,0.72,0.386667,0.260000,0.133333,...,True,False,True,False,False,True,False,2.0,Q-Former,"[2.0, 2.0, 2.0, 2.0]"
3,0.0,0.3,0.4,0.233333,0.14,0.42,0.62,0.393333,0.313333,0.233333,...,True,False,False,True,True,True,False,2.0,ViT + Q-Former,2.0
4,0.0,0.3,0.7,0.333333,0.16,0.60,1.08,0.613333,0.473333,0.333333,...,False,True,False,True,True,True,False,2.0,ViT + Q-Former,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
975,98.0,100.0,100.0,99.333333,88.12,97.88,98.82,94.940000,97.136667,99.333333,...,False,True,False,False,False,False,False,2.0,ViT + Q-Former,2.0
976,98.0,100.0,100.0,99.333333,88.12,97.88,98.82,94.940000,97.136667,99.333333,...,False,False,False,False,False,False,False,,ViT,2.0
977,98.0,100.0,100.0,99.333333,89.60,98.10,98.96,95.553333,97.443333,99.333333,...,True,True,True,True,True,False,True,4.0,Q-Former,"[4.0, 4.0, 4.0, 4.0]"
978,98.0,100.0,100.0,99.333333,89.66,98.10,98.92,95.560000,97.446667,99.333333,...,False,True,True,True,True,False,True,4.0,Q-Former,"[4.0, 4.0, 4.0, 4.0]"


In [12]:
# write the combined table without the row index (`index` is a boolean flag)
df_flickr.to_csv('blip2_flickr_results.csv', index=False)