In [24]:

from mr_eval.utils.utils import *
import random
import os,sys
import os,sys
sys.path.append("/mnt/petrelfs/songmingyang/code/mm/robustLMM/robustlmm/model_inference/llama_inference/inference")
from prompts import prompt_dict

from transformers import AutoTokenizer
from transformers import AutoModel, AutoConfig

# Narrow the imported prompt table to the "llava_caption_to_conversation" entry;
# later cells read its "system" and "fewshot" keys.
prompt_dict = prompt_dict["llava_caption_to_conversation"]

def convert_to_human_readable_size(num):
    """Format a number with a power-of-1000 magnitude suffix.

    Args:
        num: the value to format (int or float; may be negative).

    Returns:
        A string such as "1.75 Z": ``num`` divided by the largest listed power
        of 1000 that does not exceed its magnitude, printed with two decimals
        and followed by the matching suffix (K, M, B, T, P, E, Z, Y, R).
        Values whose magnitude is below 1000 are returned via plain ``str``
        formatting without a suffix.
    """
    # Ordered from largest to smallest so the first match is the most compact form.
    units = (
        (1e27, "R"),
        (1e24, "Y"),
        (1e21, "Z"),
        (1e18, "E"),
        (1e15, "P"),
        (1e12, "T"),
        (1e9, "B"),
        (1e6, "M"),
        (1e3, "K"),
    )
    # Compare on the magnitude so negative values are scaled like positive ones.
    magnitude = abs(num)
    for scale, suffix in units:
        # ">=" (not ">") so an exact power of 1000 gets its suffix: 1000 -> "1.00 K".
        if magnitude >= scale:
            return f"{num / scale:.2f} {suffix}"
    return f"{num}"
    
def get_flops(
    num_layers,
    hidden_size,
    num_heads,
    num_key_value_heads,
    vocab_size,
    seq_len,
    ffn_hidden_size,
    batch_size=1,
):
    """Count the matmul FLOPs of a decoder-only model for one forward + backward pass.

    Args:
        num_layers: number of decoder layers
        hidden_size: hidden size of the model
        num_heads: number of attention (query) heads
        num_key_value_heads: number of key/value heads; ``None`` means "same as num_heads"
        vocab_size: size of the vocabulary (used for the LM head)
        seq_len: sequence length processed by the decoder
        ffn_hidden_size: hidden size of the feed-forward block
        batch_size: batch size

    Returns:
        A ``(model_flops, hardware_flops)`` tuple.
        model_flops: flops of the model (meant to be independent of hardware and implementation)
        hardware_flops: flops actually performed on the hardware; currently just a copy of
            model_flops. Check 6.3 in https://arxiv.org/pdf/2205.05198.pdf
    """
    kv_heads = num_heads if num_key_value_heads is None else num_key_value_heads
    head_dim = hidden_size // num_heads
    tokens = batch_size * seq_len

    # Forward cost of each matmul family inside ONE decoder layer
    # (a matmul contracting over k costs 2*k flops per output element).
    per_layer_fwd = [
        # Q projection: hidden -> num_heads * head_dim
        2 * tokens * hidden_size * num_heads * head_dim,
        # K and V projections: hidden -> 2 * kv_heads * head_dim
        2 * tokens * hidden_size * 2 * kv_heads * head_dim,
        # attention scores: Q @ K^T, contracting over head_dim
        2 * batch_size * num_heads * seq_len * head_dim * seq_len,
        # attention-weighted values: scores @ V, contracting over seq_len
        2 * batch_size * num_heads * seq_len * seq_len * head_dim,
        # attention output projection back to hidden_size
        2 * batch_size * num_heads * seq_len * head_dim * hidden_size,
        # first FFN stage; coefficient is 4 rather than 2
        # (presumably two input projections, as in a gated FFN -- TODO confirm)
        4 * tokens * hidden_size * ffn_hidden_size,
        # second FFN stage: ffn_hidden_size -> hidden_size
        2 * tokens * ffn_hidden_size * hidden_size,
    ]
    decoder_fwd = num_layers * sum(per_layer_fwd)

    # LM head: hidden_size -> vocab_size for every position
    lm_head_fwd = 2 * tokens * hidden_size * vocab_size

    # Backward costs twice the forward for matmuls (gradients w.r.t. both the
    # input and the weight), hence 1 (fwd) + 2 (bwd) = 3.
    model_flops = 3 * (decoder_fwd + lm_head_fwd)

    # TODO: placeholder -- hardware flops are not modelled separately yet.
    hardware_flops = model_flops

    return model_flops, hardware_flops



# --- Data -------------------------------------------------------------------
# Caption records (each later read via item["caption"] and item["id"]).
input_data = process_jsonl(
    "/mnt/petrelfs/songmingyang/songmingyang/data/llava_train/LLaVA-Instruct-150K/control/llavaft_control_llava13b_caps.jsonl"
)

# Conversation records produced from those captions.
output_data = process_jsonl(
    "/mnt/petrelfs/songmingyang/songmingyang/data/llava_train/LLaVA-Instruct-150K/control/llavaft_control_llava13b_convs.jsonl"
)

# id -> len(origin_output).
# NOTE(review): if "origin_output" is a string this is a CHARACTER count, yet it is
# later used as the number of generated tokens -- confirm, and tokenize if needed.
output_length_dict = {item["id"]: len(item["origin_output"]) for item in output_data}

# Draw the evaluation subset with a dedicated, seeded RNG so the reported FLOPs are
# reproducible across kernel restarts, and never ask for more items than exist
# (random.sample raises ValueError when the sample is larger than the population).
SAMPLE_SIZE = 1000
SAMPLE_SEED = 0
input_data = random.Random(SAMPLE_SEED).sample(input_data, min(SAMPLE_SIZE, len(input_data)))

# NOTE(review): the tokenizer comes from a Llama-3.1-8B checkpoint while the
# architecture config below comes from a Llama-3-70B checkpoint -- confirm that the
# two share a vocabulary/chat template so the token counts are representative.
tokenizer = AutoTokenizer.from_pretrained("/mnt/petrelfs/songmingyang/quxiaoye/models/Llama-3.1-8B-Instruct")

# --- Model architecture -----------------------------------------------------
# Replace with your LLaMA model path or Hugging Face hub name
model_name = "/mnt/petrelfs/songmingyang/quxiaoye/models/Meta-Llama-3-70B-Instruct-back"

# Only the configuration is loaded; no model weights are instantiated here.
config = AutoConfig.from_pretrained(model_name)

# Architecture parameters fed to get_flops
num_layers = config.num_hidden_layers
hidden_size = config.hidden_size
num_heads = config.num_attention_heads
num_key_value_heads = getattr(config, "num_key_value_heads", None)  # Might not exist in all models
vocab_size = config.vocab_size
seq_len = config.max_position_embeddings
# None here would make get_flops fail on the FFN terms (int * None).
ffn_hidden_size = getattr(config, "intermediate_size", None)  # FFN hidden size


In [25]:
from copy import deepcopy

# Shared prompt prefix: the system instruction followed by the first two few-shot
# exchanges, each contributing a user turn (index 0) and an assistant turn (index 1).
messages = [{"role": "system", "content": prompt_dict["system"]}] + [
    {"role": role, "content": prompt_dict["fewshot"][shot][turn]}
    for shot in (0, 1)
    for turn, role in enumerate(("user", "assistant"))
]

# Spot-check a single sampled item: append its caption as the final user turn.
target_input_data = input_data[3]
messages2 = deepcopy(messages) + [{"role": "user", "content": target_input_data["caption"]}]

# Recorded output length for this item versus the templated prompt length.
target_length = output_length_dict[target_input_data["id"]]
inputs = tokenizer.apply_chat_template(messages2, tokenize=True)

print(f"Input length: {len(inputs)}, Target length: {target_length}")

Input length: 1635, Target length: 1938


In [26]:
# Length of `inputs`, the result of tokenizer.apply_chat_template(..., tokenize=True).
len(inputs)

1635

In [27]:
from tqdm import tqdm
from copy import deepcopy

target_convs = []
all_flops = 0
output_path = "./llama3_flops.jsonl"

# NOTE(review): get_flops returns forward + backward flops (it multiplies the forward
# cost by 3) for a full sequence of the given length. Summing it over every generated
# position therefore models re-processing the whole prefix at each step, with a
# backward pass -- presumably an over-estimate for KV-cached inference; confirm intent.
# NOTE(review): append_jsonl is called once per item, so re-running this cell
# presumably appends duplicate rows to output_path -- confirm / clear the file first.
for item in tqdm(input_data):
    # Few-shot prefix + this item's caption as the final user turn.
    current_message = deepcopy(messages)
    current_message.append({"role": "user", "content": item["caption"]})
    inputs = tokenizer.apply_chat_template(current_message, tokenize=True)
    start_length = len(inputs)
    item_id = item["id"]
    # Items with no recorded output fall back to an assumed length of 1000.
    target_length = output_length_dict.get(item_id, 1000)
    # One get_flops call per generated position, with the sequence growing by one
    # each step. The generator variable stays local, so the notebook-level
    # `seq_len` (config.max_position_embeddings) is no longer overwritten.
    total_flops = sum(
        get_flops(num_layers, hidden_size, num_heads, num_key_value_heads, vocab_size, cur_len,
                  ffn_hidden_size, batch_size=1)[0]
        for cur_len in range(start_length, start_length + target_length)
    )
    append_jsonl({"id": item_id, "flops": total_flops}, output_path)
    all_flops += total_flops

print(f"Total flops: {convert_to_human_readable_size(all_flops)}")

# presumably the sizes of the balanced and the augmented datasets -- TODO confirm
balance = 581745
aug = 665298

# Extrapolate the per-item average to the (aug - balance) extra items. Divide by the
# number of items actually processed instead of a hard-coded 1000 so the estimate
# stays correct if the sample size changes; max(..., 1) keeps an empty sample at 0.
num_sampled = max(len(input_data), 1)
real_flops = all_flops / num_sampled * (aug - balance)
print(f"Real flops: {convert_to_human_readable_size(real_flops)}")


100%|██████████| 1000/1000 [00:08<00:00, 122.14it/s]

Total flops: 1.75 Z
Real flops: 145.94 Z





In [8]:
# Two dataset sizes (presumably balanced vs. augmented -- TODO confirm) and their
# difference scaled by 100 / 1e6.
balance, aug = 581745, 665298
(aug - balance) * 100 / 1e6

8.3553

In [9]:
# Raw difference between the two counts; relies on `aug` and `balance` from an earlier cell.
aug-balance

83553

In [6]:
# Sum of three hand-entered figures. NOTE(review): 8.36 presumably is the rounded
# result of the 100*(aug-balance)/1e6 cell; the origin of the other two addends is
# not visible in this notebook -- document them.
8.36+8.26+1.66

18.279999999999998

In [4]:
from mr_eval.utils.utils import *

# Load the share-captioner JSON file; this rebinds `input_data` (previously the
# sampled caption list) to the loaded content.
input_data = load_json_file(
    "/mnt/petrelfs/songmingyang/songmingyang/data/llava_train/sharegpt4v_train/pt/share-captioner_coco_lcs_sam_1246k_1107.json"
)

In [5]:
# Number of records in the JSON file loaded into `input_data`.
len(input_data)

1246901

In [7]:
# Earlier candidate file, superseded by the "_adjust_threshold" variant below:
#   /mnt/petrelfs/songmingyang/songmingyang/data/llava_train/sharegpt4v_train/pt/reformed_data/dinoobj_toc/share_meta_dinoobj_all_p0_alpha_1.json
origin_data = load_json_file(
    "/mnt/petrelfs/songmingyang/songmingyang/data/llava_train/sharegpt4v_train/pt/reformed_data/dinoobj_toc/share_meta_dinoobj_all_p0_alpha_1_adjust_threshold.json"
)
# Display the record count of the loaded file.
len(origin_data)

1168639

In [11]:
# Same calculation as for the first dataset, using the two record counts displayed
# by the preceding load cells (1168639 and 1246901).
balance, aug = 1168639, 1246901
(aug - balance) * 100 / 1e6

7.8262

In [12]:
# Sum of three hand-entered figures. NOTE(review): 7.83 presumably is the rounded
# result of the 100*(aug-balance)/1e6 cell; the origin of the other two addends is
# not visible in this notebook -- document them.
7.83 + 1.55 + 7.58

16.96

In [13]:
# Raw difference between the two counts; relies on `aug` and `balance` from an earlier cell.
aug-balance

78262