In [1]:
import torch
import importlib

In [18]:
import copy

# Load the saved calibration file and keep only its "bi_scores" entry.
# NOTE(review): torch.load unpickles the file, so only point this at
# checkpoints that come from a trusted source.
calib_path = "./calibs/opt-1.3b-0_4-alt_bi-2.pt"
covs = torch.load(calib_path)

# Deep-copy the scores so they stay usable after the loaded object is dropped.
bi_scores = copy.deepcopy(covs["bi_scores"])
del covs  # drop the reference to everything else that was loaded

# Last expression of the cell: display the scores.
bi_scores


[0.8786431307515659,
 0.02865595771819071,
 0.012344635480324384,
 0.010712580377896158,
 0.007945304756589056,
 0.008145043678833064,
 0.010687641461152614,
 0.013192261723785459,
 0.016911443286873856,
 0.020114605027894722,
 0.02635363474356653,
 0.030209825229025183,
 0.032271321900338296,
 0.03522470501741087,
 0.03730167941275511,
 0.0424363124975172,
 0.0460241475119369,
 0.04737130553497762,
 0.051858460256311495,
 0.04985118483729031,
 0.053380099567878675,
 0.05049064415166116,
 0.05366042848140387,
 0.04291894399760937]

In [None]:
import compression_utils as CU
import matplotlib.pyplot as plt
import numpy as np

# Pick up on-disk edits to compression_utils without restarting the kernel.
importlib.reload(CU)

# Smoothing sweep: start at 0.01 and step by `inc` for as long as the value
# stays <= 2.
smoothing_start = 0.01
smoothing_max = 2
inc = 0.1

# Derive every value from its index instead of repeatedly adding `inc`, so
# floating-point error does not accumulate across the sweep.
smoothing_values = []
step = 0
while smoothing_start + step * inc <= smoothing_max:
    smoothing_values.append(smoothing_start + step * inc)
    step += 1

# One per-layer sparsity allocation for each smoothing value.
# NOTE(review): the original call carried the comment "reversing the minus 1";
# nothing in this cell corresponds to it — confirm whether it still applies
# to CU.allocate_global_sparsity.
sparsities = [
    CU.allocate_global_sparsity(bi_scores, compression_ratio=0.4, smoothing=smoothing)
    for smoothing in smoothing_values
]

# Create figure and axis
plt.figure(figsize=(12, 6))

# Create x-axis values (layer numbers)
layers = np.arange(len(sparsities[0]))

# Plot a line for each smoothing value. The label uses the exact value that
# was passed to the allocator, with two decimals so 0.01, 0.11, ... are not
# rounded down to 0.0, 0.1, ...
for smoothing, sparsity in zip(smoothing_values, sparsities):
    plt.plot(layers, sparsity, label=f"Smoothing: {smoothing:.2f}")

# Customize the plot
plt.xlabel("Layer Number")
plt.ylabel("Sparsity")
plt.title("Sparsity Distribution Across Layers for Different Smoothing Values")
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
plt.grid(True, linestyle="--", alpha=0.7)

# Adjust layout to prevent label cutoff
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Bar chart of bi_scores, one bar per entry.
fig, ax = plt.subplots(figsize=(12, 6))

# Work on a NumPy array whether the scores are a torch tensor or a plain sequence.
bi_scores_np = bi_scores.cpu().numpy() if torch.is_tensor(bi_scores) else np.array(bi_scores)

# x positions are simply the entry indices.
layers = np.arange(len(bi_scores_np))

ax.bar(layers, bi_scores_np, alpha=0.7)

# Labels, title and a light dashed grid.
ax.set_xlabel("Layer Index")
ax.set_ylabel("Score Value")
ax.set_title("Bi-scores Distribution Across Layers")
ax.grid(True, linestyle="--", alpha=0.3)

# Tighten the layout, then render.
fig.tight_layout()
plt.show()

# Report the array shape below the figure.
print(f"Bi-scores shape: {bi_scores_np.shape}")

## Calibration testing

In [1]:
import torch
from model_utils import load_model, get_model_attrs
from compression_utils import get_embedders, get_layer_block
import eval
import importlib

# Re-execute the local `eval` module (imported above; it shadows the builtin
# `eval` in this notebook) so on-disk edits are picked up without a kernel restart.
importlib.reload(eval)

# Make CUDA the default device for tensors created from here on; this cell
# therefore requires a GPU.
torch.set_default_device("cuda")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the OPT-1.3B model together with its tokenizer and config, then move
# the model onto the GPU.
model, tokenizer, config = load_model("facebook/opt-1.3b")
model.cuda()

# Fetch the calibration texts.
# NOTE(review): the 4 is presumably the number of texts (the tokenized batch
# printed further down has 4 rows) — confirm against eval.load_calibration_texts.
text_arr = eval.load_calibration_texts(4, model, tokenizer)

# Tokenize all texts as one padded batch, truncating anything longer than
# 2048 tokens, and return PyTorch tensors.
calibration_texts = text_arr.tolist()
inputs = tokenizer(
    calibration_texts,
    max_length=2048,
    truncation=True,
    padding=True,
    return_tensors="pt",
)


`torch_dtype` is deprecated! Use `dtype` instead!


In [3]:
# Token ids on the GPU; print the shape as a quick sanity check.
input_ids = inputs.input_ids.to("cuda")
print(input_ids.shape)

n_layers, n_heads, d_model, head_dim, arch = get_model_attrs(model)


def _zero_head_matrices():
    """Return an n_layers x n_heads nested list of fresh float64 zero matrices.

    Every entry is its own (head_dim, head_dim) tensor on the GPU, so the
    entries can be updated independently.
    """
    grid = []
    for _ in range(n_layers):
        per_layer = []
        for _ in range(n_heads):
            per_layer.append(
                torch.zeros(head_dim, head_dim, dtype=torch.float64, device="cuda")
            )
        grid.append(per_layer)
    return grid


# Separate zero-initialised grids for Q and K (presumably per-head covariance
# accumulators, going by the names — they are only allocated here).
cov_q_list = _zero_head_matrices()
cov_k_list = _zero_head_matrices()

torch.Size([4, 2048])


In [None]:
# Shape probe: embed the calibration batch and push it through layer 0's
# q_proj, printing the tensor shapes along the way. Gradients are disabled
# because nothing here is trained.
model.eval()
with torch.no_grad():
    embed_tokens, embed_positions = get_embedders(model)
    token_embeddings = embed_tokens(input_ids)

    # Take the sequence length from the batch itself rather than hard-coding
    # 2048: the tokenizer pads with padding=True and max_length=2048, so a
    # batch of shorter texts yields fewer positions.
    seq_len = input_ids.shape[1]
    position_ids = torch.arange(
        0, seq_len, dtype=torch.long, device=input_ids.device
    ).unsqueeze(0)
    # NOTE(review): this assumes embed_positions accepts position indices as
    # its first argument. Some positional-embedding modules expect an
    # attention mask there instead — confirm against get_embedders.
    position_embeddings = embed_positions(position_ids)
    print(
        f"token_embeddings.shape = {token_embeddings.shape}, position_embeddings.shape = {position_embeddings.shape}"
    )

    # The (1, seq_len, ...) position embeddings broadcast over the batch dimension.
    hidden_states = token_embeddings + position_embeddings
    print(f"hidden_states.shape = {hidden_states.shape}")

    # Only the first layer is inspected for now.
    layer = get_layer_block(model, 0)
    print(f"q_proj.shape {layer.self_attn.q_proj.weight.shape}")
    q_states = layer.self_attn.q_proj(hidden_states)
    print(f"q_states.shape = {q_states.shape}")
    # TODO: extend this probe to iterate over all n_layers.


In [9]:
import torch

# Scratch check of the shapes obtained when a 2048x2048 matrix is cut into
# 64-row chunks and one chunk is stacked with a bias row.
A = torch.zeros(2048, 2048)
B = torch.split(A, 64, dim=0)  # tuple of 32 chunks, each 64 x 2048
head1 = B[0]
print(head1.shape)

# A bias of 64 zeros laid out as a single row.
head1_bias = torch.zeros(1, 64)
print(f"head1_bias.reshape(1, -1).shape = {head1_bias.shape}")

# Transpose the chunk to 2048 x 64 and append the bias as one extra row.
some_tensor = torch.cat([head1.transpose(-2, -1), head1_bias], dim=0)
some_tensor.shape

torch.Size([64, 2048])
head1_bias.reshape(1, -1).shape = torch.Size([1, 64])


torch.Size([2049, 64])