In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import os
import random
import sys
from tqdm import tqdm

sys.path.insert(0, '../src')
sys.path.insert(0, '..')
from main import get_model
from models.config import Config, IMAGE_TOKEN_IDS
import db_utils, utils, visualizations, similarity_utils

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Obtain paths for non-face TLL image pairs

In [34]:
def filter_images(data_dir,
                  visualize=False,
                  save_dir=None,
                  overwrite=False):
    if save_dir is not None:
        right_save_path = os.path.join(save_dir, "filtered_right_paths.txt")
        left_save_path = os.path.join(save_dir, "filtered_left_paths.txt")
        if os.path.exists(right_save_path) and os.path.exists(left_save_path) and not overwrite:
            utils.informal_log("File exists at {} and {} and not overwriting".format(
                left_save_path, right_save_path))
            return utils.read_file(left_save_path), utils.read_file(left_save_path)

    metadata_path = os.path.join(data_dir, "metadata.pkl")
    metadata = utils.read_file(metadata_path)

    # Map image names from Images/ to right/left names
    paired_image_names = metadata['image_list']
    single_image_names = sorted(os.listdir(os.path.join(data_dir, "right")))
    paired_single_dict = dict(zip(paired_image_names, single_image_names))

    # Get list of image names that are in "no_faces"
    filtered_boolean = metadata['no_faces']
    filtered_paired_image_names = np.array(paired_image_names)[filtered_boolean]
    # Get the corresponding right/left image names
    filtered_single_image_names = [paired_single_dict[paired_name] for paired_name in filtered_paired_image_names]
    assert len(filtered_paired_image_names) == len(filtered_single_image_names)
    if visualize:
        rand_int = random.randint(0, len(filtered_single_image_names) - 1)
        utils.informal_log("Randomly visualizing image {}".format(rand_int))
        visualizations.show_image_rows(
            [[utils.read_file(os.path.join(data_dir, "Images", filtered_paired_image_names[rand_int])),
            utils.read_file(os.path.join(data_dir, "left", filtered_single_image_names[rand_int])),
            utils.read_file(os.path.join(data_dir, "right", filtered_single_image_names[rand_int]))]]
        )

    # Make separate lists for right and left
    left_save_paths = []
    right_save_paths = []
    for filename in filtered_single_image_names:
        left_save_paths.append(os.path.join(data_dir, "left", filename))
        right_save_paths.append(os.path.join(data_dir, "right", filename))
    if save_dir is not None:
        utils.write_file(left_save_paths, left_save_path, overwrite=overwrite)
        utils.write_file(right_save_paths, right_save_path, overwrite=overwrite)

    return left_save_paths, right_save_paths


In [35]:
data_dir = "../data_local/tll/totally_looks_like"
save_dir = "../data_local/tll"

image_names = filter_images(
    data_dir=data_dir,
    visualize=True,
    save_dir=save_dir,
    overwrite=False)

[1027_094803] File exists at ../data_local/tll/filtered_left_paths.txt and ../data_local/tll/filtered_right_paths.txt and not overwriting


## Run Model on Left and Right Images

### Left Images

In [7]:
sys.argv = ['notebooks/get_representations.ipynb',
            '--config', '../configs/models/qwen/Qwen2-VL-7B-Instruct-TLL-Left.yaml']

config = Config()

In [8]:
model = get_model(config.architecture, config)


n_modules = 0
layer_names = []
for name, module in model.model.named_modules():
    if model.config.matches_module(name):
        print(name)
        layer_names.append(name)
        n_modules += 1
utils.informal_log("{} modules matched".format(n_modules))

Loading checkpoint shards: 100%|██████████| 5/5 [00:00<00:00, 24.17it/s]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


visual.blocks.31
visual.merger
model.layers.27
model.norm
[1027_162306] 4 modules matched


In [9]:
# Run model -- first checking if we would overwrite anything
left_db_path = model.config.output_db
utils.informal_log("Database path: {}".format(left_db_path))
proceed = True
if os.path.exists(left_db_path):
    response = input("File exists at {}. Are you sure you want to overwrite? (Y/N)".format(left_db_path))
    if response.lower() != "y":
        proceed = False
    else:
        os.remove(left_db_path)

if proceed:
    # Run model on images
    model.run(save_tokens=True)
else:
    utils.informal_log("Not overwriting file at {}".format(left_db_path))

[1027_162309] Database path: ../output/TLL/Qwen2-VL-7B-Instruct/TLL-Left.db


[1027_162310] Not overwriting file at ../output/TLL/Qwen2-VL-7B-Instruct/TLL-Left.db


### Right Images

In [10]:
sys.argv = ['notebooks/get_representations.ipynb',
            '--config', '../configs/models/qwen/Qwen2-VL-7B-Instruct-TLL-Right.yaml']

config = Config()

In [11]:
model = get_model(config.architecture, config)


n_modules = 0
layer_names = []
for name, module in model.model.named_modules():
    if model.config.matches_module(name):
        print(name)
        layer_names.append(name)
        n_modules += 1
utils.informal_log("{} modules matched".format(n_modules))

Loading checkpoint shards: 100%|██████████| 5/5 [00:00<00:00, 23.69it/s]


visual.blocks.31
visual.merger
model.layers.27
model.norm
[1027_162319] 4 modules matched


In [12]:
# Run model -- first checking if we would overwrite anything
right_db_path = model.config.output_db
utils.informal_log("Database path: {}".format(right_db_path))
proceed = True
if os.path.exists(right_db_path):
    response = input("File exists at {}. Are you sure you want to overwrite? (Y/N)".format(right_db_path))
    if response.lower() != "y":
        proceed = False
    else:
        os.remove(right_db_path)

if proceed:
    # Run model on images
    model.run(save_tokens=True)
else:
    utils.informal_log("Not overwriting file at {}".format(right_db_path))

[1027_162319] Database path: ../output/TLL/Qwen2-VL-7B-Instruct/TLL-Right.db


[1027_162321] Not overwriting file at ../output/TLL/Qwen2-VL-7B-Instruct/TLL-Right.db


## TODO: Add instructions to run the python script notebooks/tll.py in order to save from .db -> .npy files

## Get Embeddings and Compute Similarities

In [24]:
modality = "vision"
left_db_dir = os.path.splitext(left_db_path)[0]
right_db_dir = os.path.splitext(right_db_path)[0]

left_input_ids = similarity_utils.get_embedding(
    database_path=left_db_dir,
    layer_name="input_ids"
)

right_input_ids = similarity_utils.get_embedding(
    database_path=right_db_dir,
    layer_name="input_ids"
)

similarity_save_dir = os.path.join(os.path.dirname(left_db_dir), "similarities")
utils.ensure_dir(similarity_save_dir)
overwrite = False

for layer_name in tqdm(layer_names):
    save_path = os.path.join(similarity_save_dir, "sim-{}.npy".format(layer_name))
    if os.path.exists(save_path) and not overwrite:
        utils.informal_log("File exists at {} and not overwriting".format(save_path))
        continue

    layer_modality = model.get_layer_modality(layer_name)
    left_embedding, left_same_shapes = similarity_utils.get_embedding(
        database_path=left_db_dir,
        layer_name=layer_name
    )

    right_embedding, right_same_shapes = similarity_utils.get_embedding(
        database_path=right_db_dir,
        layer_name=layer_name
    )
    print(left_embedding.shape, right_embedding.shape)

    left_modality_name, left_modality_embedding, left_n_embeddings = similarity_utils.extract_modality(
        layer_modality=layer_modality,
        modality=modality,
        module_embedding=left_embedding,
        module_name=layer_name,
        image_token_id=IMAGE_TOKEN_IDS[model.config.architecture],
        input_ids=left_input_ids,
        module_embedding_same_shapes=left_same_shapes
    )

    right_modality_name, right_modality_embedding, right_n_embeddings = similarity_utils.extract_modality(
        layer_modality=layer_modality,
        modality=modality,
        module_embedding=right_embedding,
        module_name=layer_name,
        image_token_id=IMAGE_TOKEN_IDS[model.config.architecture],
        input_ids=right_input_ids,
        module_embedding_same_shapes=right_same_shapes)

    # Calculate mean embedding
    left_mean_embeddings = db_utils.compute_mean_embeddings(
        embeddings=left_modality_embedding,
        n_embeddings=left_n_embeddings)

    right_mean_embeddings = db_utils.compute_mean_embeddings(
        embeddings=right_modality_embedding,
        n_embeddings=right_n_embeddings)

    similarities = db_utils.cosine_similarity_numpy(
        left_mean_embeddings,
        right_mean_embeddings,
        elementwise=True)


    print(layer_name, np.mean(similarities))
    utils.write_file(similarities, save_path, overwrite=overwrite)


[1027_163943] File exists at ../output/TLL/Qwen2-VL-7B-Instruct/similarities/sim-visual.blocks.31.npy and not overwriting
(1622, 144, 3584) (1622, 144, 3584)
visual.merger 0.780057403762651
Saved file to ../output/TLL/Qwen2-VL-7B-Instruct/similarities/sim-visual.merger.npy
(1622, 170, 3584) (1622, 170, 3584)


AttributeError: 'tuple' object has no attribute 'shape'

In [5]:
print(left.shape)

(1622, 576, 1280)


In [None]:
(right_layer_names, right_embeddings, _) = db_utils.get_all_embeddings(
    db_path=right_db_path,
    device='cuda')

In [11]:
print(left_embeddings[2][:5])

('visual.merger', array([[-2.6250000e+00, -1.9628906e-01,  7.3828125e-01, ...,
         1.7031250e+00,  6.3750000e+00,  2.1972656e-02],
       [-1.5703125e+00, -2.0000000e+00,  2.2187500e+00, ...,
         8.9843750e-01,  4.7187500e+00,  4.3164062e-01],
       [ 8.1640625e-01,  1.3515625e+00,  1.6171875e+00, ...,
        -6.0156250e-01,  5.3515625e-01,  9.0625000e-01],
       ...,
       [ 6.9531250e-01,  3.6773682e-03, -2.9101562e-01, ...,
         2.5000000e-01,  2.8710938e-01, -1.4941406e-01],
       [-1.1669922e-01, -8.3984375e-01,  3.1640625e-01, ...,
         6.3671875e-01,  7.2265625e-01, -2.3437500e-01],
       [-3.5156250e+00, -6.6015625e-01, -4.0625000e-01, ...,
         6.6406250e-01, -1.2265625e+00, -1.9453125e+00]], dtype=float32), None)
