In [1]:
import h5py
import numpy as np
import pandas as pd

In [2]:
def load_attention_maps(hdf5_path):
    """Read per-layer attention maps from an HDF5 file into a DataFrame.

    The file is walked as one top-level group per image (the group name is
    stored as ``image_hash``), with one dataset per layer inside each group.
    Every dataset is read fully into memory as a NumPy array, indexed as a
    3-D array with index 0 of the third axis dropped, and then reduced with
    a maximum over the last axis.

    Args:
        hdf5_path: Path of the HDF5 file; it is opened read-only.

    Returns:
        pandas.DataFrame with one row per image: an ``image_hash`` column
        followed by one column per layer key, whose cells hold the reduced
        NumPy arrays.
    """
    records = []

    with h5py.File(hdf5_path, "r") as h5_file:
        for image_hash in h5_file:
            image_group = h5_file[image_hash]

            # `[:]` materialises the dataset as a NumPy array (not a tensor).
            # Index 0 along the third axis is skipped before the max is taken
            # -- presumably a start-of-text token slot; TODO confirm against
            # the code that wrote the file.
            reduced_by_layer = {
                layer_key: image_group[layer_key][:][:, :, 1:].max(axis=-1)
                for layer_key in image_group
            }
            records.append({"image_hash": image_hash, **reduced_by_layer})

    # Everything was copied into memory above, so the frame can be built
    # after the file has been closed.
    return pd.DataFrame(records)

In [3]:
# Load the attention maps into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
hdf5_path = "shotdeck_attention_maps.h5"
# One row per image; each layer column holds that image's reduced attention map.
df_attn_maps = load_attention_maps(hdf5_path)

In [6]:
# Preview the first row: the image hash plus one array-valued column per layer.
print(df_attn_maps.head(1))

                         image_hash  \
0  00109faf7124346c45aeb05d67ff4ef5   

                                      cross-down-0-0  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...   

                                      cross-down-0-1  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...   

                                      cross-down-1-0  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...   

                                      cross-down-1-1  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...   

                                      cross-down-2-0  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...   

                                      cross-down-2-1  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...   

                                       cross-mid-0-0  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...   

                                        cross-up-1-0  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...   

                 

In [10]:
# Sanity-check the shape of one reduced map (first image, layer 'cross-down-0-1').
print(df_attn_maps.loc[0, 'cross-down-0-1'].shape)

(64, 152)
