In [None]:
    import os
    import torch as t
    import numpy as np
    from tqdm import tqdm
    import pandas as pd

    # Load every saved activation batch for one dataset and build, per layer, a
    # single array whose rows follow the order of the batch offsets on disk.
    activation_dir = "/kaggle/working/Truth_is_Universal/acts/Gemma2/2B/chat/cities"
    num_layers = 26

    # Pass 1: index the files on disk without loading them.
    # Filename format: layer_{layer_idx}_{batch_start}.pt
    # Structure: batch_files[layer_idx] = [(batch_start, fname), ...]
    batch_files = [[] for _ in range(num_layers)]
    for fname in os.listdir(activation_dir):
        if not fname.endswith(".pt"):
            continue
        parts = fname[: -len(".pt")].split("_")
        if len(parts) != 3:
            continue
        try:
            layer_idx = int(parts[1])
            # Parsed as an integer so batches can be ordered numerically below;
            # files whose offset is not an integer cannot be placed and are skipped.
            batch_start = int(parts[2])
        except ValueError:
            continue
        # Negative indices are rejected as well: they would otherwise wrap
        # around and be filed under the last layers.
        if not 0 <= layer_idx < num_layers:
            continue
        batch_files[layer_idx].append((batch_start, fname))

    # Fail early, before any tensor is loaded, with a message that names the
    # problem instead of letting np.concatenate choke on an empty list.
    missing_layers = [idx for idx, files in enumerate(batch_files) if not files]
    if missing_layers:
        raise ValueError(
            f"No activation files found in {activation_dir} for layers: {missing_layers}"
        )

    # Pass 2: load the batches of each layer in ascending batch_start order.
    # A plain filename sort is lexicographic ("layer_0_100.pt" sorts before
    # "layer_0_25.pt"), which would scramble the row order of the result.
    # Structure: layer_outputs[layer_idx] = [batch_0, batch_1, ..., batch_N]
    layer_outputs = [[] for _ in range(num_layers)]
    for layer_idx, files in enumerate(batch_files):
        for batch_start, fname in sorted(files):
            file_path = os.path.join(activation_dir, fname)
            # map_location="cpu" lets tensors saved from a GPU load on a
            # CPU-only runtime. Each file is assumed to hold one tensor of
            # shape [batch_size, hidden_dim] -- TODO confirm against the writer.
            acts = t.load(file_path, map_location="cpu")
            layer_outputs[layer_idx].append(acts.detach().cpu().numpy())

    # Stack the batches of each layer along the example axis.
    # final_layerwise_vectors[i] holds all examples of layer i, one row each.
    final_layerwise_vectors = [np.concatenate(batches, axis=0) for batches in layer_outputs]

    print("Collected activations for:")
    for i, arr in enumerate(final_layerwise_vectors):
        print(f"  Layer {i}: {arr.shape}")


    import os
    import torch as t
    # Fetch the activation vector of one dataset example from every layer and
    # print the matching dataset row.
    dataset = "cities"
    # NOTE(review): relative path -- resolved against the current working
    # directory, unlike the absolute CSV path used below. Presumably the notebook
    # runs from /kaggle/working/Truth_is_Universal; confirm.
    activation_dir = f"acts/Gemma2/2B/chat/{dataset}"
    # 0-based position of the example: index k is the (k+1)-th example, i.e.
    # line k+2 of the CSV file once the header line is counted.
    example_index = 425
    batch_size = 25
    num_layers = 26

    # Files are named by the index of the first example they contain, so round
    # down to the enclosing batch and keep the remainder as the row inside it.
    batch_start = (example_index // batch_size) * batch_size
    offset_in_batch = example_index % batch_size

    layer_vectors = []
    # collected_layers[i] is the layer that layer_vectors[i] came from. The two
    # differ from a plain 0..N-1 numbering as soon as a file is missing, because
    # missing layers are skipped rather than padded.
    collected_layers = []

    for layer_idx in range(num_layers):
        file_path = os.path.join(activation_dir, f"layer_{layer_idx}_{batch_start}.pt")
        if not os.path.exists(file_path):
            print(f"Missing file: {file_path}")
            continue

        # map_location="cpu" lets tensors saved from a GPU load on a CPU-only
        # runtime.
        acts = t.load(file_path, map_location="cpu")
        vector = acts[offset_in_batch]  # assumed shape: [hidden_dim] -- TODO confirm
        layer_vectors.append(vector.detach().cpu().numpy())
        collected_layers.append(layer_idx)

    print(f"Collected {len(layer_vectors)} layer vectors for example {example_index}")
    df_data = pd.read_csv(f"/kaggle/working/Truth_is_Universal/datasets/{dataset}.csv")
    print(f"Example : {dataset}/{example_index}/{df_data.iloc[example_index]}")