In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
from tabpfn_ext import TabPFNClassifier

In [7]:
# Load data
X, y = load_breast_cancer(return_X_y=True)

X_repeated = np.repeat(X, 1, axis=0)
y_repeated = np.repeat(y, 1, axis=0)

print("Repeated shape:", X_repeated.shape)

X_train, X_test, y_train, y_test = train_test_split(X_repeated, y_repeated, test_size=0.5, random_state=42)

Repeated shape: (569, 30)


In [8]:
X_repeated.shape

(569, 30)

In [5]:
# Initialize a classifier
clf = TabPFNClassifier()
clf.fit(X_train, y_train)

Using parallel_mode=block with return_as=list
Reduced from 76 to 37 features
MPS Memory currently allocated (bytes): 0
MPS Total allocated memory by driver (bytes): 475136
Reduced from 76 to 34 features
MPS Memory currently allocated (bytes): 0
MPS Total allocated memory by driver (bytes): 475136
Reduced from 76 to 37 features
MPS Memory currently allocated (bytes): 0
MPS Total allocated memory by driver (bytes): 475136
Reduced from 76 to 33 features
MPS Memory currently allocated (bytes): 0
MPS Total allocated memory by driver (bytes): 475136
Reduced from 31 to 13 features
MPS Memory currently allocated (bytes): 0
MPS Total allocated memory by driver (bytes): 475136
Reduced from 31 to 13 features
MPS Memory currently allocated (bytes): 0
MPS Total allocated memory by driver (bytes): 475136
Reduced from 31 to 13 features
MPS Memory currently allocated (bytes): 0
MPS Total allocated memory by driver (bytes): 475136
Reduced from 31 to 13 features
MPS Memory currently allocated (bytes): 0

In [6]:
%%time
# Predict probabilities
prediction_probabilities = clf.predict_proba(X_test)

# print(prediction_probabilities)
print("ROC AUC:", roc_auc_score(y_test, prediction_probabilities[:, 1]))

Iter_outputs 4
parallel_execute: Running inference on ensemble members.
parallel_execute: Completed inference on ensemble members.
Only one device detected. Executing in the current thread.
Reduced from 76 to 37 features
Using torch.inference_mode for inference.
MPS Memory currently allocated (bytes): before self.model() : 29832192
MPS Total allocated memory by driver (bytes): before self.model(): 34045952
MPS Memory currently allocated (bytes): before transformer_encoder : 117814784
MPS Total allocated memory by driver (bytes): before transformer_encoder: 1264451584
share_kv_across_n_heads: 1
MPS Memory currently allocated (bytes): before scaled_dot_product_attention 642205184
MPS Total allocated memory by driver (bytes): before scaled_dot_product_attention 1232994304
Elapsed time: 0.116949 seconds
MPS Memory currently allocated (bytes): finished 729603584
MPS Total allocated memory by driver (bytes): finished 1289617408
share_kv_across_n_heads: 1
MPS Memory currently allocated (bytes

RuntimeError: MPS backend out of memory (MPS allocated: 1.04 GB, other allocations: 13.91 GB, max allowed: 18.13 GB). Tried to allocate 3.62 GB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [15]:
import torch
import hashlib
import numpy as np

# Example tensor: shape (num_rows, num_columns)
tensor = torch.rand(10000, 500) 

# Convert columns to numpy arrays, hash each one
def hash_tensor_column(col):
    col_bytes = col.numpy().tobytes()  # Convert to bytes
    return hashlib.md5(col_bytes).hexdigest()

# Transpose to iterate over columns
cols = tensor.t()
hashes = [hash_tensor_column(col) for col in cols]
_, unique_indices = np.unique(hashes, return_index=True)
unique_indices_sorted = sorted(unique_indices)

# Get tensor with unique columns only
unique_tensor = tensor[:, unique_indices_sorted]

print(unique_tensor)

tensor([[0.7156, 0.0431, 0.0466,  ..., 0.4632, 0.0519, 0.8376],
        [0.8305, 0.1702, 0.0688,  ..., 0.3416, 0.5009, 0.7858],
        [0.8889, 0.3182, 0.2094,  ..., 0.5747, 0.8757, 0.8355],
        ...,
        [0.2162, 0.0302, 0.6480,  ..., 0.7783, 0.8364, 0.1507],
        [0.6451, 0.0922, 0.4995,  ..., 0.0951, 0.4936, 0.3479],
        [0.4806, 0.2587, 0.2407,  ..., 0.1417, 0.6283, 0.8042]])


In [16]:
print(unique_tensor.shape)

torch.Size([10000, 500])


In [17]:
# Example tensor X of shape (num_samples, num_features)
def remove_correlated(tensor):
    X = tensor # for example
    
    # Normalize columns to unit norm
    X_norm = X / X.norm(dim=0, keepdim=True)
    
    # Compute cosine similarity matrix between columns (features)
    sim_matrix = torch.matmul(X_norm.T, X_norm)  # shape (num_features, num_features)
    
    # Threshold and remove highly similar columns
    threshold = 0.97
    to_remove = set()
    n_features = X.shape[1]
    for i in range(n_features):
        for j in range(i + 1, n_features):
            if sim_matrix[i, j] > threshold:
                to_remove.add(j)
    
    selected_indices = [i for i in range(n_features) if i not in to_remove]
    X_reduced = X[:, selected_indices]
    
    print(f"Original shape: {X.shape}, Reduced shape: {X_reduced.shape}")
    return X_reduced

In [18]:
n_samples = 100000
n_features = 500

base_col = torch.randn(n_samples, 1)  # base column vector

# Define a noise vector with a different noise scale per column
noise_scales = torch.rand(n_features) * 0.3  # e.g., noise std dev between 0 and 0.1

# Generate noise matrix with shape (n_samples, n_features)
noise = torch.randn(n_samples, n_features) * noise_scales  # automatically broadcasts noise_scales along rows

# Add different noise to each column based on noise_scales
correlated_matrix = base_col + noise

print(correlated_matrix.shape)  # (10000, 500)

# Check correlation for a few columns
corr_matrix = torch.corrcoef(correlated_matrix.T)
print(corr_matrix[:5, :5])

torch.Size([100000, 500])
tensor([[1.0000, 0.9959, 0.9696, 0.9850, 0.9854],
        [0.9959, 1.0000, 0.9736, 0.9890, 0.9895],
        [0.9696, 0.9736, 1.0000, 0.9630, 0.9634],
        [0.9850, 0.9890, 0.9630, 1.0000, 0.9786],
        [0.9854, 0.9895, 0.9634, 0.9786, 1.0000]])


In [19]:
decorrelated = remove_correlated(correlated_matrix)

Original shape: torch.Size([100000, 500]), Reduced shape: torch.Size([100000, 75])


In [83]:
# Check correlation for a few columns
corr_matrix = torch.corrcoef(decorrelated.T)
print(corr_matrix[:5, :5])

tensor([[1.0000, 0.9650, 0.9579, 0.9595, 0.9608],
        [0.9650, 1.0000, 0.9242, 0.9260, 0.9285],
        [0.9579, 0.9242, 1.0000, 0.9194, 0.9203],
        [0.9595, 0.9260, 0.9194, 1.0000, 0.9206],
        [0.9608, 0.9285, 0.9203, 0.9206, 1.0000]])


In [13]:
from lshashpy3 import LSHash
import numpy as np

# Initialize LSH for vectors of dimension 128, with 10 hash tables (num_hashtables)
lsh = LSHash(hash_size=8, input_dim=128, num_hashtables=5)

# Add data points (vectors)
vector = np.random.randn(128)
# print(vector)
lsh.index(vector, extra_data = 1)

# Query for nearest neighbors
query = np.random.randn(128)
results = lsh.query(vector, num_results=100)
print(results)

[(((1.667167937838927, 0.021067031196282046, -1.062389766586226, 0.8979769559205804, -0.3764693010382622, 1.0078820241854531, -0.24545000742959602, -1.8523442913582666, 0.9627946690861312, 0.3524175620598159, -0.9837872669723638, -0.7807438783884271, 0.06453263048717099, -0.1041605134264547, -0.9490586572622636, -0.5849740918675714, 0.28279364840032156, -0.6265301049087184, -0.06310669563379105, 0.9705159090538583, -1.386804448968495, -0.28505150376691546, 0.15862447226955062, 0.12473856688652137, 0.5403121434146622, -0.11515326571618022, -1.1345211234738783, 1.5609355143826595, 1.2075505901879295, -1.0984225662049683, 0.4168341126398801, 0.33829729368802586, 0.5144166594458067, 0.4791669511738508, 0.04392825343323537, -0.79776782034572, -0.3730989866009799, 0.884342611828041, -0.963405493031893, -0.31996843692828947, -1.689558273797796, 0.8711025657386609, 0.028292643499445748, 0.2471200411234564, 0.7516905883722862, -0.6177278300382819, -1.0218012699801535, -0.3191782842995634, -0.66

In [1]:
# Predict labels
predictions = clf.predict(X_test)
print("Accuracy", accuracy_score(y_test, predictions))

NameError: name 'clf' is not defined

In [11]:
print(clf.classes_)

[0 1]


In [None]:
max_index = 10000  # size of dataset
N = 1000           # chunk size

indices_chunks = [range(i, min(i + N, max_index)) for i in range(0, max_index, N)]
indices_chunks = [np.array(chunk) for chunk in indices_chunks]
indices_chunks

In [29]:
import torch
import inspect

# Save the original __new__ method
original_new = torch._C._TensorBase.__new__

def custom_new(cls, *args, **kwargs):
    # Get caller frame info (1 level above current)
    frame = inspect.stack()[1]
    filename = frame.filename
    lineno = frame.lineno

    # Call the original __new__ to create instance
    instance = original_new(cls, *args, **kwargs)

    print(f"A new tensor was created at {filename}:{lineno}\nTensor: {instance}")
    return instance

# Monkey patch torch.Tensor.__new__
torch.Tensor.__new__ = custom_new

# Test tensor creation to see file and line number
t = torch.Tensor([1, 2, 3])

A new tensor was created at /var/folders/sg/nff54zj50zl8mzy2j3lnw34w0000gn/T/ipykernel_97458/3516784111.py:23
Tensor: tensor([1., 2., 3.])


In [None]:
from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNClassifier

In [12]:
clf = AutoTabPFNClassifier(max_time=120, device="cuda") # 120 seconds tuning time
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

  self.device_ = infer_device_and_type(self.device)
  from .autonotebook import tqdm as notebook_tqdm
Failed to save metadata file due to exception 'NoneType' object has no attribute 'lower', skipping...
Consider using a GPU or the tabpfn-client API: https://github.com/PriorLabs/tabpfn-client
Consider using a GPU or the tabpfn-client API: https://github.com/PriorLabs/tabpfn-client
Consider using a GPU or the tabpfn-client API: https://github.com/PriorLabs/tabpfn-client
Consider using a GPU or the tabpfn-client API: https://github.com/PriorLabs/tabpfn-client
Consider using a GPU or the tabpfn-client API: https://github.com/PriorLabs/tabpfn-client
Consider using a GPU or the tabpfn-client API: https://github.com/PriorLabs/tabpfn-client


RuntimeError: No models were trained successfully during fit(). Inspect the log output or increase verbosity to determine why no models were fit. Alternatively, set `raise_on_no_models_fitted` to False during the fit call.

The "dataset-specific preprocessings" mentioned for TabPFN refer to various data preparation techniques that can improve the model’s performance beyond its default capabilities. While TabPFN can handle raw tabular data with minimal preprocessing—
automatically managing
- missing values,
- encoding categorical variables,
- and normalizing features—performance

can be enhanced by applying specific preprocessing steps based on the characteristics of the dataset.

These preprocessing techniques include:

- Zero-padding features to a fixed input dimensionality if the dataset has fewer features than expected.

- Applying transformations (e.g., power transforms like Yeo–Johnson) to make feature distributions closer to normal, which suits the model’s assumptions.

- Quantile transformation where inputs are quantized to evenly spaced values, often doubling features by keeping original copies.

- Category shuffling for categorical features with low cardinality, which helps generalization.

- Outlier removal to discard extreme values far from the mean.

- Adding compressed feature representations like SVD components.

- Using domain knowledge to combine or remove less relevant features.

- Grouping data based on random forests for heterogeneous datasets, splitting into homogeneous subsets.

These preprocessings tailor the data better for TabPFN’s neural architecture, which expects roughly normally distributed features after transformation, and can significantly boost accuracy, especially when manually tuned or used with hyperparameter optimization techniques. They also aid in adapting TabPFN to larger datasets and more complex tasks.

## BlockMask

In [10]:
import torch
from torch.nn.attention.flex_attention import flex_attention, create_block_mask

In [12]:


def causal_mask(b, h, q_idx, kv_idx):
    return q_idx >= kv_idx

sed_len_kv = 4


block_mask = create_block_mask(causal_mask, 1, 1, sed_len_kv, sed_len_kv)
query = torch.randn(1, 1, sed_len_kv, 64, dtype=torch.float16)
key = torch.randn(1, 1, sed_len_kv, 64, dtype=torch.float16)
value = torch.randn(1, 1, sed_len_kv, 64, dtype=torch.float16)
output = flex_attention(query, key, value, block_mask=block_mask)

AssertionError: Torch not compiled with CUDA enabled

In [None]:
(0, 0)
██    
████  
██████


In [20]:
correlated_matrix.shape

torch.Size([100000, 500])

In [21]:
import numpy as np
import faiss

# Example data: 10000 samples, 128 features (columns)
n_samples, n_features = 10000, 128
np.random.seed(1234)
data = np.random.random((n_samples, n_features)).astype('float32')

# Normalize columns to unit length for cosine similarity
data_norm = data / np.linalg.norm(data, axis=0, keepdims=True)

# Transpose to treat columns as vectors: shape (n_features, n_samples)
vectors = data_norm.T.copy()

d = vectors.shape[1]

# Build FAISS index on columns as vectors
index = faiss.IndexFlatIP(d)
index.add(vectors)

# Query one or more columns similarly as vectors: e.g., first 3 columns
query_vectors = vectors[:3]

# Similarity threshold
threshold = 0.9

# Perform range search to find all columns with similarity >= threshold
distances, indices = index.range_search(query_vectors, threshold)

print("Indices of similar columns within threshold:", indices)
print("Similarity scores:", distances)

  data_norm = data / np.linalg.norm(data, axis=0, keepdims=True)


AttributeError: 'Tensor' object has no attribute 'copy'

In [22]:
from annoy import AnnoyIndex
import numpy as np

# Example data: 10000 samples, 128 features (columns)
# n_samples, n_features = 10000, 128
# np.random.seed(1234)
# data = np.random.random((n_samples, n_features)).astype('float32')

data = correlated_matrix

# Normalize columns to unit length for cosine similarity
data_norm = data / np.linalg.norm(data, axis=0, keepdims=True)

# Transpose to treat columns as vectors: shape (n_features, n_samples)
vectors = data_norm.T

d = vectors.shape[1]

# Build Annoy index with angular (cosine) distance
index = AnnoyIndex(d, 'angular')

# Add each column vector to index
for i in range(n_features):
    index.add_item(i, vectors[i])

# Build trees for indexing (higher number means more accuracy, higher build time)
index.build(10)

# Query first 3 columns: nearest neighbors by number (k)
k = 10
for i in range(n_features):
    neighbors = index.get_nns_by_item(i, 2, include_distances=True)
    # print(f"Neighbors of column {i}: {neighbors}")
    if neighbors[1][1] < 0.03 :
        neighbors = index.get_nns_by_item(i, k, include_distances=True)
        print(f"Neighbors of column {i}: {neighbors}")
print(f"Finished")
    

  data_norm = data / np.linalg.norm(data, axis=0, keepdims=True)


Neighbors of column 1: ([1, 38, 466, 189, 44, 376, 106, 236, 305, 287], [0.0, 0.006505327764898539, 0.007564406376332045, 0.007712687365710735, 0.00787327066063881, 0.007918594405055046, 0.007926102727651596, 0.008372271433472633, 0.008597083389759064, 0.009003445506095886])
Neighbors of column 10: ([10, 466, 44, 189, 106, 236, 376, 41, 38, 196], [0.0, 0.009036474861204624, 0.009206357412040234, 0.00941762700676918, 0.009808232076466084, 0.009832514449954033, 0.010236445814371109, 0.01024224516004324, 0.01025389414280653, 0.010950976982712746])
Neighbors of column 16: ([16, 466, 236, 106, 189, 38, 44, 376, 305, 41], [0.0, 0.00810456182807684, 0.0082213981077075, 0.008243114687502384, 0.008293597027659416, 0.008407790213823318, 0.008534412831068039, 0.00856233760714531, 0.009108764119446278, 0.009935025125741959])
Neighbors of column 22: ([22, 38, 466, 106, 189, 376, 236, 44, 305, 41], [0.0, 0.024791991338133812, 0.02481597661972046, 0.024880751967430115, 0.024966904893517494, 0.0249764

In [None]:
!pip install faiss-gpu-cu12  

In [None]:
n_samples = 100000
n_features = 500

base_col = torch.randn(n_samples, 1)  # base column vector

# Define a noise vector with a different noise scale per column
noise_scales = torch.rand(n_features) * 0.3  # e.g., noise std dev between 0 and 0.1

# Generate noise matrix with shape (n_samples, n_features)
noise = torch.randn(n_samples, n_features) * noise_scales  # automatically broadcasts noise_scales along rows

# Add different noise to each column based on noise_scales
correlated_matrix = base_col + noise

print(correlated_matrix.shape)  # (10000, 500)

# Check correlation for a few columns
corr_matrix = torch.corrcoef(correlated_matrix.T)
print(corr_matrix[:5, :5])

In [None]:
def remove_outliers(df):
    df = df.select_dtypes(include=['number']).astype(float)
    
    Q1 = df.quantile(0.25)
    Q3 = df.quantile(0.75)
    IQR = Q3 - Q1
    
    filtered = ~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)
    df_clean = df[filtered]
    return df_clean

df_x = remove_outliers(X_sampled)
df_y = y_sampled.loc[df_x.index]

X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.5, random_state=42)



In [2]:
import torch

# Create an example tensor of arbitrary shape, e.g., (batch, channels, height, width)
tensor = torch.randn(4, 3, 5, 6)

# To extract and convert the last two dimensions (height and width for example),
# keep all dimensions except the last two and then convert to numpy
last_two_dims_numpy = tensor[..., :, :].reshape(-1, tensor.size(-2), tensor.size(-1)).numpy()

In [3]:
last_two_dims_numpy.shape

(12, 5, 6)

In [30]:
import faiss
d = 4
v_dim = 5

tensor1 = torch.randn(1, 1, 2, d)
tensor2 = torch.randn(1, 1, 2, d)
tensorV = torch.randn(1, 1, 2, v_dim)
print(f"tensor1: {tensor1}")
print(f"tensor2: {tensor2}")

vector1 = tensor1[0,0,:,:]
vector2 = tensor2[0,0,:,:]
index = faiss.IndexFlatIP(d)
# index.add(vector1)
index.add(vector2)

distances, labels = index.search(vector1, k = 2)

print(distances)
print(labels)

tensor1: tensor([[[[-0.1418,  0.7054,  1.0820, -0.1088],
          [ 0.8939, -0.2707,  0.1073,  0.4607]]]])
tensor2: tensor([[[[ 0.1233, -0.8613,  2.4167, -0.1384],
          [ 0.3667,  3.0996, -1.3968, -0.2889]]]])
[[ 2.0048697   0.65455794]
 [ 0.5388876  -0.79417306]]
[[0 1]
 [0 1]]


In [31]:
# print(vector1.shape)
# sum_squares_1 = np.sum(vector1.numpy()**2)
# print(sum_squares_1)

In [32]:
attn_scores = torch.zeros(1, 1, 2, 2, dtype=torch.float32)
print(f"Attention scores:{attn_scores} ")   

query_idx = torch.arange(2).unsqueeze(1).expand(2, 2) 

indices = torch.tensor(labels) 
dists = torch.tensor(distances, dtype=torch.float32)

print(f"query_idx: {query_idx} indices: {indices}, shape dists: {dists}")
print(f"Shape query_idx: {query_idx.shape} indices: {indices.shape}, shape dists: {dists.shape}")

attn_scores[0, 0, query_idx, indices]  = dists

Attention scores:tensor([[[[0., 0.],
          [0., 0.]]]]) 
query_idx: tensor([[0, 0],
        [1, 1]]) indices: tensor([[0, 1],
        [0, 1]]), shape dists: tensor([[ 2.0049,  0.6546],
        [ 0.5389, -0.7942]])
Shape query_idx: torch.Size([2, 2]) indices: torch.Size([2, 2]), shape dists: torch.Size([2, 2])


In [33]:
attn_scores

tensor([[[[ 2.0049,  0.6546],
          [ 0.5389, -0.7942]]]])

In [34]:
torch.matmul(attn_scores, tensorV)

tensor([[[[-1.4596,  1.7555, -0.5378, -1.4331,  2.4042],
          [ 0.2010, -1.5601,  1.3265,  0.4212,  0.0976]]]])

In [None]:
attention_head_outputs_scaled = torch.nn.functional.scaled_dot_product_attention(
                    tensor1,
                    tensor2,
                    tensorV
                )
attention_head_outputs_scaled

In [2]:
!pip install memory_profiler

Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl.metadata (20 kB)
Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0


In [7]:
from pympler import muppy, summary

# Create some objects
my_list = [1] * (10**6)

# Get all existing Python objects
all_objects = muppy.get_objects()

# Summarize memory used by types of objects
sum1 = summary.summarize(all_objects)
summary.print_(sum1)

  return isinstance(object, types.FrameType)


                       types |   # objects |   total size
                         int |     2096080 |     59.73 MB
                         str |      391375 |     57.35 MB
                        code |       91389 |     38.46 MB
                        list |       42345 |     29.47 MB
                        dict |      105440 |     29.13 MB
                        type |       11737 |     14.78 MB
                       tuple |      152257 |      9.08 MB
     collections.OrderedDict |        7437 |      3.90 MB
                         set |        5119 |      2.88 MB
           inspect.Parameter |       36592 |      2.23 MB
        asttokens.util.Token |       18200 |      2.08 MB
                 abc.ABCMeta |        1104 |      1.74 MB
                        cell |       40670 |      1.55 MB
       weakref.ReferenceType |       18888 |      1.44 MB
  builtin_function_or_method |       18262 |      1.25 MB


In [10]:
import traceback

try:
    # some code that can raise an exception
    1 / 0
except Exception as e:
    print("An error occurred:")
    # exc_type, exc_value, exc_traceback = sys.exc_info()
    # print("Printing stack trace using traceback.print_tb:")
    # traceback.print_tb(exc_traceback, limit=None, file=sys.stdout)
    traceback.print_stack()

An error occurred:


  File "/opt/anaconda3/envs/graph_temporal/lib/python3.12/runpy.py", line 198, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/anaconda3/envs/graph_temporal/lib/python3.12/runpy.py", line 88, in _run_code
    exec(code, run_globals)
  File "/opt/anaconda3/envs/graph_temporal/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/envs/graph_temporal/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/opt/anaconda3/envs/graph_temporal/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/opt/anaconda3/envs/graph_temporal/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 211, in start
    self.asyncio_loop.run_forever()
  File "/opt/anaconda3/envs/graph_temporal/lib/python3.12/asyncio/base_events.py", line 645, in run_forever
    self._run_once()
  File

# Gathering indices 

In [12]:
import torch

In [53]:
batch = 2
n_head = 3
q_contiguous = torch.rand(batch, n_head, 10, 10, dtype=torch.float32)
v_contiguous = torch.rand(batch, n_head, 10, 15, dtype=torch.float32)

n_similar_faiss =3
# attn_scores = torch.zeros(2, 3, 10, n_similar_faiss, dtype=torch.float32)
# attn_scores_indices = torch.zeros(2, 3, 10, n_similar_faiss, dtype=torch.float32)
attn_scores_list = []
attn_scores_indices_list = []

for b in range(q_contiguous.shape[0]):
    for h in range(q_contiguous.shape[1]):
        # query_idx = torch.arange(q_contiguous.shape[2]).unsqueeze(1).expand(q_contiguous.shape[2], n_similar_faiss)  # shape: (q_len, n_sim)
        dists = torch.rand(q_contiguous.shape[2], 3, dtype=torch.float32)
        indices = torch.randint(11, 20, (q_contiguous.shape[2], 3), dtype=torch.int64)
        # print(f"indices: {indices}")
        # print(f"dists: {dists}")
        # print(f"query_idx: {query_idx}")
        attn_scores_list.append(dists)
        attn_scores_indices_list.append(indices)
        
stacked_attn_scores = torch.stack(attn_scores_list, dim=0) 
stacked_attn_scores_indices = torch.stack(attn_scores_indices_list, dim=0) 

print(f"stacked_attn_scores: {stacked_attn_scores.shape}")
attn_scores = stacked_attn_scores.view(batch, n_head, stacked_attn_scores.shape[1], stacked_attn_scores.shape[2])
attn_scores_indices = stacked_attn_scores_indices.view(batch, n_head, stacked_attn_scores_indices.shape[1], stacked_attn_scores_indices.shape[2])

# print(attn_scores[0,0,:,:])
# print(attn_scores_indices[0,0,:,:])

print(f"attn_scores_indices.shape: {attn_scores_indices.shape}")
# Gathering values from 
indices_expanded = attn_scores_indices
indices_expanded = attn_scores_indices.unsqueeze(-1).expand(-1, -1, -1,-1, v_contiguous.size(-1))
print(f"expanded: {indices_expanded[0,0,:,:,:]}")
v_contiguous_expanded = v_contiguous.unsqueeze(-1).expand(-1, -1, -1,-1, v_contiguous.size(-1))
print(f"shape of indices_expanded: {indices_expanded.shape}")
print(f"shape of v_contiguous: {v_contiguous.shape}")
print(f"shape of v_contiguous_expanded: {v_contiguous_expanded.shape}")
# shape => (B, H, seq_len_q, n_neighbours, k_dim)

# Gather values from V along seq_len dimension (dim=2)
neighbor_values = torch.gather(v_contiguous_expanded, dim=3, index=indices_expanded)
print(f"shape of neighbor_values: {neighbor_values.shape}")

stacked_attn_scores: torch.Size([6, 10, 3])
attn_scores_indices.shape: torch.Size([2, 3, 10, 3])
expanded: tensor([[[12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12],
         [12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12],
         [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]],

        [[16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16],
         [15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15],
         [15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15]],

        [[16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16],
         [14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14],
         [17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17]],

        [[16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16],
         [19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19],
         [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]],

        [[19, 19, 19, 19, 19, 19,

RuntimeError: index 16 is out of bounds for dimension 3 with size 15

In [49]:
attention_heads = torch.einsum('bhti,bhtik->bhtk', attn_scores, neighbor_values)

attention_heads.shape

torch.Size([2, 3, 10, 15])

In [63]:
import faiss
import numpy as np

d = 3
index = faiss.IndexFlatIP(d)

arr = np.random.rand(5, 3)

# Multiply rows by -1
negated_arr = -arr

# Concatenate original and negated arrays horizontally (side-by-side)
# or vertically (stacked rows) depending on requirement

# To concatenate rows (stack vertically) -> final shape (10, 3)
vectors = np.vstack((arr, negated_arr))

query = vectors[0:1,:]
print(query)
neg_query = -query
query_vectors = np.vstack((query, neg_query))

vectors = vectors[1:6,:]
index.add(vectors)

[[0.11986817 0.71472432 0.37543603]]


In [64]:
vectors

array([[ 0.43978268,  0.6987938 ,  0.19618884],
       [ 0.38634747,  0.66174179,  0.24232956],
       [ 0.72092435,  0.31196014,  0.64357024],
       [ 0.37540066,  0.97741471,  0.33408207],
       [-0.11986817, -0.71472432, -0.37543603]])

In [65]:
# query = vectors[0:1,:]
# neg_query = -query
# query_vectors = np.vstack((query, neg_query))
query

array([[0.11986817, 0.71472432, 0.37543603]])

In [62]:
neg_query.shape

(1, 3)

In [66]:
lims, distances, labels  = index.range_search(query, thresh = 0.9)
print(f"labels: {labels}")
print(f"distances: {distances}")

labels: []
distances: []


In [67]:
lims, distances, labels  = index.range_search(neg_query, thresh = 0.9)
print(f"labels: {labels}")
print(f"distances: {distances}")

labels: []
distances: []


In [45]:
labels - arr.shape[0]

array([], dtype=int64)

In [59]:
??index.range_search

[31mSignature:[39m index.range_search(x, thresh, *, params=[38;5;28;01mNone[39;00m)
[31mSource:[39m   
    [38;5;28;01mdef[39;00m replacement_range_search(self, x, thresh, *, params=[38;5;28;01mNone[39;00m):
        [33m"""Search vectors that are within a distance of the query vectors.[39m

[33m        Parameters[39m
[33m        ----------[39m
[33m        x : array_like[39m
[33m            Query vectors, shape (n, d) where d is appropriate for the index.[39m
[33m            `dtype` must be float32.[39m
[33m        thresh : float[39m
[33m            Threshold to select neighbors. All elements within this radius are returned,[39m
[33m            except for maximum inner product indexes, where the elements above the[39m
[33m            threshold are returned[39m
[33m        params : SearchParameters[39m
[33m            Search parameters of the current search (overrides the class-level params)[39m


[33m        Returns[39m
[33m        -------[39m
[33m

In [69]:
index.search(query, 1)

(array([[0.8690071]], dtype=float32), array([[3]]))

In [70]:
index.search(neg_query, 1)

(array([[0.66615146]], dtype=float32), array([[4]]))

In [73]:
def normalize_np(X, axis=0, p=2, eps=1e-12):
    norm = np.linalg.norm(X, ord=p, axis=axis, keepdims=True)
    norm = np.maximum(norm, eps)  # prevent division by zero
    return X / norm

In [81]:
import numpy as np

# Create a random vector
v = np.random.rand(5,3)


# Normalize the vector to unit length
# v_normalized = v / norm
v_normalized = normalize_np(v, axis=1)

# Compute dot product of normalized vector with itself
dot_product = np.dot(v_normalized[0,:], v_normalized[0,:])

print("Normalized vector:", v_normalized)
print("Dot product with itself:", dot_product) 

Normalized vector: [[0.01068057 0.94921714 0.31444038]
 [0.1550259  0.14712322 0.97689392]
 [0.43094687 0.76224373 0.4829796 ]
 [0.09517516 0.14854723 0.98431469]
 [0.24727722 0.87735555 0.41121919]]
Dot product with itself: 1.0


In [76]:
dp = 0
for id, el in enumerate(v_normalized):
    dp += el * el

print(dp)

1.0000000000000002


In [None]:
normalize_np()