# _Ref:_ Transformers need glasses! Information over-squashing in language tasks
https://arxiv.org/pdf/2406.04267

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import ollama
import pandas as pd
import seaborn as sns
from scipy import stats
from scipy.spatial.distance import cosine
from tqdm.auto import tqdm, trange

# Switch on seaborn's default plot styling for the figures drawn below
# (NOTE(review): in recent seaborn releases `set` is an alias of `set_theme` — confirm version).
sns.set()

In [None]:
def _norm_of_difference(order):
    """Return a function computing ``np.linalg.norm(x - y, ord=order)``."""

    def distance(x, y):
        return np.linalg.norm(x - y, ord=order)

    return distance


# Distance functions keyed by the short name used in the "metric" column.
# "cos" is scipy's cosine *distance* (not the similarity).
metrics = {
    "Linf": _norm_of_difference(np.inf),
    "L2": _norm_of_difference(2),
    "cos": cosine,
}

In [None]:
# For each model, request embeddings for the prompts "10", "110", ...,
# ("1" * 30) + "0", then record under every metric the distance between the
# embeddings of each prompt and the one before it.
records = []

for model in ["llama3.1:8b", "deepseek-coder-v2:16b"]:
    vectors = []
    for ones in trange(1, 31, desc=model):
        response = ollama.embeddings(model=model, prompt=("1" * ones) + "0")
        vectors.append(response["embedding"])
    xs = np.vstack(vectors)

    # Pair every row with its predecessor: (xs[1], xs[0]), (xs[2], xs[1]), ...
    consecutive = list(zip(xs[1:], xs[:-1]))
    for name, distance in metrics.items():
        steps = [distance(longer, shorter) for longer, shorter in consecutive]
        records.append({"model": model, "metric": name, "diff": steps})

# One row per (model, metric); the "diff" cell holds the list of step distances.
diff = pd.DataFrame(records)

In [None]:
# Draw one line per (model, metric) row of `diff`: the distance between
# consecutive embeddings against the step index, on a log-scaled y axis.
plt.figure(figsize=(14, 5))
# `DataFrame.query` needs an expression string, so the previous bare
# `diff.query()` raised TypeError before anything was plotted. Every row is
# plotted here. NOTE(review): if only a subset was intended, filter explicitly,
# e.g. diff.query("metric == 'cos'").
for _, row in diff.iterrows():
    sns.lineplot(
        x=range(len(row["diff"])),
        y=row["diff"],
        linestyle="--",
        marker="o",
        label=f"{row['model']}/{row['metric']}",
    )

plt.yscale("log")
# Show both major and minor grid lines.
plt.grid(True, which="both")
plt.tight_layout()
pass

In [None]:
# Re-index the results by (model, metric) so one row can be selected by that pair;
# the bare `df` at the end displays the frame as the cell output.
index_columns = ["model", "metric"]
df = diff.set_index(index_columns)
df

In [None]:
# Pearson correlation between the cosine-distance and L2-distance step series
# of the deepseek model. Each `.loc[...]` selects the single row for that
# (model, metric) pair and `.item()` unwraps its one "diff" cell.
# The metric key must be "L2": the rows are labelled "Linf", "L2" and "cos",
# so the previous "l_2" lookup raised KeyError.
stats.pearsonr(
    df.loc[("deepseek-coder-v2:16b", "cos")].item(),
    df.loc[("deepseek-coder-v2:16b", "L2")].item(),
)