```
def compute_projection_matrix(X, Y):
    """
    Compute the projection matrix of best fit, w, that transforms X to Y.

    w is the least-squares solution of

        Y = Xw

    obtained by left-multiplying both sides by (X.T X)^-1 X.T:

        (X.T X)^-1 X.T Y = [(X.T X)^-1 X.T X]w

    and, since the bracketed term is the identity:

        w = (X.T X)^-1 X.T Y

    For the product Xw to match Y, X and Y must have the same number of rows,
    and w has shape (X.shape[1], Y.shape[1]). The formula requires X.T X to be
    invertible.
    """
```

--- 

In `Y = Xw`, which of X and Y is the SBERT embedding and which is the W2V embedding? 
Many torch methods say they output the transpose of vectors/matrices, which made me concerned that I had the computation backward. 

In this notebook I tried it both ways and then talked with T.J. at length about linear algebra and eventually determined that the way I was already doing it was the right way. 

Huzzah. 


In [18]:
import os

from fewshot.embeddings.transformer_embeddings import get_transformer_embeddings
import fewshot.embeddings.word_embeddings as w2v
from fewshot.models import load_transformer_model_and_tokenizer, load_word_vector_model
from fewshot.predictions import compute_predictions, compute_predictions_projection
from fewshot.utils import torch_load, to_tensor, compute_projection_matrix
from fewshot.metrics import simple_accuracy, simple_topk_accuracy
from fewshot.data.loaders import load_or_cache_data

from fewshot.path_helper import fewshot_filename

In [19]:
# Load the AG News dataset. Going by the helper's name and the log output
# below, a previously cached copy is reused when one is available.
dataset = load_or_cache_data("data/agnews", "agnews")

Checking for cached data...
/Users/mbeck/Projects/few-shot-text-classification/data/agnews/agnews_dataset.pt


In [8]:
# Split the stored embeddings: the trailing n_categories entries are treated as
# the category-label embeddings, everything before them as the examples.
n_categories = len(dataset.categories)
sbert_emb_examples, sbert_emb_labels = (
    dataset.embeddings[:-n_categories],
    dataset.embeddings[-n_categories:],
)

In [3]:
# Load the word-vector (w2v) model, passing data/w2v as its cache directory.
# NOTE(review): small=True presumably selects a reduced model -- confirm
# against fewshot.models.load_word_vector_model.
w2v_model = load_word_vector_model(small=True, cache_dir="data/w2v")

In [15]:
# Compare the two possible orientations of the least-squares projection
# between SBERT space and W2V space, for several w2v vocabulary sizes.

# (model, tokenizer) pair; loaded lazily and at most once, because it is only
# needed when an SBERT cache file is missing.
transformer = None

for topw in [1000, 10000, 100000]:
    # W2V vectors for the top `topw` w2v words, plus the words themselves.
    w2v_embeddings_w2v_words, w2v_words = w2v.get_topk_w2v_vectors(w2v_model,
                                                                   k=topw)
    w2v_embeddings_w2v_words = to_tensor(w2v_embeddings_w2v_words)

    # SBERT embeddings of those same words: read from the cache file when it
    # exists, otherwise compute them with the transformer model.
    sbert_w2v_filename = fewshot_filename(
        "data/w2v", f"sbert_embeddings_for_top{topw}_w2v_words.pt"
    )
    if os.path.exists(sbert_w2v_filename):
        cached_data = torch_load(sbert_w2v_filename)
        sbert_embeddings_w2v_words = cached_data["embeddings"]
    else:
        if transformer is None:
            transformer = load_transformer_model_and_tokenizer()
        model, tokenizer = transformer
        # NOTE(review): output_filename presumably makes the helper write the
        # cache file that the branch above reads -- confirm.
        sbert_embeddings_w2v_words = get_transformer_embeddings(
            w2v_words, model, tokenizer, output_filename=sbert_w2v_filename
        )

    # The way I was doing it... X = SBERT, Y = W2V
    # This yields matrix with shape (768, 300)
    projection_matrix1 = compute_projection_matrix(
        sbert_embeddings_w2v_words, w2v_embeddings_w2v_words
    )

    # The other way... X = W2V, Y = SBERT
    # This yields matrix with shape (300, 768) -- then take the Transpose
    projection_matrix2 = compute_projection_matrix(
        w2v_embeddings_w2v_words, sbert_embeddings_w2v_words
    ).T

    # NOTE: the printed index is 0-based, so "projection matrix 0" is
    # projection_matrix1 (X = SBERT) and "projection matrix 1" is
    # projection_matrix2 (X = W2V, transposed).
    for i, proj in enumerate([projection_matrix1, projection_matrix2]):
        # Compute new predictions utilizing the learned projection.
        predictions = compute_predictions_projection(
            sbert_emb_examples, sbert_emb_labels, proj, k=3
        )
        score = simple_accuracy(dataset.labels, predictions)
        print(f"Score using projection matrix {i} with top {topw} w2v words: {score}")
    print()

Score using projection matrix 0 with top 1000 w2v words: 33.25
Score using projection matrix 1 with top 1000 w2v words: 40.8421052631579

Score using projection matrix 0 with top 10000 w2v words: 60.039473684210535
Score using projection matrix 1 with top 10000 w2v words: 43.85526315789473

Score using projection matrix 0 with top 100000 w2v words: 54.53947368421053
Score using projection matrix 1 with top 100000 w2v words: 44.223684210526315



In [11]:
# X = SBERT, Y = W2V.
# NOTE: both embedding tensors are only assigned inside the comparison-loop
# cell, so this cell uses whatever that loop last left in the kernel.
projection_matrix = compute_projection_matrix(
    sbert_embeddings_w2v_words,
    w2v_embeddings_w2v_words,
)
print(projection_matrix.shape)

torch.Size([768, 300])


In [13]:
# X = W2V, Y = SBERT, NOT transposed here.
# NOTE: this rebinds projection_matrix2, which the comparison-loop cell assigns
# as the transposed version of this matrix.
projection_matrix2 = compute_projection_matrix(
    w2v_embeddings_w2v_words,
    sbert_embeddings_w2v_words,
)
print(projection_matrix2.shape)

torch.Size([300, 768])


In [17]:
# The two matrices have transposed shapes -- (768, 300) vs (300, 768) -- so a
# direct `==` cannot broadcast and raises a RuntimeError. Compare against the
# transpose instead, using a floating-point tolerance rather than exact equality.
projection_matrix.allclose(projection_matrix2.T)