# Explore whether framing is already embedded in GloVe

In [None]:
from collections import defaultdict

from torchtext.vocab import GloVe

## Download GloVe (takes 20+ minutes on Google Colab)

In [None]:
# Load the pretrained GloVe vectors. With no arguments this fetches the
# glove.840B.300d archive and caches it under .vector_cache/ (see the download
# log printed by this cell).
glove = GloVe()  # defaults to model built on 840 billion words

.vector_cache/glove.840B.300d.zip: 2.18GB [16:54, 2.14MB/s]                            
100%|█████████▉| 2195657/2196017 [04:10<00:00, 8827.19it/s]

In [None]:
# Sanity check: the shape is (number of vectors, dimensions per vector).
glove.vectors.shape  # 2,196,017 vectors, 300 dimensions each

torch.Size([2196017, 300])

## Find dot products of all vectors with our terms of choice (not the same as cosine similarity)
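GloVe (like Word2Vec) vectors are trained so that words that frequently co-occur have a large dot product. Unlike cosine similarity, the raw dot product is not normalized by vector length, which is why we use it directly here. We can exploit this property to look for evidence of frame blending around these terms.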

In [None]:
# Nested mapping filled in later: term_scores[term][word] -> dot product.
term_scores = defaultdict(dict)

# Terms of interest whose GloVe neighbourhoods we want to inspect.
terms = [
    'purity',
    'danger',
    'contagion',
    'contagious',
]

In [None]:
# find dot product between each term and every other term
# One matrix-vector product scores the whole vocabulary at once; this replaces
# a Python-level loop over ~2.2M row tensors (with a .matmul and .item() call
# per row) that was repeated for every term of interest.
for term in terms:
    # Row of the embedding matrix belonging to the term of interest.
    term_vector = glove.vectors[glove.stoi[term]]
    # (num_vectors, dim) @ (dim,) -> (num_vectors,): one dot product per word.
    products = glove.vectors.matmul(term_vector)
    # glove.itos[i] is the word for row i, so zipping pairs word with score.
    # .tolist() converts to plain Python floats, matching the old .item() values.
    term_scores[term].update(zip(glove.itos, products.tolist()))

## Print out the terms that most often occur in the context of each term of interest (as evidenced by the GloVe vectors).

In [None]:
# print out the top 100 terms with the highest dot products for each term of interest
# "contagion" has a lot of economic or political terms
for term, scores in term_scores.items():
    # Rank every vocabulary word by its dot product with this term, best first.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    top_words = [word for word, _score in ranked[:100]]
    print(term, '=>', ', '.join(top_words))

purity => purity, holiness, pure, purified, purest, Purity, righteousness, purification, freshness, impurity, clarity, adware/spyware/trojans, pureness, impurities, cleanliness, sincerity, sanctity, gentleness, piety, purer, innocence, fineness, uniformity, truthfulness, hardness, humility, chastity, integrity, faithfulness, fidelity, potency, whiteness, sweetness, devotion, crystalline, wholesomeness, purifying, wholeness, unity, modesty, cleanness, simplicity, purify, godliness, honesty, qualities, radiance, impure, undefiled, perfection, homogeneity, objectivity, eternal, dignity, divine, uprightness, sinless, virtues, sinlessness, spiritual, elegance, holy, oneness, completeness, lightness, meekness, ideals, virtue, morality, sacredness, moral, compassion, adherence, serenity, salvation, tolerance, reverence, harmony, chaste, concentration, authenticity, sanctification, solubility, luster, Reinheitsgebot, Pm22.Com, sensuality, constancy, individuality, decency, sensitivity, softnes