# Pre-trained Anc2Vec embeddings

## Install `anc2vec` package

In [None]:
!pip3 install -U pip > /dev/null
!pip3 install -U "anc2vec @ git+https://github.com/aedera/anc2vec.git" > /dev/null

## Load embeddings

The following command loads embeddings that were built from the Gene Ontology (GO) OBO file, release 2020-10-06.

In [None]:
import anc2vec
# Load the pre-trained embeddings. The cells below index the result with the
# keys 'term2index' (GO identifier -> row index) and 'embeddings' (the matrix
# those row indices refer to).
embeds = anc2vec.get_embeddings()

## Retrieving embeddings

To retrieve the embedding of GO:0001780, we first need to obtain its index in the embedding matrix.

In [None]:
# Translate the GO identifier into its row position in the embedding matrix.
term_index = embeds['term2index']
row_id = term_index['GO:0001780']

# Left as the last expression so that the notebook displays the embedding.
embeds['embeddings'][row_id]

## Calculating cosine similarity

In [None]:
from sklearn.metrics.pairwise import cosine_similarity as cosim

# Pairs of GO identifiers whose embeddings are compared, in print order.
go_pairs = [
    # the same term twice: similarity between two identical embeddings
    ('GO:0001780', 'GO:0001780'),
    # two root terms: root of BP vs. root of MF
    ('GO:0008150', 'GO:0003674'),
    # two children of the same parent, GO:0005575
    ('GO:0044423', 'GO:0032991'),
]

for go_a, go_b in go_pairs:
    term_a = embeds['term2index'][go_a]  # get index
    emb_a = embeds['embeddings'][term_a]  # get embedding

    term_b = embeds['term2index'][go_b]
    emb_b = embeds['embeddings'][term_b]

    # cosim works on 2-D inputs, so each embedding is wrapped in a list
    print(cosim([emb_a], [emb_b]))

## Visualizing embedding weights

In [None]:
import matplotlib.pyplot as plt

# Default figure size for this and any later figure in the session.
plt.rcParams.update({"figure.figsize": (10, 10)})

# Draw the embedding matrix as an image; 'auto' aspect lets it fill the
# figure instead of forcing square pixels.
heatmap = plt.imshow(embeds['embeddings'], interpolation='none', aspect='auto')

# Colour scale for the image drawn above.
plt.colorbar(heatmap)
plt.show()