# Pre-trained Anc2Vec embeddings

## Install `anc2vec` package

In [None]:
!pip3 install -U pip > /dev/null
!pip3 install -U "anc2vec @ git+https://github.com/aedera/anc2vec.git" > /dev/null

  Running command git clone -q https://github.com/aedera/anc2vec.git /tmp/pip-install-0nbil39a/anc2vec_ab000f0eb1c0419b82dd87d189cec233


## Load embeddings

This command loads embeddings constructed using the Gene Ontology (release 2020-10-06).

In [None]:
import anc2vec
# Load the pre-trained Anc2Vec embeddings (per the note above, built from the
# Gene Ontology release 2020-10-06). The cells below use `embeds` as a mapping:
# it is indexed with GO term ID strings such as 'GO:0001780', and `.values()`
# is called on it to collect every embedding.
embeds = anc2vec.get_embeddings()

## Retrieving embeddings


In [None]:
# Look up the embedding stored for a single GO term ID. As the last expression
# in the cell, its value is what the notebook displays as the cell output.
embeds['GO:0001780']

## Calculating cosine similarity

In [None]:
from sklearn.metrics.pairwise import cosine_similarity as cosim

# GO term pairs to compare, listed in the order their similarities are printed.
go_term_pairs = (
    ('GO:0001780', 'GO:0001780'),  # a term against itself (identical embeddings)
    ('GO:0008150', 'GO:0003674'),  # root term in BP vs. root term in MF
    ('GO:0044423', 'GO:0032991'),  # two children of the same parent, GO:0005575
)

for go_id_a, go_id_b in go_term_pairs:
    emb_a = embeds[go_id_a]
    emb_b = embeds[go_id_b]
    # cosim works on 2-D inputs, so each embedding is wrapped in a one-row
    # list; what gets printed is the resulting 1x1 similarity matrix.
    print(cosim([emb_a], [emb_b]))

## Visualizing embedding weights

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Build a matrix with one embedding per row, in the order `embeds.values()`
# yields them.
allembeds = np.asarray(list(embeds.values()))

# Size this figure explicitly instead of mutating the global
# plt.rcParams["figure.figsize"], which would silently resize every figure
# drawn afterwards in the same kernel.
fig, ax = plt.subplots(figsize=(10, 10))

# interpolation='none' shows the raw values without smoothing; aspect='auto'
# lets the image fill the axes instead of forcing square pixels.
heatmap = ax.imshow(allembeds, interpolation='none', aspect='auto')

# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_title('Anc2Vec embedding weights')
ax.set_xlabel('Embedding dimension')
ax.set_ylabel('Embedding (row index)')

colorbar = fig.colorbar(heatmap, ax=ax)
colorbar.set_label('Weight')

plt.show()