In [1]:
!pip install fasttext



In [2]:
import fasttext.util
import fasttext
import numpy as np
from numpy import linalg as LA
import os
from scipy.spatial.distance import cdist
from scipy import sparse
from tqdm.notebook import tqdm

In [3]:
# Language code used in fastText model names -> language name used in the
# label / embedding file names.
langs = {
    "en": "English",
    "af": "Afrikaans",
    "nl": "Dutch",
    "fr": "French",
    "pt": "Portuguese",
}

# Fetch the pretrained fastText model of every configured language;
# if_exists='ignore' leaves an already downloaded model untouched.
for lang_short in langs:
    fasttext.util.download_model(lang_short, if_exists='ignore')

In [11]:
def custom_get_sentence_vector(ft, text):
    """Embed ``text`` as the mean of its L2-normalised word vectors.

    Custom replacement for ``ft.get_sentence_vector`` that avoids adding the
    EOS token: the text is split on whitespace and only those tokens
    contribute to the result.

    Parameters
    ----------
    ft : object
        Model exposing ``get_word_vector(token)`` and ``get_dimension()``
        (e.g. a loaded fastText model).
    text : str
        Whitespace-separated tokens to embed.

    Returns
    -------
    numpy.ndarray
        1-D sentence vector. Each word vector is divided by its L2 norm before
        averaging; zero-norm vectors are kept as they are and still count
        towards the mean. Text without any token yields an all-zero vector of
        length ``ft.get_dimension()``.
    """
    tokens = text.split()
    if not tokens:
        # np.mean over an empty list returns a NaN scalar (with a
        # RuntimeWarning), which cannot be stacked with the other label
        # vectors. Return a zero vector of the model's dimension instead.
        return np.zeros(ft.get_dimension(), dtype=np.float32)

    unit_vecs = []
    for token in tokens:
        vec = ft.get_word_vector(token)
        norm = LA.norm(vec)
        # Guard against division by zero for all-zero word vectors.
        unit_vecs.append(vec / norm if norm > 0 else vec)
    return np.mean(unit_vecs, axis=0)

# Generate and store fastText embeddings for all labels of the three datasets,
# one .npy file per (dataset, language).
#
# The language loop is the outer loop so that every fastText model is loaded
# once per language rather than once per (dataset, language) pair.
for lang_short, lang in langs.items():
    ft = fasttext.load_model(f'cc.{lang_short}.300.bin')

    for ds_dir in ["imagenet", "places-365", "ucf-101"]:
        ds_folder = f"data/{ds_dir}/"
        ds_wd_folder = ds_folder + "words/"
        ds_ft_folder = ds_folder + "fasttext/"

        # Portable, shell-free equivalent of `mkdir -p`.
        os.makedirs(ds_ft_folder, exist_ok=True)

        # Tag used inside file names: differs from the folder name
        # ("imagenet" -> "imagenet12988", dashes removed otherwise).
        dataset = "imagenet12988" if ds_dir == "imagenet" else ds_dir.replace("-", "")

        # Read as UTF-8 explicitly instead of relying on the platform default
        # encoding (assumes the label files are UTF-8 -- TODO confirm).
        with open(ds_wd_folder + f"{dataset}-words-{lang}.txt", 'r', encoding="utf-8") as f:
            # One label per line; underscores separate the words of a label.
            # Blank lines are kept so row i still corresponds to line i.
            labels = [line.strip().replace("_", " ") for line in f]

        # Open question kept from the original notebook: should
        # ft.get_sentence_vector(label) be used here instead of averaging the
        # word embeddings ourselves? The custom averaging is used for now.
        embeddings = np.array([custom_get_sentence_vector(ft, label) for label in labels])
        np.save(ds_ft_folder + f"fasttext-{dataset}-{lang}.npy", embeddings)



In [5]:
# Generate and store embeddings for every (ImageNet object, Places-365 scene)
# label pair, one .npy file per language.
#
# An earlier variant (removed) embedded the string f"{objlabel} {scelabel}"
# with ft.get_sentence_vector; the pair embedding is now built from the
# per-label embeddings that were already saved to disk.

# The output folder does not depend on the language: create it once.
ds_ft_folder = "data/imagenet_places/fasttext/"
os.makedirs(ds_ft_folder, exist_ok=True)

for lang_short, lang in langs.items():
    imagenet_vecs = np.load(f"data/imagenet/fasttext/fasttext-imagenet12988-{lang}.npy")
    places_vecs = np.load(f"data/places-365/fasttext/fasttext-places365-{lang}.npy")

    # Pair embedding = plain average of the object and the scene vector,
    # without dividing by the norm.
    # Broadcasting builds all pairs at once instead of calling np.mean once per
    # pair in a Python loop. Rows are object-major: row i * n_scenes + j pairs
    # object i with scene j, the same order as looping over objects first and
    # scenes second.
    embeddings = imagenet_vecs[:, np.newaxis, :] + places_vecs[np.newaxis, :, :]
    embeddings /= 2  # in place, to avoid a second full-size temporary
    embeddings = embeddings.reshape(-1, embeddings.shape[-1])

    # Alternatives that were tried / considered:
    #   * also dividing each vector by its norm before averaging:
    #       np.mean([places_vec/LA.norm(places_vec), imagenet_vec/LA.norm(imagenet_vec)], axis = 0)
    #   * weighted average lam*places_vec + (1-lam)*imagenet_vec with lam = 0.8,
    #     which does not work really well.

    np.save(ds_ft_folder + f"fasttext-imagenet12988places365pairs-{lang}.npy", embeddings)

In [4]:
#
# Pair-wise similarity between (action and scene), (action and object), (object and object), (scene and scene), (scene and object) word embeddings.
#
def wtv_mapping(wtv1, wtv2):
    """Return the pairwise cosine similarity between two sets of row vectors.

    Entry ``[i, j]`` of the result is ``1 - cosine_distance(wtv1[i], wtv2[j])``,
    so the result has one row per vector in ``wtv1`` and one column per vector
    in ``wtv2``.
    """
    return 1 - cdist(wtv1, wtv2, "cosine")


# One-letter code of each dataset tag, used as prefix of the output file names.
corr = {"imagenet12988":"o", "places365":"s", "ucf101":"a"}

dataset_pairs = [
    ("imagenet", "places-365"),
    ("ucf-101", "places-365"),
    ("ucf-101", "imagenet"),
    ("imagenet", "imagenet"),
    ("places-365", "places-365"),
]

for ds1, ds2 in dataset_pairs:
    # Folders are derived from the directory names, before the names are
    # replaced by the tags used inside file names.
    ds1_ft_folder, ds2_ft_folder = f"data/{ds1}/fasttext/", f"data/{ds2}/fasttext/"
    ds1, ds2 = (
        "imagenet12988" if name == "imagenet" else name.replace("-", "")
        for name in (ds1, ds2)
    )

    for lang_short, lang in langs.items():
        ds1_emb = np.load(f"{ds1_ft_folder}fasttext-{ds1}-{lang}.npy")
        ds2_emb = np.load(f"{ds2_ft_folder}fasttext-{ds2}-{lang}.npy")

        # Rows follow ds1, columns follow ds2; the result is stored next to
        # the ds1 embeddings.
        emb2emb = wtv_mapping(ds1_emb, ds2_emb)
        np.save(f"{ds1_ft_folder}{corr[ds1]}2{corr[ds2]}_ft_{ds2}_{lang}.npy", emb2emb)

In [6]:
# Pairwise similarity between the action labels and the object-scene label pairs,
# stored per language.

for lang_short, lang in langs.items():
    # ds1: action label embeddings, ds2: object-scene pair embeddings.
    ds1_emb = np.load(f"data/ucf-101/fasttext/fasttext-ucf101-{lang}.npy")
    ds2_emb = np.load(
        f"data/imagenet_places/fasttext/fasttext-imagenet12988places365pairs-{lang}.npy"
    )

    # Similarities are computed with the pairs as rows and the actions as
    # columns; the transpose (actions as rows) is what gets written to disk.
    emb2emb = wtv_mapping(ds2_emb, ds1_emb)
    np.save(
        f"data/ucf-101/fasttext/a2ospairs_ft_imagenet12988places365pairs_{lang}.npy",
        emb2emb.T,
    )
    

    
    
# # Compute np.mean(ospairs2ospairs, axis=0) rather than ospairs2ospairs itself, since the full matrix cannot be computed.
# # NOTE: there is no way we can compute this in practice, so the code below is left disabled.
#     ospairs2ospairsmeans = np.array([])
#     stepsize = 100
#     for i in tqdm(range(0, ds2_emb.shape[0], stepsize)):
#         ospairs2ospairsmeans = np.append(ospairs2ospairsmeans, np.mean(wtv_mapping(ds2_emb, ds2_emb[i:i+stepsize]), axis = 0))
#     np.save(f"data/imagenet_places/fasttext/ospairs2ospairsmean_imagenet12988places365pairs_{lang}.npy",ospairs2ospairsmeans[..., np.newaxis])

    

    
    
    
    
    # # This is part of the failed attempt at using sparse matrices to store ospairs2ospairs
#     action_nr = ds1_emb.shape[0]
#     pairs_nr = ds2_emb.shape[0]
#     ospairs2ospairs = sparse.coo_matrix((pairs_nr,pairs_nr))

#     ds_os2os_folder = f"data/imagenet_places/fasttext/ospairs2ospairs_{action_nr}/{lang}/"
#     for i in range(1,action_nr+1):
#         os.system(f"mkdir -p {ds_os2os_folder}action{i}")
#         sparse.save_npz(f"{ds_os2os_folder}/action{i}/ospairs2ospairs_imagenet12988places365pairs.npz", ospairs2ospairs)
