In [1]:
!pip install fasttext



In [2]:
import os

import fasttext.util
import fasttext
import numpy as np
from   scipy.spatial.distance import cosine

In [5]:
# fastText language code -> language name. The code selects the pretrained
# model; the name is the suffix used in the per-language file names.
langs = {
    "en": "English",
    "af": "Afrikaans",
    "nl": "Dutch",
    "fr": "French",
    "pt": "Portuguese",
}

# Download the pretrained fastText model of every configured language.
# if_exists='ignore' is passed so models that are already present are not fetched again.
for code in langs:
    fasttext.util.download_model(code, if_exists='ignore')

In [7]:
# Generate and store fastText embeddings for all labels of the three datasets.
#
# Reads  data/<dataset>/words/<name>-words-<Language>.txt   (one label per line)
# Writes data/<dataset>/fasttext/fasttext-<name>-<Language>.npy
# where <name> is the dataset name as used in file names
# ("imagenet12988", "places365", "ucf101").

# Resolve folders and file-name stems once per dataset and make sure the output
# folders exist (os.makedirs is the portable equivalent of the shell `mkdir -p`).
dataset_specs = []
for dataset in ["imagenet", "places-365", "ucf-101"]:
    ds_folder = f"data/{dataset}/"
    ds_wd_folder = ds_folder + "words/"
    ds_ft_folder = ds_folder + "fasttext/"
    os.makedirs(ds_ft_folder, exist_ok=True)

    # File names use "imagenet12988" for ImageNet and drop the hyphen otherwise.
    # Kept in its own variable so the loop variable is not overwritten.
    ds_name = "imagenet12988" if dataset == "imagenet" else dataset.replace("-", "")
    dataset_specs.append((ds_name, ds_wd_folder, ds_ft_folder))

# Languages form the outer loop so that each pretrained model is loaded from
# disk once, instead of once per (dataset, language) pair.
for lang_short, lang in langs.items():
    ft = fasttext.load_model(f'cc.{lang_short}.300.bin')

    for ds_name, ds_wd_folder, ds_ft_folder in dataset_specs:
        # Explicit encoding so non-ASCII labels do not depend on the platform default.
        # NOTE(review): assumes the label files are UTF-8 encoded - confirm.
        with open(ds_wd_folder + f"{ds_name}-words-{lang}.txt", 'r', encoding='utf-8') as f:
            labels = [line.strip() for line in f]

        # One embedding row per label, in file order.
        # Open question: should .get_sentence_vector be used here or should we
        # average out all the word embeddings?
        embeddings = np.array([ft.get_sentence_vector(label) for label in labels])
        np.save(ds_ft_folder + f"fasttext-{ds_name}-{lang}.npy", embeddings)



In [12]:
#
# Pair-wise similarity between (action and scene), (action and object), and (scene and object) word embeddings.
#
def wtv_mapping(wtv1, wtv2):
    """Return the pair-wise cosine similarity between two sets of embeddings.

    Parameters
    ----------
    wtv1 : array of shape (n1, d)
        One embedding per row.
    wtv2 : array of shape (n2, d)
        One embedding per row, same dimensionality as ``wtv1``.

    Returns
    -------
    numpy.ndarray of shape (n1, n2), dtype float32
        Entry ``[i, j]`` is the cosine similarity between ``wtv1[i]`` and
        ``wtv2[j]``. A row with zero norm produces NaN in every entry it
        takes part in.
    """
    first = np.asarray(wtv1, dtype=np.float64)
    second = np.asarray(wtv2, dtype=np.float64)

    n_first, n_second = first.shape[0], second.shape[0]
    if n_first == 0 or n_second == 0:
        # Nothing to compare; also covers 1-D empty arrays loaded from empty label files.
        return np.zeros((n_first, n_second), dtype=np.float32)

    # Normalise every row to unit length; a single matrix product then yields
    # all n1 * n2 similarities without a Python-level double loop.
    # Zero-norm rows evaluate 0/0 and are deliberately left as NaN.
    with np.errstate(divide="ignore", invalid="ignore"):
        first_unit = first / np.linalg.norm(first, axis=1, keepdims=True)
        second_unit = second / np.linalg.norm(second, axis=1, keepdims=True)

    similarity = first_unit @ second_unit.T
    # Rounding can push values marginally outside the valid cosine range.
    np.clip(similarity, -1.0, 1.0, out=similarity)
    return similarity.astype(np.float32)


# For every dataset pair and every language, compare the stored label
# embeddings of the two datasets and save the resulting similarity matrix in
# the fasttext folder of the first dataset of the pair.

# One-letter tag per dataset, used to build the output file name
# (presumably o = object, s = scene, a = action - confirm).
letter_of = {"imagenet12988":"o", "places365":"s", "ucf101":"a"}

dataset_pairs = [("imagenet", "places-365"), ("ucf-101", "places-365"), ("ucf-101", "imagenet")]

for first, second in dataset_pairs:
    first_dir = f"data/{first}/fasttext/"
    second_dir = f"data/{second}/fasttext/"

    # Dataset names as they appear inside the embedding file names.
    first_name = "imagenet12988" if first == "imagenet" else first.replace("-", "")
    second_name = "imagenet12988" if second == "imagenet" else second.replace("-", "")

    # Output file name without the language part, e.g. "o2s_ft_places365_".
    out_prefix = f"{letter_of[first_name]}2{letter_of[second_name]}_ft_{second_name}_"

    for lang in langs.values():
        first_emb = np.load(first_dir + f"fasttext-{first_name}-{lang}.npy")
        second_emb = np.load(second_dir + f"fasttext-{second_name}-{lang}.npy")

        similarity = wtv_mapping(first_emb, second_emb)
        np.save(first_dir + out_prefix + f"{lang}.npy", similarity)