# Dbpedia and Caligraph---Reading files and preprocessing

In [1]:
import os, json
import numpy as np
base_path = !pwd
base_path = base_path[0]
list_files = [base_path+"/data/caligraph/"+f for f in os.listdir(base_path+"/data/caligraph/") if os.path.isfile(base_path+"/data/caligraph/"+f)]

In [2]:
list_files

['/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-provenance.nt',
 '/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-instance-transitive-types.nt',
 '/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-instance-to-dbpedia-mappings.nt',
 '/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-instance-labels.nt',
 '/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-ontology.nt',
 '/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-instance-relations.nt',
 '/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-instance-types.nt',
 '/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-class-to-dbpedia.nt',
 '/home/nkouagou/Documents/Universal_Embeddings/data/caligraph/caligraph-instance-provenance.nt']

In [3]:
with open(list_files[2]) as file:
    caligraph2dbpedia_mappings = file.readlines()

In [4]:
def get_map(mapping):
    x,_,y,_ = mapping.split()
    return x.strip('<>'), y.strip('<>')

In [5]:
caligraph2dbpedia_mappings = dict(map(lambda x: get_map(x), caligraph2dbpedia_mappings))

In [6]:
list(caligraph2dbpedia_mappings.items())[13]

('http://caligraph.org/resource/Cameroon_sheep',
 'http://dbpedia.org/resource/Cameroon_sheep')

In [7]:
#dbpedia2caligraph_mappings = {value: key for key,value in caligraph2dbpedia_mappings.items()}

In [8]:
from gensim.test.utils import get_tmpfile
from gensim.models import KeyedVectors

In [9]:
word_vectors_caligraph = KeyedVectors.load("./Caligraph_Dbpedia/caligraph/caligraph-v211_500_4_sg_200_vectors.kv", mmap='r')

In [10]:
word_vectors_dbpedia = KeyedVectors.load("./Caligraph_Dbpedia/dbpedia/dbpedia.kv", mmap='r')

### There are mismatches between entity IRIs in 'caligraph2dbpedia_mappings' and those in the computed embeddings, see below. We will write a function that fixes it.

In [11]:
def repair_namespace(iri, kg='dbpedia'):
    if kg == 'dbpedia':
        if 'owl#' in iri:
            return iri
        iri = iri.replace('dbr:', 'http://dbpedia.org/resource/')
        return 'http://dbpedia.org/resource/' + iri.split('/')[-1]
    elif kg == 'caligraph':
        if 'owl#' in iri or 'ontology' in iri:
            return iri
        return 'http://caligraph.org/resource/' + iri.split('/')[-1]         

In [12]:
#Emb_keys_db = set(map(lambda t: repair_namespace(t), word_vectors_dbpedia.key_to_index.keys()))

In [13]:
#Emb_keys_cal = set(map(lambda t: repair_namespace(t, 'caligraph'), word_vectors_caligraph.key_to_index.keys()))

In [12]:
from tqdm import tqdm

### Creating entity to vector maps

In [13]:
entity2vec_db = {}
entity2vec_cal = {}

In [14]:
for ent in tqdm(word_vectors_dbpedia.key_to_index):
    try:
        entity2vec_db[repair_namespace(ent)] = np.array(word_vectors_dbpedia.get_vector(ent))
    except KeyError:
        if repair_namespace(ent) in entity2vec_db:
            entity2vec_db.pop(repair_namespace(ent))

100%|██████████| 15048578/15048578 [02:20<00:00, 107308.61it/s]


In [15]:
for ent in tqdm(word_vectors_caligraph.key_to_index):
    try:
        entity2vec_cal[repair_namespace(ent, 'caligraph')] = np.array(word_vectors_caligraph.get_vector(ent), )
    except KeyError:
        if repair_namespace(ent) in entity2vec_cal:
            entity2vec_cal.pop(repair_namespace(ent))

100%|██████████| 16429696/16429696 [02:39<00:00, 102926.14it/s]


In [16]:
del word_vectors_dbpedia, word_vectors_caligraph

In [17]:
#new_aligned_entity_dict = dict()
#
#for key, value in tqdm(caligraph2dbpedia_mappings.items()):
#    if key in entity2vec_cal and value in entity2vec_db:
#        new_aligned_entity_dict.update({key: value})
#print('There are ', len(new_aligned_entity_dict), ' aligned entities with available embeddings')

with open('Caligraph_Dbpedia/caligraph2dbpediaalignment.json') as file:
    new_aligned_entity_dict = json.load(file)

# Computing aligned KG embeddings with a simple neural network

In [None]:
import torch, torch.nn as nn

In [20]:
#with open('Caligraph_Dbpedia/caligraph2dbpediaalignment.json', 'w') as file:
#    json.dump(new_aligned_entity_dict, file, indent=3)

# Computing aligned KG embeddings using Orthogonal Procrustes

In [18]:
from sklearn.model_selection import train_test_split
from scipy.spatial import procrustes
from scipy.linalg import orthogonal_procrustes
import time, gc

## Get the embedding matrices of aligned an non-aligned entities

In [1]:
def get_source_and_target_matrices(alignment_dict, entity2vec1, entity2vec2, emb_dim=200, test_size=0.1):
    """This function takes the dictionary of aligned entities between two KGs and their corresponding embeddings (as entity to vector dictionaries)
    and returns S, T, S_eval, T_eval, and R defined as follows:
    
    -- S: Normalized large subset of the source embeddings, i.e. the matrix of aligned entity embeddings in the first knowledge graph
    
    -- T: Normalized large subset of the matrix of aligned entity embeddings in the second knowledge graph
    
    -- S_eval and T_eval are portions of S and T sampled for evaluation if test_size > 0
    
    -- R: The rotation matrix that most closely maps S to T, i.e. ||A@S-T|| is minimized
    """
    if test_size > 0:
        train_ents, eval_ents = train_test_split(list(alignment_dict.keys()), test_size=test_size, random_state=42)
    else:
        train_ents = alignment_dict.keys()
    
    S = np.empty((len(train_ents), emb_dim))
    T = np.empty((len(train_ents), emb_dim))
    if test_size > 0:
        S_eval = np.empty((len(eval_ents), emb_dim))
        T_eval = np.empty((len(eval_ents), emb_dim))

    for i, key in tqdm(enumerate(train_ents), total=len(train_ents), desc='Computing S and T'):
        S[i] = entity2vec1[key] if isinstance(entity2vec1, dict) else entity2vec1.loc[key].values
        T[i] = entity2vec2[alignment_dict[key]] if isinstance(entity2vec2, dict) else entity2vec2.loc[alignment_dict[key]].values
        
    if test_size > 0:
        for i, key in tqdm(enumerate(eval_ents), total=len(eval_ents), desc='Computing S_eval and T_eval'):
            S_eval[i] = entity2vec1[key] if isinstance(entity2vec1, dict) else entity2vec1.loc[key].values
            T_eval[i] = entity2vec2[alignment_dict[key]] if isinstance(entity2vec2, dict) else entity2vec2.loc[alignment_dict[key]].values
        
    print('\nNow computing R...')
    # Center and scale data
    mean_S = S.mean(axis=0)
    mean_T = T.mean(axis=0)
    scale_S = np.sqrt(((S-mean_S)**2).sum()/S.shape[0]) # scale, see https://en.wikipedia.org/wiki/Procrustes_analysis
    scale_T = np.sqrt(((T-mean_T)**2).sum()/T.shape[0])
    print('Scale S: ', scale_S)
    # Evaluation data
    #if test_size > 0:
    #    mean_S_eval = S_eval.mean(axis=0)
    #    mean_T_eval = T_eval.mean(axis=0)
    #    scale_S_eval = np.sqrt(((S_eval-mean_S_eval)**2).sum()/S_eval.shape[0]) # scale, see https://en.wikipedia.org/wiki/Procrustes_analysis
    #    scale_T_eval = np.sqrt(((T_eval-mean_T_eval)**2).sum()/T_eval.shape[0])
    
    t0 = time.time()
    R, loss = orthogonal_procrustes((S-mean_S)/scale_S, (T-mean_T)/scale_T, check_finite=True)
    print('\nCompleted after '+str(time.time()-t0)+' seconds')
    print('Alignment loss: ', loss)
    
    if test_size > 0:
        return scale_S, scale_T, mean_S, mean_T, (S-mean_S)/scale_S, (T-mean_T)/scale_T, (S_eval-mean_S)/scale_S, (T_eval-mean_T)/scale_T, R
    else:
        return scale_S, scale_T, mean_S, mean_T, (S-mean_S)/scale_S, (T-mean_T)/scale_T, R
    

In [2]:
def get_non_aligned_entity_embedding_matrices(alignment_dict, entity2vec1, entity2vec2, scale_S, scale_T, mean_S, mean_T, emb_dim=200):
    """
    Inputs the dictionary of aligned entities between two KGs and their corresponding embeddings, and returns the normalized embedding matrices of 
    
    non-aligned entities
    """
    A_neg_S = np.empty((len(entity2vec1)-len(alignment_dict), emb_dim))
    keys = sorted(set(entity2vec1.keys() if isinstance(entity2vec1, dict) else entity2vec1.index)-set(alignment_dict.keys()))
    for i, key in tqdm(enumerate(keys), total=A_neg_S.shape[0], desc='Computing A_neg_S...'):
        A_neg_S[i] = entity2vec1[key] if isinstance(entity2vec1, dict) else entity2vec1.loc[key].values
    
    B_neg_T = np.empty((len(entity2vec2)-len(alignment_dict), emb_dim))
    keys = sorted(set(entity2vec2.keys() if isinstance(entity2vec2, dict) else entity2vec2.index)-set(alignment_dict.values()))
    for i, key in tqdm(enumerate(keys), total=B_neg_T.shape[0], desc='Computing B_neg_T...'):
        B_neg_T[i] = entity2vec2[key] if isinstance(entity2vec2, dict) else entity2vec2.loc[key].values
        
    return (A_neg_S-mean_S)/scale_S, (B_neg_T-mean_T)/scale_T
    

In [3]:
from sklearn.neighbors import NearestNeighbors
import random

In [4]:
def evaluate_alignment_knn(S_eval, T_eval, R, hit_values = [1, 3, 10]):
    """The function takes the evaluation sets, i.e. correct alignments that were left out, and returns the hits@ and MRR results w.r.t. correct alignments
    
    """
    print('#'*50)
    print('Evaluation started...')
    print('#'*50)
    model = NearestNeighbors(n_neighbors=S_eval.shape[0], n_jobs=-1)
    print('Fitting...')
    model.fit(T_eval)
    print('Predicting...')
    preds = model.kneighbors(S_eval@R, n_neighbors=S_eval.shape[0], return_distance=False)
    Hits = np.empty((len(hit_values), S_eval.shape[0]))
    MRR = []
    for i in range(S_eval.shape[0]):
        pred_idx = (preds[i]==i).nonzero()[0][0] # if i in preds[i] else S_eval.shape[0]
        MRR.append(1./(pred_idx+1))
        for j in range(len(Hits)):
            if pred_idx < hit_values[j]:
                Hits[j, i] = 1.0
            else:
                Hits[j, i] = 0.0
    Hits = Hits.mean(1)
    MRR = np.mean(MRR)
    print()
    print(', '.join([f'Hits@{hit_values[it]}: {Hits[it]}' for it in range(len(Hits))]+[f'MRR: {MRR}']))

In [23]:
def evaluate_alignment(S_eval, T_eval, R, num_candidates=10):
    """The function takes the evaluation sets, i.e. correct alignments that were left out, and returns the accuracy computed as the proportion
    of correct alignment predictions among num_candidates candidates
    
    """
    print('#'*50)
    print('Evaluation started...')
    print('#'*50)
    acc = 0
    ids = list(range(S_eval.shape[0]))
    for i in tqdm(range(S_eval.shape[0])):
        s_i = S_eval[i][None, :]@R
        rand_ids = list(set(random.sample(ids, k=num_candidates))-{i})
        candidates = np.concatenate([T_eval[i][None, :], T_eval[rand_ids[:num_candidates-1]]], axis=0)
        acc += ((candidates-s_i)**2).sum(1).squeeze().argmin() == 1
    return acc / S_eval.shape[0]

## Evaluate, compute and store universal embeddings

In [24]:
_, _, _, _, _, _, S_eval, T_eval, R = get_source_and_target_matrices(new_aligned_entity_dict,\
                                                                                           entity2vec_cal, entity2vec_db, test_size=0.1)

Computing S and T: 100%|██████████| 3370708/3370708 [00:32<00:00, 104277.39it/s]
Computing S_eval and T_eval: 100%|██████████| 374524/374524 [00:02<00:00, 140962.70it/s]



Now computing R...
Scale S:  3.398963786498546

Completed after 23.055127143859863 seconds
Alignment loss:  684426.5543269366


### Evaluation on validation data

In [None]:
evaluate_alignment_knn(S_eval, T_eval, R, hit_values=[1, 3, 5, 10])

##################################################
Evaluation started...
##################################################
Fitting...
Predicting...


In [31]:
#list_merged_entities = sorted(set(entity2vec_cal.keys())-set(new_aligned_entity_dict.keys())) +\
#sorted(set(entity2vec_db.keys())-set(new_aligned_entity_dict.values())) + \
#list(new_aligned_entity_dict.keys())
#with open('Caligraph_Dbpedia/list_merged_entities_cal_db.txt', 'w') as file:
#    file.write(','.join(list_merged_entities))
#del list_merged_entities

In [None]:
scale_S, scale_T, mean_S, mean_T, S, T, R = get_source_and_target_matrices(new_aligned_entity_dict,\
                                                                           entity2vec_cal, entity2vec_db, test_size=0.0)

### Evaluation on training data

In [None]:
evaluate_alignment_knn(S, T, R, hit_values=[1, 3, 5, 10])

In [None]:
A_neg_S, B_neg_T = get_non_aligned_entity_embedding_matrices(new_aligned_entity_dict, entity2vec_cal, \
                                                             entity2vec_db, scale_S, scale_T, mean_S, mean_T)

In [None]:
del entity2vec_cal, entity2vec_db
gc.collect()

In [None]:
# compute every s_i as (s_i@R+t_i)/2
S = (S@R + T)/2
del T
gc.collect()
Universal_Emb = np.concatenate([A_neg_S@R, B_neg_T, S], axis=0)

In [None]:
np.save('Caligraph_Dbpedia/Universal_Emb.npy', Universal_Emb)

In [None]:
Universal_Emb.shape

In [None]:
del A_neg_S, B_neg_T, S, T, R, Universal_Emb
gc.collect()

# French and English Dbpedia

In [5]:
import torch, pandas as pd
import json
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.spatial import procrustes
from scipy.linalg import orthogonal_procrustes
import time, gc
from sklearn.neighbors import NearestNeighbors
import random
from tqdm import tqdm

In [6]:
random.seed(42)

In [7]:
def load_embeddings(full_embedding_path, entity_id_map):
    print('Loading embeddings...')
    model = torch.load(full_embedding_path, map_location='cpu')
    with open(entity_id_map) as file:
        entity_id_map = json.load(file)
    ent_emb = pd.DataFrame(model.entity_embeddings._embeddings.weight.data.tolist(), index=list(entity_id_map.keys()))
    return ent_emb

In [8]:
fr_dbpedia_emb = load_embeddings('Fr_En_Dbpedia/Fr/embeddings/TransE/trained_model.pkl', 'Fr_En_Dbpedia/Fr/embeddings/TransE/entity_to_ids.json')
eng_dbpedia_emb = load_embeddings('Fr_En_Dbpedia/En/embeddings/TransE/trained_model.pkl', 'Fr_En_Dbpedia/En/embeddings/TransE/entity_to_ids.json')

Loading embeddings...
Loading embeddings...


In [9]:
fr_dbpedia_emb.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,-0.009833,-0.012834,-0.115936,-0.040705,0.076594,-0.03435,-0.020555,0.042299,0.027486,-0.074162,...,0.050411,-0.071838,0.001301,-0.057446,0.001407,0.041489,0.122214,0.017487,0.109715,-0.065048
1,-0.021399,-0.031363,-0.058264,0.04501,0.084431,-0.033651,-0.048864,0.000954,0.034992,-0.049416,...,-0.083037,-0.06391,-0.010611,-0.039704,-0.009805,0.044853,-0.029519,0.105641,0.093731,-0.015521
10,0.086678,0.013267,-0.06266,0.040125,0.051002,-0.063431,-0.060855,-0.007261,0.036402,-0.072729,...,-0.077436,-0.072368,-0.044481,-0.0727,-0.018492,0.030871,-0.049264,0.066449,0.063988,-0.029479


In [10]:
eng_dbpedia_emb.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
10500,-0.006989,0.000777,-0.113241,0.046147,0.030751,-0.080907,-0.059351,-0.129297,0.073091,-0.028684,...,-0.147443,-0.089282,-0.004109,-0.091981,0.008184,-0.005688,-0.051136,-0.028319,-0.025681,-0.146335
10501,-0.011741,0.06958,-0.141341,0.075984,0.083039,-0.02997,-0.032043,-0.016951,0.021821,0.04054,...,-0.127512,-0.049333,-0.035813,-0.0597,0.032912,0.008938,-0.022301,0.016085,0.077748,-0.144073
10502,0.019575,0.049873,-0.130476,0.071612,0.063691,-0.046625,-0.059835,-0.05737,0.06284,0.0157,...,-0.126775,-0.073543,-0.017385,-0.066693,0.02184,-0.00418,-0.035554,0.007543,0.059197,-0.19877


In [11]:
fr_dbpedia_emb.shape

(19661, 200)

In [12]:
eng_dbpedia_emb.shape

(19993, 200)

In [13]:
with open('Fr_En_Dbpedia/ref_ent_ids') as file:
    mapping = file.readlines()

In [14]:
fr_to_eng_ids = dict(list(map(lambda x: x.strip('\n').split('\t'), mapping)))

In [15]:
_, _, _, _, _, _, S_eval, T_eval, R = get_source_and_target_matrices(fr_to_eng_ids,\
                                                                                    fr_dbpedia_emb, eng_dbpedia_emb, test_size=0.1)

Computing S and T: 100%|██████████| 13500/13500 [00:02<00:00, 5301.68it/s]
Computing S_eval and T_eval: 100%|██████████| 1500/1500 [00:00<00:00, 5190.23it/s]



Now computing R...
Scale S:  0.45738879243366115

Completed after 0.11343073844909668 seconds
Alignment loss:  8099.568020182704


### Evaluate entity alignment on validation data

In [13]:
evaluate_alignment_knn(S_eval, T_eval, R, hit_values=[1, 3, 5, 10])

##################################################
Evaluation started...
##################################################
Fitting...
Predicting...

Hits@1: 0.018666666666666668, Hits@3: 0.04133333333333333, Hits@5: 0.059333333333333335, Hits@10: 0.09333333333333334, MRR: 0.0489949419006965


In [16]:
scale_S, scale_T, mean_S, mean_T, S, T, R = get_source_and_target_matrices(fr_to_eng_ids,\
                                                                           fr_dbpedia_emb, eng_dbpedia_emb, test_size=0.0)

Computing S and T: 100%|██████████| 15000/15000 [00:02<00:00, 5174.12it/s]



Now computing R...
Scale S:  0.4564733951849289

Completed after 0.12081050872802734 seconds
Alignment loss:  8966.392035984081


### Evaluate entity alignment on training data

In [17]:
evaluate_alignment_knn(S, T, R, hit_values=[1, 3, 5, 10])

##################################################
Evaluation started...
##################################################
Fitting...
Predicting...

Hits@1: 0.005266666666666667, Hits@3: 0.012133333333333333, Hits@5: 0.017466666666666665, Hits@10: 0.028066666666666667, MRR: 0.01507269147744923


In [19]:
A_neg_S, B_neg_T = get_non_aligned_entity_embedding_matrices(fr_to_eng_ids, fr_dbpedia_emb, \
                                                             eng_dbpedia_emb, scale_S, scale_T, mean_S, mean_T)

Computing A_neg_S...: 100%|██████████| 4661/4661 [00:00<00:00, 10345.03it/s]
Computing B_neg_T...: 100%|██████████| 4993/4993 [00:00<00:00, 10105.82it/s]


In [12]:
S_eval.shape

(1500, 200)

## Compute and store universal embeddings

In [None]:
## We now want true entity IRIs. We only have their key ids
#with open('Fr_En_Dbpedia/ent_ids_1') as file:
#    entity_names_map_fr = file.readlines()
#
#with open('Fr_En_Dbpedia/ent_ids_2') as file:
#    entity_names_map_eng = file.readlines()
#    
#id_to_name_fr = dict(list(map(lambda x: x.strip('\n').split('\t'), entity_names_map_fr)))
#id_to_name_eng = dict(list(map(lambda x: x.strip('\n').split('\t'), entity_names_map_eng)))
#
#true_merged_entity_names = list(map(id_to_name_fr.get, sorted(set(fr_dbpedia_emb.index)-set(fr_to_eng_ids.keys())))) + \
#                           list(map(id_to_name_eng.get, sorted(set(eng_dbpedia_emb.index)-set(fr_to_eng_ids.values())))) + \
#                           list(map(id_to_name_fr.get, list(fr_to_eng_ids.keys())))
#print(f'Total number of merged entities: {len(true_merged_entity_names)}')
#with open('Fr_En_Dbpedia/list_merged_entities_Fr_Eng_dbpedia.txt', 'w') as file:
#    file.write(','.join(true_merged_entity_names))

In [None]:
#Fr_to_Eng_entity_names = dict(zip(list(map(id_to_name_fr.get, fr_to_eng_ids.keys())),\
#                                 list(map(id_to_name_eng.get, fr_to_eng_ids.values()))))
#
#Eng_to_Fr_entity_names = {value:key for key,value in Fr_to_Eng_entity_names.items()}
#
#with open('Fr_En_Dbpedia/Fr_to_Eng_entity_names.json', 'w') as file:
#    json.dump(Fr_to_Eng_entity_names, file, ensure_ascii=False)
#    
#with open('Fr_En_Dbpedia/Eng_to_Fr_entity_names.json', 'w') as file:
#    json.dump(Eng_to_Fr_entity_names, file, ensure_ascii=False)

with open('Fr_En_Dbpedia/Fr_to_Eng_entity_names.json') as file:
    Fr_to_Eng_entity_names = json.load(file)
    
with open('Fr_En_Dbpedia/Eng_to_Fr_entity_names.json') as file:
    Eng_to_Fr_entity_names = json.load(file)

In [None]:
# compute every s_i as (s_i@R+t_i)/2
S = (S@R + T)/2
gc.collect()
Universal_Emb = np.concatenate([A_neg_S@R, B_neg_T, S], axis=0)

In [None]:
np.save('Fr_En_Dbpedia/Universal_Emb.npy', Universal_Emb)

## Shallom embeddings for Fr-En Dbpedia

In [104]:
with open('Fr_En_Dbpedia/Fr_to_Eng_entity_names.json') as file:
    Fr_to_Eng_entity_names = json.load(file)
    
with open('Fr_En_Dbpedia/Eng_to_Fr_entity_names.json') as file:
    Eng_to_Fr_entity_names = json.load(file)
    

In [100]:
Fr_shallom_embs = pd.read_csv('Shallom_EN_FR_15K_V2/Shallom_entity_embeddings.csv').set_index('Unnamed: 0')
En_shallom_embs = pd.read_csv('Shallom_EN_FR_15K_V1/Shallom_entity_embeddings.csv').set_index('Unnamed: 0')

In [106]:
#scale_S, scale_T, mean_S, mean_T, S, T, S_eval, T_eval, R = get_source_and_target_matrices(Fr_to_Eng_entity_names,\
#                                                                                           Fr_shallom_embs, En_shallom_embs, emb_dim=300, test_size=0.1)

In [101]:
Fr_shallom_embs

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
http://dbpedia.org/resource/E734345,0.400112,0.534709,0.071111,0.613100,0.505895,0.062466,-0.655917,0.023994,0.203360,0.244213,...,-0.170001,0.068271,0.702006,0.169320,-0.008014,0.074258,0.110304,-0.125234,0.006380,-0.015109
http://dbpedia.org/resource/E631877,0.032558,0.346942,-0.020158,1.068129,0.176200,0.051450,-0.015563,-0.073449,-0.013392,0.201987,...,-0.073894,-0.229773,0.734738,0.346088,0.218054,0.196377,0.026112,0.104452,0.115188,-0.288138
http://dbpedia.org/resource/E835634,0.224423,0.673174,-0.214184,-1.345313,-1.632166,0.388177,-0.609411,0.798192,-1.648671,-1.052322,...,-0.161651,-0.506389,-0.261453,-0.329333,-0.149957,-0.243873,0.080034,1.588323,-1.342435,0.125716
http://dbpedia.org/resource/E192533,-0.651348,0.500066,-0.432704,0.578025,0.293052,0.983297,0.554792,-1.567172,-0.833076,0.544149,...,-0.662414,1.277197,0.498245,0.873016,-0.421230,0.160020,-1.208419,-0.283461,0.304016,0.168088
http://dbpedia.org/resource/E983139,0.147799,-0.064575,-0.153225,0.633950,0.410206,0.059652,-0.004860,0.256223,0.078055,0.081826,...,-0.539466,0.194111,0.099882,0.343483,0.284601,0.261219,-0.013022,0.139724,-0.400349,0.150289
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
http://fr.dbpedia.org/resource/E349865,0.069365,-0.032303,0.015863,-0.546487,0.071343,0.077314,-0.002521,-0.084041,0.112056,-0.049597,...,-0.260077,0.254835,0.066985,-0.068249,0.129796,-0.152903,0.199920,0.517644,0.036564,0.045849
http://fr.dbpedia.org/resource/E910091,-0.105166,-0.181788,-0.522520,1.127652,0.726868,0.605436,0.075351,0.493463,0.171665,0.713751,...,0.061312,0.487828,0.554511,-0.062443,-0.204439,0.036049,0.270745,0.748696,-0.096218,0.241323
http://fr.dbpedia.org/resource/E556586,-0.195786,-0.090162,0.024897,0.001877,-0.174414,0.086327,-0.146154,-0.011048,0.069752,0.291241,...,-0.447972,-0.000892,0.167914,0.212014,0.102296,-0.118306,0.262949,0.141912,0.047056,0.176921
http://fr.dbpedia.org/resource/E227084,-0.630472,-0.284155,-0.222604,-0.525567,-0.302441,-0.476612,0.705250,-0.486887,0.514701,-0.525051,...,0.964537,-0.419580,0.825889,0.580359,0.217606,0.425208,0.728615,-0.271990,0.384967,0.083909


In [102]:
Fr_shallom_embs.index

Index(['http://dbpedia.org/resource/E734345',
       'http://dbpedia.org/resource/E631877',
       'http://dbpedia.org/resource/E835634',
       'http://dbpedia.org/resource/E192533',
       'http://dbpedia.org/resource/E983139',
       'http://dbpedia.org/resource/E564319',
       'http://dbpedia.org/resource/E274912',
       'http://dbpedia.org/resource/E843418',
       'http://dbpedia.org/resource/E211652',
       'http://dbpedia.org/resource/E714773',
       ...
       'http://fr.dbpedia.org/resource/E177251',
       'http://fr.dbpedia.org/resource/E591614',
       'http://fr.dbpedia.org/resource/E701111',
       'http://fr.dbpedia.org/resource/E653774',
       'http://fr.dbpedia.org/resource/E192668',
       'http://fr.dbpedia.org/resource/E349865',
       'http://fr.dbpedia.org/resource/E910091',
       'http://fr.dbpedia.org/resource/E556586',
       'http://fr.dbpedia.org/resource/E227084',
       'http://fr.dbpedia.org/resource/E723293'],
      dtype='object', name='Unnamed: 0

In [103]:
En_shallom_embs.index

Index(['http://dbpedia.org/resource/E399772',
       'http://dbpedia.org/resource/E398558',
       'http://dbpedia.org/resource/E537780',
       'http://dbpedia.org/resource/E410701',
       'http://dbpedia.org/resource/E649085',
       'http://dbpedia.org/resource/E647903',
       'http://dbpedia.org/resource/E089170',
       'http://dbpedia.org/resource/E516555',
       'http://dbpedia.org/resource/E592019',
       'http://dbpedia.org/resource/E048330',
       ...
       'http://dbpedia.org/resource/E506454',
       'http://dbpedia.org/resource/E979536',
       'http://dbpedia.org/resource/E292025',
       'http://dbpedia.org/resource/E105994',
       'http://dbpedia.org/resource/E830356',
       'http://dbpedia.org/resource/E891068',
       'http://dbpedia.org/resource/E639002',
       'http://dbpedia.org/resource/E820583',
       'http://dbpedia.org/resource/E574782',
       'http://dbpedia.org/resource/E712661'],
      dtype='object', name='Unnamed: 0', length=15000)