# Pre-Work

## Mount Google Drive

In [1]:
# Attach the user's Google Drive to the Colab VM at /content/gdrive so the
# data and code folders referenced by later cells are reachable as files.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
%cd gdrive/MyDrive/Machine_Learning/Code/

/content/gdrive/MyDrive/Machine_Learning/Code


## Install Modules

In [3]:
%%capture 
# Install AmpliGraph library
! pip install ampligraph

# Required to visualize embeddings with tensorboard projector, comment out if not required!
#! pip install --user tensorboard

# Required to plot text on embedding clusters, comment out if not required!
#! pip install --user git+https://github.com/Phlya/adjustText

In [4]:
# Colab-specific magic that selects the TensorFlow 1.x line for this runtime
# (the next cell reports 1.15.2).
# NOTE(review): presumably this has to run before `tensorflow` is first imported — confirm.
%tensorflow_version 1.x

TensorFlow 1.x selected.


## Import Modules

In [5]:
import tensorflow as tf 

# Report which TensorFlow build the runtime actually loaded.
print('TensorFlow  version: {}'.format(tf.__version__))

# Ask TensorFlow for the GPU device name and stop the notebook early when the
# expected first GPU is not attached, rather than failing later during training.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

TensorFlow  version: 1.15.2
Found GPU at: /device:GPU:0


In [6]:
import ampligraph
import os
import pickle
import json
import random
from tqdm import tqdm
import numpy as np
import pandas as pd
import tensorflow as tf
from ampligraph.evaluation import train_test_split_no_unseen, evaluate_performance, mr_score, mrr_score, hits_at_n_score, select_best_model_ranking
from ampligraph.discovery import query_topn, discover_facts, find_clusters
from ampligraph.latent_features import TransE, ComplEx, HolE, DistMult, ConvE, ConvKB
from ampligraph.utils import save_model, restore_model

def display_aggregate_metrics(ranks, hits_at=(1, 10, 100)):
    """Print the aggregate rank-based metrics for a set of evaluation ranks.

    Prints, in order: Mean Rank, Mean Reciprocal Rank, and one Hits@N line for
    every cut-off in ``hits_at``.

    Parameters
    ----------
    ranks : array-like
        Ranks to summarise; passed unchanged to ``mr_score``, ``mrr_score``
        and ``hits_at_n_score`` (in this notebook, the value returned by
        ``evaluate_performance``).
    hits_at : iterable of int, optional
        Cut-offs N for which Hits@N is printed. Defaults to ``(1, 10, 100)``,
        which reproduces the original fixed output.

    Returns
    -------
    None
        The metrics are only printed.
    """
    print('Mean Rank:', mr_score(ranks))
    print('Mean Reciprocal Rank:', mrr_score(ranks))
    for n in hits_at:
        print('Hits@{}:'.format(n), hits_at_n_score(ranks, n))

print('Ampligraph version: {}'.format(ampligraph.__version__))

Ampligraph version: 1.4.0


In [7]:
# Raise AmpliGraph's entity threshold to 800,000.
# NOTE(review): per the AmpliGraph docs this is the entity count above which the
# library switches to its large-graph mode; 800000 was presumably picked to sit
# above this graph's entity count (681,207 company entities are loaded later) —
# confirm against the total number of distinct entities.
from ampligraph.latent_features import set_entity_threshold
set_entity_threshold(800000)

# Process Data

## Read Data as DataFrame

In [None]:
os.getcwd()

'/content/gdrive/My Drive/Machine_Learning/Code'

In [None]:
dataset = pd.read_csv("./Data/Dataset_Mongo_CTEL/FullGraph_DataFrame.csv")

In [None]:
# Discard the index column that was written into the CSV alongside the triples.
# errors='ignore' keeps the cell idempotent: re-running it after the column is
# already gone is a no-op instead of a KeyError.
dataset = dataset.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
dataset['Object'] = dataset['Object'].astype(str)

In [None]:
dataset['Subject'] = dataset['Subject'].astype(str)

In [None]:
print('Total triples in the KG:', dataset.shape)

Total triples in the KG: (19461258, 3)


In [None]:
dataset.head(20)

Unnamed: 0,Subject,Predicate,Object
0,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,Dịch vụ liên quan đến in
1,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,"Sản xuất sắt, thép, gang"
2,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,Đúc sắt thép
3,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,Gia công cơ khí; xử lý và tráng phủ kim loại
4,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,Sản xuất sản phẩm khác bằng kim loại chưa được...
5,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,Sản xuất khác chưa được phân vào đâu
6,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,Xây dựng nhà để ở
7,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,Xây dựng nhà không để ở
8,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,Xây dựng công trình điện
9,CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI XÂY DỰNG PHÚC...,Have_Industry,"Xây dựng công trình cấp, thoát nước"


## Create Train, Validation, Test Split

In [None]:
len(dataset.values)

19461258

In [None]:
# Hold out 10% of all triples as the validation set. The split function returns
# a (remaining, held_out) pair, so it is unpacked directly; wrapping the result
# in tqdm only iterated over that 2-tuple and printed a meaningless "2/2" bar.
test_train, X_valid = train_test_split_no_unseen(dataset.values, test_size=0.1, seed=0)

# Hold out 10% of the REMAINING triples as the test set (about 9% of the full
# graph, as the printed sizes show); what is left is the training set.
X_train, X_test = train_test_split_no_unseen(test_train, test_size=0.1, seed=0)

print('Total triples:', dataset.shape)
print('Size of train:', X_train.shape)
print('Size of valid:', X_valid.shape)
print('Size of test:', X_test.shape)

100%|██████████| 2/2 [00:00<00:00, 5429.52it/s]


Total triples: (19461258, 3)
Size of train: (15763620, 3)
Size of valid: (1946125, 3)
Size of test: (1751513, 3)


## Pickle Export Train, Valid, Test set

In [None]:
# Persist the training split. The context manager closes (and therefore
# flushes) the file deterministically; the bare open() handle was never closed.
with open("./Data/Dataset_Mongo_CTEL/Train_SPO_Array.pkl", "wb") as fh:
    pickle.dump(X_train, fh)

In [None]:
# Persist the validation split. The context manager closes (and therefore
# flushes) the file deterministically; the bare open() handle was never closed.
with open("./Data/Dataset_Mongo_CTEL/Valid_SPO_Array.pkl", "wb") as fh:
    pickle.dump(X_valid, fh)

In [None]:
# Persist the test split. The context manager closes (and therefore flushes)
# the file deterministically; the bare open() handle was never closed.
with open("./Data/Dataset_Mongo_CTEL/Test_SPO_Array.pkl", "wb") as fh:
    pickle.dump(X_test, fh)

# Model Training

## Import Pickled Train, Valid, Test Split Sets

In [8]:
# Reload the pickled training split; `with` releases the file handle as soon as
# loading finishes. Only unpickle files produced by this notebook — pickle can
# execute arbitrary code when fed an untrusted file.
with open("./Data/Dataset_Mongo_CTEL/Train_SPO_Array.pkl", "rb") as fh:
    Imp_X_train = pickle.load(fh)

In [9]:
len(Imp_X_train)

15763620

In [10]:
# Reload the pickled validation split; `with` releases the file handle as soon
# as loading finishes. Only unpickle files produced by this notebook.
with open("./Data/Dataset_Mongo_CTEL/Valid_SPO_Array.pkl", "rb") as fh:
    Imp_X_valid = pickle.load(fh)

In [11]:
# Reload the pickled test split; `with` releases the file handle as soon as
# loading finishes. Only unpickle files produced by this notebook.
with open("./Data/Dataset_Mongo_CTEL/Test_SPO_Array.pkl", "rb") as fh:
    Imp_X_test = pickle.load(fh)

## Create an entities subset to tune model

In [None]:
# Load the pickled company-entity list used as the corruption candidates during
# evaluation; `with` releases the file handle as soon as loading finishes.
# Only unpickle files from a trusted source.
with open('./Data/Dataset_Mongo_CTEL/CompanyEntities_list.pkl', 'rb') as fh:
    corrupted_entities = pickle.load(fh)

In [None]:
len(corrupted_entities)

681207

In [None]:
# Draw a fixed 10,000-entity subset (without replacement) to keep evaluation
# tractable. A dedicated, seeded generator makes the subset — and therefore the
# reported ranks — reproducible across kernel restarts without touching the
# global `random` state; seed 0 matches the seed used elsewhere in the notebook.
small_corrupted_entities = random.Random(0).sample(corrupted_entities, 10000)

In [None]:
small_corrupted_entities[:50]

['CÔNG TY TNHH THƯƠNG MẠI VÀ XÂY DỰNG QUỲNH THÁI',
 'CÔNG TY CỔ PHẦN THƯƠNG MẠI QUỐC TẾ TUYẾT HƯƠNG',
 'CÔNG TY CỔ PHẦN MELY FOODS',
 'CÔNG TY TNHH NSK VIỆT NAM',
 'CÔNG TY TNHH SƠN I-COLOR QUỐC TẾ',
 'CÔNG TY TNHH SRITHAI (HÀ NỘI).',
 'CÔNG TY TNHH TOMOMI VIỆT NAM',
 'CÔNG TY TNHH SẢN XUẤT THƯƠNG MẠI QUẢNG CÁO NGUYỄN HÙNG',
 'CÔNG TY TNHH SẢN XUẤT - NHẬP KHẨU THỦY SẢN THIÊN LONG',
 'CÔNG TY TNHH ĐẦU TƯ- THIẾT KẾ XÂY DỰNG HOÀNG LONG',
 'CÔNG TY TNHH THIẾT KẾ & XÂY DỰNG KHANG MINH',
 'CÔNG TY TNHH MỘT THÀNH VIÊN XÂY DỰNG THẮNG TOÀN PHÁT',
 'CÔNG TY TNHH THƯƠNG MẠI VÀ ĐẦU TƯ SANSAN',
 'CÔNG TY CP KHÍ CÔNG NGHIỆP KHU VỰC 1 VIỆT NAM',
 'CÔNG TY TNHH THƯƠNG MẠI NHÀ HÀNG MÂY NGÀN',
 'CÔNG TY TNHH TRIỆU ANH ĐỨC',
 'CÔNG TY TNHH THƯƠNG MẠI VÀ ĐẦU TƯ VIỆT TRUNG',
 'CÔNG TY TNHH THƯƠNG MẠI PHÒNG TRINH',
 'CÔNG TY TNHH HỌC VIỆN THE YOUTH',
 'CÔNG TY TNHH MỘT THÀNH VIÊN TƯ VẤN VÀ ĐÀO TẠO GIÁO DỤC VIỆT MỸ',
 'CÔNG TY TNHH TM DV SỬA CHỮA TÀU BIỂN HIẾU ĐẠT',
 'CÔNG TY TNHH MỘT THÀNH VIÊN XÂY DỰNG THI

## Train Model

### Train TransE model without early stopping

In [None]:
# NOTE(review): this cell is fully commented out, yet later cells reference
# `model_TransE` (TransE evaluation and TransE embedding export). On a fresh
# kernel those cells raise NameError unless this cell is uncommented and run
# first (or a saved TransE model is restored instead).
# model_TransE = TransE(k=150,                                                      # embedding size
#                epochs=100,                                                        # number of epochs
#                batches_count= 32,                                                 # number of batches
#                eta=1,                                                             # number of corruptions to generate during training
#                loss='pairwise', loss_params={'margin': 1},                        # loss type and its hyperparameters
#                initializer='xavier', initializer_params={'uniform': False},       # initializer type and its hyperparameters
#                regularizer='LP', regularizer_params= {'lambda': 0.001, 'p': 3},   # regularizer along with its hyperparameters
#                optimizer= 'adam', optimizer_params= {'lr': 0.001},                # optimizer to use along with its hyperparameters
#                seed= 0, verbose=True)

# model_TransE.fit(Imp_X_train)

### Train ComplEx model without early stopping

In [None]:
# Configure a ComplEx model (one keyword per line so each hyperparameter is easy
# to spot and tweak), then train it on the full training split.
model_ComplEx = ComplEx(
    k=100,
    eta=1,
    epochs=100,
    batches_count=32,
    seed=0,
    loss='multiclass_nll',
    optimizer='adam',
    optimizer_params={'lr': 0.001},
    regularizer='LP',
    regularizer_params={'lambda': 0.0001, 'p': 3},
    initializer='xavier',
    initializer_params={'uniform': False},
    verbose=True,
)

model_ComplEx.fit(Imp_X_train)

  num_elements)


### Train ConvE model without early stopping

In [None]:
# NOTE(review): disabled experiment. The output recorded under this cell is a
# NotImplementedError from an earlier, uncommented run; the cause is not visible
# here — investigate before re-enabling.
# model_ConvE = ConvE(k=100, epochs=10, loss='bce', 
#                 initializer='xavier', initializer_params={'uniform': False},
#                 regularizer='LP', regularizer_params= {'lambda': 0.001, 'p': 3},
#                 optimizer= 'adam', optimizer_params= {'lr': 0.001}, 
#                 seed= 0, batches_count= 20, verbose=True)

# model_ConvE.fit(Imp_X_train)



NotImplementedError: ignored

# Tune Model

## Generate test dataset

In [None]:
# Sample 1,000 distinct test triples (row indices drawn without replacement) to
# evaluate on. A locally seeded generator makes the sample — and the metrics
# computed from it — reproducible across runs without altering NumPy's global
# random state; seed 0 matches the seed used elsewhere in the notebook.
test_dataset = Imp_X_test[np.random.RandomState(0).choice(Imp_X_test.shape[0], 1000, replace=False), :]
test_dataset

array([['CÔNG TY TNHH QUẢNG CÁO TRUYỀN THÔNG GREENSUN', 'Have_Industry',
        'Hoạt động sản xuất phim điện ảnh, phim video và chương trình truyền hình'],
       ['CÔNG TY TNHH ĐẦU TƯ & XÂY DỰNG NHÀ VIỆT RBQ', 'Have_Industry',
        'Bán buôn chuyên doanh khác chưa được phân vào đâu'],
       ['CÔNG TY TNHH VẬN TẢI QUỐC TẾ AURORA', 'Have_Industry',
        'Hoạt động dịch vụ hỗ trợ khác liên quan đến vận tải'],
       ...,
       ['CÔNG TY CP TƯ VẤN XÂY DỰNG VHP 112', 'Have_Industry',
        'Lắp đặt hệ thống điện'],
       ['CÔNG TY TNHH DỊCH VỤ THƯƠNG MẠI THÀNH ĐẠT LINH',
        'Have_Industry', 'Bán buôn sản phẩm thuốc lá, thuốc lào'],
       ['CÔNG TY TNHH KẾ TOÁN VÀ ĐẠI LÝ THUẾ ATCS', 'Have_Scale_Equity',
        'H_SE']], dtype=object)

In [None]:
type(test_dataset)

numpy.ndarray

In [None]:
# Stack all three splits row-wise into the set of known true triples that is
# passed to evaluate_performance as `filter_triples`.
X_filter = np.concatenate((Imp_X_train, Imp_X_valid, Imp_X_test), axis=0)

In [None]:
type(X_filter)

numpy.ndarray

In [None]:
X_filter

array([['CÔNG TY TNHH THƯƠNG MẠI & DỊCH VỤ THẾ GIỚI CHIP',
        'Have_Industry', 'Đúc sắt thép'],
       ['CÔNG TY TNHH SẢN XUẤT VÀ THƯƠNG MẠI GIA HỒ', 'Have_Industry',
        'Bán lẻ thực phẩm trong các cửa hàng chuyên doanh'],
       ['CÔNG TY TNHH THIẾT BỊ KHOA HỌC VÀ CÔNG NGHỆ TM',
        'Have_Industry',
        'Bán buôn vật liệu, thiết bị lắp đặt khác trong xây dựng'],
       ...,
       ['CÔNG TY TNHH C&N HOÀNG KIM', 'Have_Industry',
        'Bán phụ tùng và các bộ phận phụ trợ của ô tô và xe có động cơ khác'],
       ['CÔNG TY TNHH THƯƠNG MẠI DỊCH VỤ LIÊN TRỰC', 'Have_Industry',
        'Bán lẻ thực phẩm trong các cửa hàng chuyên doanh'],
       ['CÔNG TY TNHH ĐẦU TƯ VÀ PHÁT TRIỂN ZEN ĐÀ LẠT SÀI GÒN',
        'Have_Industry',
        'Bán buôn nông, lâm sản nguyên liệu (trừ gỗ, tre, nứa) và động vật sống']],
      dtype=object)

## Evaluate Model

### Evaluate TransE Model

In [None]:
# Rebuilds the same train+valid+test filter array that the "Generate test
# dataset" section already assigns to X_filter; kept so this section can be run
# on its own, but it is redundant (and a large copy) when run top to bottom.
X_filter = np.concatenate([Imp_X_train, Imp_X_valid, Imp_X_test], 0)

In [None]:
# Rank every triple of the full test split with the TransE model, corrupting
# both subject and object sides and filtering out known true triples, then
# print the aggregate metrics.
ranks_TransE = evaluate_performance(
    Imp_X_test,
    model=model_TransE,
    filter_triples=X_filter,
    ranking_strategy='worst',
    corrupt_side='s,o',
)
display_aggregate_metrics(ranks_TransE)

OperationalError: ignored

### Evaluate ComplEx Model

In [None]:
type(small_corrupted_entities)

list

In [None]:
# Rank the sampled test triples with the ComplEx model. Only the subject side
# is corrupted, and corruption candidates are restricted to the sampled company
# entities; known true triples are filtered out. Then print the aggregate metrics.
ranks_ComplEx = evaluate_performance(
    test_dataset,
    model=model_ComplEx,
    filter_triples=X_filter,
    entities_subset=small_corrupted_entities,
    ranking_strategy='worst',
    corrupt_side='s',
)
display_aggregate_metrics(ranks_ComplEx)

OperationalError: ignored

## Grid Search Model

### Grid Search On TransE Model

In [None]:
np.arange(33)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32])

In [None]:
model_class = TransE

# Search space handed to select_best_model_ranking.
# NOTE(review): this is 32 * 3 * 3 * 30 = 8,640 combinations, each a full
# training run; the recorded run was interrupted by hand. Consider enabling
# `max_combinations` below (random search) or shrinking the ranges.
param_grid = {
    # Start at 1: a batches_count of 0 divides by zero when the batch size is
    # derived (the recorded run warned at
    # `batch_size = int(np.ceil(... / self.batches_count))`).
    "batches_count": np.arange(1, 33),
    "seed": 0,
    "epochs": [50, 100, 150],
    "k": [50, 100, 150],
    # Start at 1: eta is the number of corruptions generated per training
    # triple, so 0 would leave no negatives to train against.
    "eta": np.arange(1, 31),
    "loss": ["multiclass_nll"],
    "loss_params": {},
    "embedding_model_params": {},
    "regularizer": ["LP"],
    "regularizer_params": {
        "p": [3],
        "lambda": [1e-3]
    },
    "optimizer": ["adam"],
    "optimizer_params": {
        "lr": 0.001 #lambda: np.random.uniform(0.00001, 0.01)
    },
    "verbose": False
}

# NOTE(review): Imp_small_X_train / Imp_small_X_valid / Imp_small_X_test are not
# defined in any cell visible in this notebook; define them (presumably as
# subsamples of the Imp_X_* splits) before running this cell.
best_model, best_params, best_mrr_train, ranks_test, mrr_test, experimental_history = \
        select_best_model_ranking(model_class,
                          Imp_small_X_train,
                          Imp_small_X_valid,
                          Imp_small_X_test,
                          param_grid,
                          # max_combinations=2, # performs random search-executes 2 models by randomly choosing params
                          use_filter=True
                          #verbose=True,
                          #early_stopping=True
                          )

  batch_size = int(np.ceil(self.train_dataset_handle.get_size("train") / self.batches_count))


KeyboardInterrupt: ignored

In [None]:
# params of the best model
best_params

### Grid Search on ComplEx Model

# Process Embedding

## Retrieve Embedding Graph of ComplEx model

In [None]:
# Entities list: the company entities whose ComplEx embeddings are exported.
# `with` releases the file handle as soon as loading finishes (the bare open()
# handle was never closed). Only unpickle files from a trusted source.
with open("./Data/Dataset_Mongo_CTEL/CompanyEntities_list.pkl", "rb") as fh:
    emb_ComplEx_lis = pickle.load(fh)

In [None]:
# Fetch the trained ComplEx embedding of every entity in emb_ComplEx_lis and
# tabulate them, one row per entity. The recorded shape is (681207, 200), i.e.
# twice the k=100 the model was configured with.
# NOTE(review): rows carry only a positional index; presumably row i belongs to
# emb_ComplEx_lis[i] — confirm get_embeddings preserves input order before
# joining the exported CSV back to entity names.
emb_ComplEx_df = pd.DataFrame(model_ComplEx.get_embeddings(emb_ComplEx_lis))

In [None]:
emb_ComplEx_df.shape

(681207, 200)

In [None]:
emb_ComplEx_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199
0,-0.052237,-0.059066,0.118238,0.048043,-0.034403,-0.068949,0.038420,-0.066451,0.081961,0.154402,-0.176285,0.191690,0.163241,0.134366,0.123907,-0.016879,-0.122289,-0.064665,-0.097054,-0.127831,-0.058380,-0.033866,-0.150440,0.068372,-0.289178,-0.155540,-0.157157,-0.090551,0.166747,-0.173755,0.163210,0.027935,-0.118157,0.090921,0.258034,-0.161996,-0.174197,-0.288130,0.170727,0.161341,...,0.077070,0.165099,0.052819,0.058434,-0.023661,-0.075976,-0.230423,0.224076,-0.108932,-0.094282,0.059522,0.004987,0.058631,-0.063153,0.016657,0.185931,-0.157009,-0.112975,0.282599,0.046166,0.240753,-0.011956,-0.122955,0.065128,-0.127971,-0.078808,-0.299928,0.081597,0.012873,-0.029069,-0.060818,-0.214210,0.143088,-0.209545,0.004412,0.122577,-0.138092,-0.186553,0.067301,-0.067481
1,-0.125481,-0.039067,0.018716,-0.219729,0.112690,0.072065,-0.050430,0.182721,-0.258364,0.094138,0.028367,0.099247,-0.011893,0.041884,-0.123793,0.136895,-0.226703,0.195917,0.022726,0.265306,0.075646,-0.053214,0.238317,-0.197167,-0.270518,0.035733,0.091263,-0.089628,-0.022787,-0.013000,0.128942,0.095220,-0.068679,-0.006171,0.064531,0.058201,-0.046571,-0.189778,0.030529,-0.219172,...,-0.181022,0.114322,-0.088008,-0.161971,0.176646,0.074591,0.022078,0.137819,0.000917,0.033298,-0.064543,-0.030522,0.098684,0.078250,-0.101216,0.039535,-0.055910,-0.043319,-0.187493,0.104316,0.237763,0.122140,0.304802,-0.089906,-0.066237,-0.031563,0.124794,-0.162425,-0.241522,-0.049984,-0.026863,0.296933,-0.163083,-0.210401,-0.371576,-0.314500,0.054876,-0.144073,-0.018042,-0.056627
2,-0.138770,0.185649,0.039368,-0.210411,-0.091143,-0.204135,-0.010756,0.093454,-0.117575,-0.005485,-0.006993,0.076912,-0.042594,0.041718,-0.166280,0.169172,-0.207704,0.046535,0.125956,0.005678,-0.032128,0.055026,0.152256,-0.111438,-0.264263,0.096022,-0.085616,-0.056205,0.135718,-0.044641,0.142805,0.010698,-0.028616,0.013882,0.158973,-0.129862,-0.092910,-0.122831,-0.024539,-0.078039,...,-0.102687,0.093672,-0.029732,-0.096003,0.016044,-0.007491,-0.124291,0.193323,-0.086968,-0.018757,0.016104,-0.026814,0.102509,0.080777,-0.113809,0.080912,-0.052152,-0.043310,0.079160,0.119818,0.332641,0.098397,0.174328,0.069158,-0.054143,0.034531,0.023097,-0.180757,-0.225164,0.008320,0.007718,0.057561,-0.100311,-0.170235,-0.068363,-0.117709,0.123843,-0.168513,-0.020739,-0.021628
3,0.072254,-0.031141,-0.018090,-0.002236,0.079224,0.162060,-0.007721,-0.003848,0.149846,0.003554,-0.041662,-0.159116,0.154785,0.082013,-0.141867,0.092127,-0.106824,-0.060184,-0.106826,0.084941,-0.050312,0.046989,-0.053225,0.047956,0.020806,0.032035,-0.135213,0.085560,-0.038333,-0.112965,0.068980,-0.012779,-0.154964,0.065694,0.207953,-0.053651,0.010924,0.274118,0.046686,0.030790,...,0.041615,0.139178,-0.098631,-0.007435,-0.164626,-0.034826,0.144366,0.126235,-0.059960,-0.118752,0.252950,-0.088697,-0.034097,-0.159532,-0.055968,0.000697,-0.002502,-0.058182,-0.072988,-0.061673,0.171986,0.144178,0.163355,0.100768,-0.035621,-0.048936,-0.166665,0.150919,-0.107104,0.238559,0.063964,0.097738,-0.020909,0.202035,-0.243412,-0.110486,0.029511,-0.122202,0.019171,-0.060294
4,-0.149311,0.041282,0.046521,-0.049896,-0.048305,0.059775,0.063782,0.034884,-0.135699,0.002751,-0.144603,0.221396,0.168255,0.088867,-0.186709,0.112967,-0.174658,-0.070809,0.064146,0.170428,0.016414,-0.029356,0.009504,0.049495,-0.158734,0.037417,-0.218275,-0.024408,-0.204240,-0.149246,0.110817,-0.014936,-0.192966,0.100766,0.082037,-0.161669,-0.059427,-0.260321,0.073861,-0.252581,...,0.008188,0.095351,0.069584,-0.073067,-0.124356,-0.002011,0.264199,0.175187,0.193914,-0.070160,0.105741,-0.159164,0.114338,-0.087972,-0.005539,0.136939,0.013894,-0.081254,-0.088027,0.301789,-0.065961,0.137587,-0.048455,0.078135,-0.063846,-0.085926,-0.174347,-0.159511,-0.057863,-0.213454,0.115236,0.063462,0.098493,-0.136599,0.082335,-0.054864,-0.091930,-0.065195,0.159135,-0.022920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681202,-0.330457,0.240275,0.079684,0.111413,-0.221805,-0.112855,0.154353,-0.026063,-0.086802,0.073306,-0.106022,0.176231,0.166503,0.201197,0.076130,-0.060673,-0.143906,-0.136782,-0.058206,0.002606,-0.178780,-0.035604,0.213667,0.126296,0.064232,-0.129769,-0.053054,-0.046527,0.079670,-0.158563,0.045381,0.175346,-0.147020,-0.183472,-0.163839,-0.179334,-0.208988,-0.282328,0.168081,0.088265,...,-0.007726,0.197592,-0.075466,-0.038456,-0.052103,0.256907,-0.105300,0.106450,-0.133957,-0.118369,-0.071044,0.079197,0.096183,-0.111161,-0.179605,0.230046,0.080500,-0.123317,0.129995,0.027051,-0.177690,0.252105,-0.126283,0.140180,-0.188045,-0.179670,0.083578,0.179677,-0.123659,0.023338,-0.129615,-0.139628,0.154259,0.109748,0.121405,0.090826,-0.045538,-0.184081,0.163525,-0.118748
681203,0.199837,-0.086101,0.218265,0.247340,-0.163031,0.129996,-0.011772,0.161731,0.007862,0.012935,-0.145378,0.224779,0.192686,0.162470,0.112847,0.090585,-0.021475,-0.137995,-0.237577,-0.116919,0.071865,-0.124860,-0.313792,0.168297,0.090194,-0.066259,-0.018330,-0.309240,-0.020073,-0.247844,0.111203,0.049491,-0.098220,-0.068846,-0.148536,-0.073844,-0.155658,-0.122311,0.155825,0.118541,...,0.092716,0.195523,-0.026787,0.141331,-0.181458,0.131537,0.163991,0.188486,0.021968,-0.028276,-0.182556,-0.143257,0.075718,-0.202651,-0.083293,0.076998,0.081050,-0.174179,0.037222,-0.021704,0.050553,0.190684,-0.134494,0.156810,-0.194606,-0.082613,-0.124713,0.248222,-0.115523,-0.167020,0.116408,-0.134468,0.269776,0.163616,0.033784,0.208051,-0.196845,-0.190812,0.205248,-0.159826
681204,0.158090,-0.082478,0.248552,0.095657,-0.020005,0.013149,0.264828,0.000798,0.203053,-0.062906,-0.168161,0.120544,0.203646,0.219838,0.037322,-0.132628,0.304337,-0.187763,-0.122638,-0.008524,-0.054248,-0.182999,-0.084548,0.157199,0.140811,-0.208160,0.055281,-0.160856,0.084410,-0.208019,0.143041,0.195488,-0.025611,-0.157403,0.069181,-0.034481,-0.233687,0.125107,0.227763,0.057587,...,-0.052312,0.255336,0.105812,-0.014414,-0.280683,0.198036,0.047626,0.117720,-0.098854,-0.114601,0.141763,-0.121298,0.076275,-0.052259,-0.005810,0.073417,0.134208,-0.202815,-0.053657,0.062656,0.159637,0.216261,-0.095636,0.105309,-0.228407,-0.162144,0.017472,0.253451,-0.007337,-0.032775,0.110262,0.048720,0.208400,0.047811,0.111623,0.026325,-0.095523,-0.195715,0.299057,-0.228980
681205,0.167718,-0.010268,0.166411,0.015175,0.146437,0.117492,0.128827,0.203665,0.081403,0.120571,-0.064592,-0.034651,0.143938,0.203596,-0.182197,-0.036411,-0.034682,0.066971,-0.043622,-0.098660,-0.019483,-0.041324,-0.236113,0.120389,0.097509,-0.039010,-0.103000,-0.094080,0.175733,-0.119863,0.020054,0.133245,-0.152508,-0.205495,-0.012561,0.074275,-0.217732,0.167670,0.116621,-0.112962,...,0.058344,0.209800,0.156360,-0.184329,-0.158639,0.228578,0.127367,0.202793,-0.054980,0.011269,0.019618,-0.172772,0.110823,0.037647,-0.135860,0.170042,0.156205,-0.182321,-0.213955,-0.279827,0.194337,0.223086,0.247091,0.214060,-0.060557,-0.137423,-0.135056,0.159654,0.049456,0.027847,0.095864,0.128809,-0.002596,0.193943,-0.163205,0.025511,-0.050424,-0.188478,0.092133,-0.223267


## Export Embedding Graph of ComplEx Model

In [None]:
emb_ComplEx_df.to_csv("./Data/Dataset_Mongo_CTEL/Full_ComplEx_Embedding.csv")

## Retrieve Embedding Graph of TransE model

In [None]:
# Entities list: the company entities whose TransE embeddings are exported.
# `with` releases the file handle as soon as loading finishes (the bare open()
# handle was never closed). Only unpickle files from a trusted source.
with open("./Data/Dataset_Mongo_CTEL/CompanyEntities_list.pkl", "rb") as fh:
    emb_TransE_lis = pickle.load(fh)

In [None]:
# Fetch the trained TransE embedding of every entity in emb_TransE_lis and
# tabulate them, one row per entity. The recorded shape is (681207, 150).
# Requires `model_TransE`, whose training cell is currently commented out.
# NOTE(review): rows carry only a positional index; presumably row i belongs to
# emb_TransE_lis[i] — confirm get_embeddings preserves input order before
# joining the exported CSV back to entity names.
emb_TransE_df = pd.DataFrame(model_TransE.get_embeddings(emb_TransE_lis))

In [None]:
emb_TransE_df.shape

(681207, 150)

In [None]:
emb_TransE_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149
0,0.038146,0.016902,-0.047978,0.144328,-0.016559,0.028748,0.015483,-0.024905,-0.010923,0.039925,-0.029402,0.010928,-0.027149,-0.070729,-0.031436,0.005346,-0.033559,0.046833,0.013520,-0.055310,-0.071310,-0.020129,0.044608,0.021814,0.068817,-0.052450,-0.031415,-0.007825,0.020104,-0.088672,0.036093,-0.021003,0.014229,0.079416,0.019641,-0.050079,-0.144792,0.092582,-0.028887,-0.160367,...,-0.023451,-0.023562,0.013974,0.038288,0.007219,0.030865,-0.035910,0.004654,0.039250,0.017566,0.119743,-0.047869,0.055755,0.079744,-0.001853,0.049149,-0.003652,0.007050,0.051485,0.017010,0.039484,-0.130004,-0.023382,-0.021631,-0.044393,-0.056297,0.045814,0.003187,-0.046391,-0.033315,-0.008831,0.061149,0.004793,-0.007337,-0.019384,0.029060,-0.055237,0.024157,0.011640,-0.104135
1,0.044277,-0.042314,-0.020673,-0.000616,0.064216,-0.063579,-0.044402,-0.057172,-0.064521,-0.062213,0.010683,-0.029329,0.039617,-0.000832,-0.013909,-0.064592,-0.038155,-0.035963,-0.062940,-0.062956,0.037228,0.063212,-0.064172,-0.014963,0.037645,-0.038797,0.062280,-0.037994,-0.038769,0.014900,-0.011911,-0.003142,-0.061555,-0.044890,0.013082,0.016058,0.020839,0.017566,0.048453,0.039973,...,0.061575,0.060939,0.064653,-0.040666,-0.067578,-0.039284,-0.060055,0.013632,-0.065256,-0.064109,-0.046872,-0.019517,0.037355,-0.037310,-0.025931,-0.021932,0.028256,-0.055360,-0.030524,0.062092,-0.064335,0.001360,0.064635,0.010909,-0.016937,-0.001636,-0.063398,0.043072,0.036836,-0.041449,-0.041012,-0.039979,0.020225,-0.024460,-0.040315,-0.028925,0.066291,-0.038400,0.053159,-0.032780
2,0.021034,-0.025471,0.007272,-0.006624,0.053237,-0.045981,-0.022369,-0.051854,-0.052888,-0.054399,-0.005044,-0.016466,0.009693,-0.021600,0.006050,-0.034346,-0.037862,-0.005759,-0.050583,-0.027049,0.025170,0.052537,-0.036628,-0.005700,0.026292,0.008130,0.028918,-0.035858,-0.036315,0.016860,-0.006612,-0.033279,-0.051746,-0.035064,0.008208,-0.003414,-0.004367,-0.021709,0.033498,0.012817,...,0.052106,0.049770,0.049628,-0.010041,-0.054684,-0.015725,-0.028467,-0.003406,-0.052505,-0.047010,0.007104,0.025296,0.015284,-0.032555,-0.029535,-0.005100,0.007801,-0.048772,-0.035683,0.053371,-0.043535,-0.003162,0.053419,0.006832,0.001714,-0.005413,-0.011870,0.025609,0.041551,-0.032905,-0.003870,0.004020,0.015859,-0.002145,0.013829,-0.025561,0.038976,-0.023801,0.046759,0.004299
3,0.000502,0.018254,0.000159,0.202396,-0.034503,0.044713,0.018162,-0.014441,0.013839,0.052033,-0.019217,0.018385,-0.041804,-0.008816,-0.055411,0.059425,0.056563,-0.062869,0.041857,-0.000571,-0.051010,-0.033086,0.067953,0.008186,0.040438,0.038559,-0.041482,0.006988,0.006398,-0.049971,0.046235,0.265696,0.039412,-0.008407,0.060872,-0.040725,-0.061337,-0.029589,-0.029172,0.066811,...,-0.034513,-0.033130,-0.022753,0.036875,0.033588,0.026741,-0.002443,0.090494,0.050784,0.027033,0.147826,0.058723,0.222660,0.017884,-0.043927,-0.031883,0.020978,0.025297,0.013178,-0.005744,0.050079,0.199354,-0.041496,0.041099,0.034418,0.027499,-0.123779,0.017602,0.015937,0.021042,0.065423,-0.053319,-0.051616,-0.006752,-0.020191,-0.093623,-0.104468,0.033858,-0.011664,0.026752
4,0.026270,-0.009819,-0.013286,0.137297,-0.014506,0.035627,-0.005599,-0.033662,-0.005459,0.028355,-0.009126,0.001056,0.011945,-0.053948,0.004185,0.030569,0.091757,-0.199662,-0.003977,-0.067402,-0.069910,-0.008423,0.038990,0.089075,0.074111,0.067492,-0.030187,-0.058595,0.024971,-0.068331,0.014824,0.010258,0.007639,0.093470,0.063294,-0.036534,0.047934,-0.057680,0.143250,-0.002490,...,-0.013397,-0.022898,0.004490,0.025576,0.020356,0.004980,-0.012430,-0.030964,0.035748,0.015958,0.043542,-0.041662,0.051376,0.044849,-0.016959,-0.027934,0.056005,0.014497,0.057949,-0.000691,0.032447,0.042247,-0.022928,-0.026833,-0.061639,-0.076117,-0.041834,0.056561,-0.048323,-0.017563,-0.021542,-0.021445,0.000256,-0.073013,-0.037059,0.121660,-0.071149,-0.008266,0.008247,-0.118993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681202,-0.031753,0.002345,-0.071574,0.005591,-0.013841,0.001958,0.000999,0.010487,0.002155,0.003296,-0.049070,0.040835,-0.027907,0.063031,0.062055,-0.033054,0.040411,0.053524,0.009171,-0.002926,-0.007294,-0.014280,0.010808,0.061657,-0.034129,-0.004767,-0.015157,-0.008301,0.033528,0.003256,0.063635,-0.032126,0.002121,-0.052807,0.004584,-0.050316,0.010887,0.031099,-0.107610,-0.029451,...,-0.015008,-0.000029,0.008820,0.046977,0.014668,-0.001449,0.033126,0.038266,0.004735,0.001153,0.157979,0.038294,-0.244177,0.022805,-0.050476,0.125127,0.014612,0.007547,-0.023883,-0.027883,0.000682,-0.003798,0.006836,-0.074187,0.047406,0.043229,0.111294,0.012631,-0.017623,0.055567,-0.019401,-0.037599,-0.032504,0.038758,0.109019,-0.108693,-0.023461,-0.011455,-0.015270,0.053285
681203,0.020722,-0.023275,0.003794,-0.003342,0.042456,-0.048186,-0.015929,-0.038101,-0.030378,-0.051653,0.000684,-0.012607,0.013339,-0.014153,0.019698,-0.011625,-0.030798,-0.008262,-0.021209,-0.011423,0.033059,0.041593,-0.046476,0.011214,-0.002664,-0.013873,0.045330,-0.014596,-0.031295,0.033238,0.008004,-0.007510,-0.037287,-0.022260,-0.000125,0.002321,-0.046515,-0.047129,0.005643,0.023658,...,0.045367,0.047712,0.026399,-0.032711,-0.044579,-0.033713,-0.008068,0.041798,-0.054593,-0.056138,-0.035884,0.027334,0.011761,-0.045444,-0.011318,0.015963,-0.013788,-0.054517,-0.033987,0.044666,-0.055523,-0.010616,0.053562,-0.033999,-0.010785,-0.002048,-0.014761,0.014861,0.032782,-0.024560,-0.006593,-0.012258,0.013534,0.014879,-0.001223,-0.021467,0.042974,-0.024645,0.032867,0.044304
681204,-0.012151,-0.004897,0.050457,0.136198,0.025681,0.014432,-0.027572,-0.029454,0.016121,0.007948,0.039994,-0.037928,0.009800,0.010073,-0.022752,-0.017211,-0.033855,0.155541,-0.012462,-0.048963,-0.019793,0.024531,0.007277,0.030975,0.044045,-0.001694,-0.000714,0.012085,-0.044024,0.042527,-0.030486,0.259824,-0.008706,0.011513,-0.015929,-0.018226,0.023475,0.064333,-0.158424,0.150763,...,-0.004550,-0.008306,-0.037878,-0.035658,0.007774,-0.001901,0.028099,-0.002504,0.011322,0.009255,0.189518,0.015042,0.011537,-0.004871,0.008155,0.016958,-0.072867,0.028783,0.005054,0.015118,0.012566,-0.009638,-0.012652,-0.005129,0.088642,0.019238,-0.036181,0.033826,0.056953,-0.035129,0.011881,-0.023415,-0.088509,0.017274,-0.140075,-0.046294,-0.016661,-0.008519,0.035571,-0.027697
681205,-0.000592,-0.010748,0.063838,0.076743,-0.013504,0.024333,0.029255,0.068827,0.059515,0.022525,-0.025720,0.060781,0.021077,0.003026,0.010128,0.033360,-0.003433,-0.035025,0.030853,0.031308,-0.074438,-0.009665,0.057139,0.043164,0.004846,0.007567,-0.059101,0.059905,0.024027,-0.079505,0.021711,0.243336,0.061790,0.065439,-0.075410,-0.010596,0.113264,-0.078820,-0.040643,0.119643,...,-0.027198,-0.026329,-0.040983,0.012596,-0.002456,0.007304,0.098963,0.078183,0.027681,0.044646,0.056283,-0.014565,0.154736,0.052422,0.037544,-0.103408,0.074160,0.020917,0.047778,-0.035077,0.030017,0.121063,-0.018542,-0.026125,0.082832,0.146085,-0.106829,-0.002379,-0.029296,0.087517,0.043530,-0.001201,0.112363,0.023338,0.019734,0.035356,-0.006198,0.006420,0.019176,-0.051041


## Export Embedding Graph of TransE Model

In [None]:
emb_TransE_df.to_csv("./Data/Dataset_Mongo_CTEL/Full_TransE_Embedding.csv")