## 환경세팅

In [1]:
# Add this in a Google Colab cell to install the correct version of Pytorch Geometric.
import torch

def format_pytorch_version(version):
  """Return the release part of a torch version string.

  Everything from the first '+' onward (for example a '+cu102' build tag)
  is dropped; a string without '+' is returned unchanged.
  """
  release, _plus, _build_tag = version.partition('+')
  return release

# Full version string of the installed torch build.
TORCH_version = torch.__version__
# Drop any '+...' suffix; TORCH is interpolated into the wheel index URLs of the pip lines below.
TORCH = format_pytorch_version(TORCH_version)
print(TORCH)

def format_cuda_version(version):
  """Turn a CUDA version string into the tag used in PyG wheel index URLs.

  Args:
    version: CUDA version such as '10.2', or None when the torch build has
      no CUDA support (``torch.version.cuda`` is None on CPU-only builds).

  Returns:
    'cu' followed by the version with the dots removed (e.g. 'cu102'),
    or 'cpu' when ``version`` is None.
  """
  # Without this guard a CPU-only runtime crashes with AttributeError on None.replace.
  if version is None:
    return 'cpu'
  return 'cu' + version.replace('.', '')

# CUDA version reported by the installed torch build.
# NOTE(review): presumably None on a CPU-only build — confirm format_cuda_version copes with that.
CUDA_version = torch.version.cuda
# 'cu' + digits tag (printed as cu102 in the recorded run); interpolated into the pip URLs below.
CUDA = format_cuda_version(CUDA_version)
print(CUDA)

# Install the PyG extension packages from the wheel index that matches the detected
# torch/CUDA build; IPython substitutes {TORCH} and {CUDA} with the variables set above.
# NOTE(review): none of these installs pins a version, so a rerun may pull different releases.
!pip install torch-scatter     -q -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse      -q -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster     -q -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -q -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install -q torch-geometric
# deepsnap is installed straight from the repository's default branch (no tag or commit given).
!pip install -q git+https://github.com/snap-stanford/deepsnap.git

1.9.0
cu102
[K     |████████████████████████████████| 2.6MB 6.4MB/s 
[K     |████████████████████████████████| 1.4MB 6.1MB/s 
[K     |████████████████████████████████| 931kB 7.3MB/s 
[K     |████████████████████████████████| 389kB 6.8MB/s 
[K     |████████████████████████████████| 225kB 9.5MB/s 
[K     |████████████████████████████████| 235kB 15.6MB/s 
[K     |████████████████████████████████| 51kB 6.6MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone
  Building wheel for deepsnap (setup.py) ... [?25l[?25hdone


In [2]:
from google.colab import drive
# Mount Google Drive at /content/drive; the default dataset root used by get_file_path lives under it.
drive.mount('/content/drive')

Mounted at /content/drive


## Understand the Datasets

In [11]:
import os

def get_file_path(root="/content/drive/MyDrive/data/FB15k-237",
                  file_name="ind2ent.pkl"):
    """Build the path of a dataset file.

    Args:
        root: Directory that holds the dataset files.
        file_name: Name of the file inside ``root``.

    Returns:
        ``root`` and ``file_name`` joined with ``os.path.join``.
    """
    return os.path.join(root, file_name)

In [27]:
import pickle

def _load_pickle(file_name):
    """Unpickle one file from the dataset directory.

    Args:
        file_name: Name of the pickle file; its location is resolved with
            ``get_file_path`` using that function's default root.

    Returns:
        The object stored in the pickle file.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load pickles that come from a trusted source.
    with open(get_file_path(file_name=file_name), 'rb') as f:
        return pickle.load(f)


# One helper call per file instead of three copy-pasted open/load stanzas.
ind2ent = _load_pickle("ind2ent.pkl")            # index -> entity id
ind2rel = _load_pickle("ind2rel.pkl")            # index -> relation name
train_ans_2i = _load_pickle("train_ans_2i.pkl")  # 2i training queries -> answers

In [25]:
# Both pickles are dicts.
type(ind2ent), type(ind2rel)

(dict, dict)

In [26]:
# There are 14505 entities; show the first ten (index, entity id) pairs.
list(ind2ent.items())[:10]

[(0, '/m/027rn'),
 (1, '/m/06cx9'),
 (2, '/m/017dcd'),
 (3, '/m/06v8s0'),
 (4, '/m/07s9rl0'),
 (5, '/m/0170z3'),
 (6, '/m/01sl1q'),
 (7, '/m/044mz_'),
 (8, '/m/0cnk2q'),
 (9, '/m/02nzb8')]

In [23]:
# Last ten (index, entity id) pairs.
list(ind2ent.items())[-10:]

[(14495, '/m/03ykjs9'),
 (14496, '/m/060m4'),
 (14497, '/m/04p4r'),
 (14498, '/m/017yzc'),
 (14499, '/m/021yyx'),
 (14500, '/m/01_30_'),
 (14501, '/m/02p4pt3'),
 (14502, '/m/09kqc'),
 (14503, '/m/04y41'),
 (14504, '/m/01tv5c')]

In [22]:
# There are 474 relations in total; show the first ten (index, relation name) pairs.
list(ind2rel.items())[:10]

[(0, '/location/country/form_of_government'),
 (1, '/location/country/form_of_government_reverse'),
 (2, '/tv/tv_program/regular_cast./tv/regular_tv_appearance/actor'),
 (3, '/tv/tv_program/regular_cast./tv/regular_tv_appearance/actor_reverse'),
 (4, '/media_common/netflix_genre/titles'),
 (5, '/media_common/netflix_genre/titles_reverse'),
 (6, '/award/award_winner/awards_won./award/award_honor/award_winner'),
 (7, '/award/award_winner/awards_won./award/award_honor/award_winner_reverse'),
 (8,
  '/soccer/football_team/current_roster./sports/sports_team_roster/position'),
 (9,
  '/soccer/football_team/current_roster./sports/sports_team_roster/position_reverse')]

In [24]:
# Last ten (index, relation name) pairs.
list(ind2rel.items())[-10:]

[(464, '/film/actor/dubbing_performances./film/dubbing_performance/language'),
 (465,
  '/film/actor/dubbing_performances./film/dubbing_performance/language_reverse'),
 (466, '/broadcast/content/artist'),
 (467, '/broadcast/content/artist_reverse'),
 (468,
  '/location/statistical_region/gni_per_capita_in_ppp_dollars./measurement_unit/dated_money_value/currency'),
 (469,
  '/location/statistical_region/gni_per_capita_in_ppp_dollars./measurement_unit/dated_money_value/currency_reverse'),
 (470, '/music/instrument/family'),
 (471, '/music/instrument/family_reverse'),
 (472,
  '/government/politician/government_positions_held./government/government_position_held/jurisdiction_of_office'),
 (473,
  '/government/politician/government_positions_held./government/government_position_held/jurisdiction_of_office_reverse')]

In [29]:
# Look at the 2i-type queries in the training data.
# Each key is wrapped in () (a tuple) and each value in {} (a set).
list(train_ans_2i.items())[:5]

[(((32, (411,)), (6463, (70,))), {1806, 2732}),
 (((382, (97,)), (4977, (47,))), {3219, 4859, 11863}),
 (((124, (471,)), (1241, (150,))), {1462, 3439, 3702, 4203, 7891}),
 (((3482, (35,)), (6123, (97,))), {6122, 6744}),
 (((1164, (271,)), (3547, (48,))), {705})]

In [32]:
# Entity 32 and relation 411: the first (entity, (relation,)) pair in the key of the first query listed above.
ind2ent[32], ind2rel[411]

('/m/09c7w0',
 '/location/administrative_division/first_level_division_of_reverse')

In [33]:
# Entity 6463 and relation 70: the second (entity, (relation,)) pair in the key of that same query.
ind2ent[6463], ind2rel[70]

('/m/04h1rz',
 '/government/legislative_session/members./government/government_position_held/district_represented')

In [34]:
# Entities 1806 and 2732: the two indices in the value set of that same query.
ind2ent[1806], ind2ent[2732]

('/m/05k7sb', '/m/081mh')