In [None]:
# Mount Google Drive into the Colab filesystem; the project sources, datasets
# and checkpoints referenced by this notebook are addressed under this mount.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


In [None]:
! pip install -r drive/MyDrive/Diploma/requirements.txt >& /dev/null

In [None]:
# Show the GPU attached to this runtime (model, driver / CUDA version and
# current memory usage) before starting any training.
! nvidia-smi

Sun May 16 21:29:46 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import numpy as np
import torch
from torch import nn

import wandb


from tqdm.notebook import tqdm

from drive.MyDrive.Diploma.environment import KGEnv
from drive.MyDrive.Diploma.utils import (
    create_test_dataset, create_random_dataset,
    create_model, read_dataset_from_file, LabelSmoothingCrossEntropy,
    load_config
)
from drive.MyDrive.Diploma.dataset import KGDataset
from drive.MyDrive.Diploma.pretrain import evaluate, pretrain
from drive.MyDrive.Diploma.beam_search import get_ranks
from drive.MyDrive.Diploma.metrics import *

from drive.MyDrive.Diploma.load_config import config

# Enable IPython's autoreload extension in mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Environment over the training triplets, created with default settings.
# Its entity / relation counts are read here and later passed to create_model.
env = KGEnv(config['train_triplets_path'])
entities_num, relations_num = env.entities_num, env.relations_num

In [None]:
# Environments with batch_size=1; these are the ones handed to the dataset
# construction helpers and to KGDataset further down.
train_env = KGEnv(
    config['train_triplets_path'],
    batch_size=1,
)
test_env = KGEnv(
    config['test_triplets_path'],
    train=False,
    batch_size=1,
)

In [None]:
# Generate a random training dataset (size=30000) from the training
# environment.  `out_file` is presumably where the samples are persisted --
# confirm against create_random_dataset.
train_dataset = create_random_dataset(
    train_env,
    out_file='drive/MyDrive/Diploma/kinship/train_dataset.txt',
    size=30000,
)

In [None]:
# Build the test dataset from both environments.  `out_file` is presumably
# where the result is persisted -- confirm against create_test_dataset.
test_dataset = create_test_dataset(
    train_env,
    test_env,
    out_file='drive/MyDrive/Diploma/kinship/test_dataset.txt',
)

In [None]:
# Load both datasets from their text files, rebinding `train_dataset` and
# `test_dataset`.
# NOTE(review): the meaning of the second positional argument (50) is not
# visible in this notebook -- confirm against read_dataset_from_file.
train_dataset = read_dataset_from_file(
    '/content/drive/MyDrive/Diploma/kinship/train_dataset.txt',
    50,
)
test_dataset = read_dataset_from_file(
    '/content/drive/MyDrive/Diploma/kinship/test_dataset.txt',
    50,
)

In [None]:
# Wrap the raw datasets together with their environments; shuffling is
# explicitly disabled for both splits.
train_kg_dataset = KGDataset(
    train_dataset,
    train_env,
    shuffle=False,
)
test_kg_dataset = KGDataset(
    test_dataset,
    test_env,
    shuffle=False,
)

In [None]:
# Hyper-parameters and vocabulary sizes for the model, collected in one place.
model_kwargs = dict(
    entity_input_dim=entities_num + 1,      # entities + pad
    relation_input_dim=relations_num + 2,   # relations + cls + pad
    output_dim=relations_num,
    entity_pad_idx=train_env.e_pad_idx,
    relation_pad_idx=train_env.r_pad_idx,
    hid_dim=128,
    enc_pf_dim=256,
    device=device,
)
model = create_model(**model_kwargs)

# Adam over all model parameters, trained against a label-smoothed
# cross-entropy criterion with its default settings.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = LabelSmoothingCrossEntropy()

In [None]:
# Start a Weights & Biases run; the metrics logged by the training loop are
# attached to this run.
wandb.init(
    project="RL4KGQA",
    name="Pretrain",
)

In [None]:
# Re-create the environments with the batch sizes used for pre-training
# (256) and evaluation (64).
train_env = KGEnv(config['train_triplets_path'], batch_size=256)

# Fix: the evaluation environment was previously built from the *training*
# triplets and without `train=False`, which disagrees with how `test_env` is
# constructed earlier in this notebook and looks like a copy-paste of the line
# above.  Build it from the test triplets in non-training mode so that only
# the batch size differs from the original test environment.
# NOTE(review): if evaluating against the training graph was intentional,
# revert this and document the reason here.
test_env = KGEnv(config['test_triplets_path'], train=False, batch_size=64)

In [None]:
# Pre-training loop: one training pass followed by one evaluation pass per
# epoch, with the resulting loss / accuracy pairs sent to Weights & Biases.
epoch_iterator = tqdm(range(config['num_epochs']))
for epoch in epoch_iterator:
    train_loss, train_accuracy = pretrain(
        model, train_kg_dataset, optimizer, criterion, train_env
    )
    test_loss, test_accuracy = evaluate(
        model, test_kg_dataset, criterion, test_env
    )

    epoch_metrics = {
        "Train CE loss": train_loss,
        "Train accuracy": train_accuracy,
        "Test CE loss": test_loss,
        "Test accuracy": test_accuracy,
    }
    wandb.log(epoch_metrics)

In [None]:
# Persist the pre-trained weights (state_dict only) to Drive.
# The target directory is created first: this cell runs after the whole
# training loop, so failing here on a missing folder would throw away the run.
from pathlib import Path

checkpoint_path = Path('drive/MyDrive/Diploma/models/pretrained_agent.pt')
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), checkpoint_path)