In [2]:

import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import torch.nn.functional as F

from bert import preprocessing, generate_node_embeddings

import json
from tqdm import tqdm
import pandas as pd
import time

from load_pubmed import get_pubmed_casestudy
from main_pubmed_gnn import GCN

# Number of samples per batch handed to the DataLoader.
BATCH_SIZE = 16


# load data
print("[!] Loading dataset")
# Use a context manager so the file handle is closed as soon as the JSON is
# parsed, instead of staying open for the lifetime of the kernel.
with open('pubmed.json') as f:
    pubmed = json.load(f)
df_pubmed = pd.DataFrame.from_dict(pubmed)

# Preprocess
print("[!] Preprocessing")
start = time.time()

# Missing abstracts / titles become empty strings so concatenation below works.
AB = df_pubmed['AB'].fillna("")
TI = df_pubmed['TI'].fillna("")

# One "Title: ...\nAbstract: ..." string per record.
# (A plain `ti + ab` concatenation was the earlier alternative.)
text = [
    'Title: ' + title + '\n' + 'Abstract: ' + abstract
    for title, abstract in zip(TI, AB)
]

tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased', do_lower_case=True)

# Encode every sample; each result exposes 'input_ids' and 'attention_mask'.
encodings = [preprocessing(sample, tokenizer) for sample in tqdm(text)]

# Stack the per-sample tensors along dim 0.
token_id = torch.cat([enc['input_ids'] for enc in encodings], dim=0)
attention_masks = torch.cat(
    [enc['attention_mask'] for enc in encodings], dim=0)
print("Time: ", time.time()-start)

# Prepare DataLoader
# Each dataset item pairs a row of token ids with its attention mask. Batches
# are drawn sequentially (no shuffling) -- presumably so the generated
# features stay aligned with the input row order; confirm against
# generate_node_embeddings.
dataset = TensorDataset(token_id, attention_masks)
dataloader = DataLoader(
    dataset, batch_size=BATCH_SIZE, shuffle=False, sampler=SequentialSampler(dataset))

# Load the pretrained BERT encoder (BertModel, not a classification model),
# configured to return hidden states but not attention weights.
bert = BertModel.from_pretrained(
    'bert-base-uncased',
    output_attentions=False,
    output_hidden_states=True,
)

# Run on GPU when one is available, otherwise fall back to CPU
print("[!] Generating node embeddings")
start = time.time()
# Choose the device first and move the model with .to(device): an
# unconditional bert.cuda() raises on machines without CUDA, which would make
# the CPU fallback unreachable.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
bert.to(device)
features = generate_node_embeddings(bert, dataloader, device)
print("Time: ", time.time()-start)

data, data_pubid = get_pubmed_casestudy()
# Replace the dataset's node features with the BERT-derived `features`.
# NOTE(review): `features` may live on a different device than the rest of
# `data` -- confirm what generate_node_embeddings returns before training.
data.x = features
gnn_model = GCN(
    in_channels=data.x.shape[1], hidden_channels=128, out_channels=3, num_layers=4, dropout=0)
# Follow the device selected earlier in this cell instead of hard-requiring
# CUDA; .to() returns the module, so the cell still displays the model repr.
gnn_model.to(device)

# TODO: joint training of the GNN and the language model (not enabled yet).
# print("[!] Start training")

# data.to(device)
# optimizer_gnn = torch.optim.Adam(gnn_model.parameters(), lr=0.001)
# optimizer_lm = torch.optim.Adam(bert.parameters(), lr=0.001)


[!] Loading dataset
[!] Preprocessing


  0%|          | 0/19717 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
100%|██████████| 19717/19717 [01:27<00:00, 225.19it/s]


Time:  88.83961486816406


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[!] Generating node embeddings


100%|██████████| 1233/1233 [01:54<00:00, 10.75it/s]


Time:  118.73420071601868


GCN(
  (convs): ModuleList(
    (0): GCNConv(768, 128)
    (1): GCNConv(128, 128)
    (2): GCNConv(128, 128)
    (3): GCNConv(128, 3)
  )
  (bns): ModuleList(
    (0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [6]:
from main_pubmed_gnn import GCN, train, test, get_pubmed_casestudy
import torch

data, data_pubid = get_pubmed_casestudy()
gnn_model = GCN(
    in_channels=data.x.shape[1], hidden_channels=128, out_channels=3, num_layers=4, dropout=0)

# Use the GPU when present and fall back to CPU otherwise; calling .cuda()
# directly would raise on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gnn_model.to(device)
# Re-bind so this works whether or not .to() moves the object in place.
data = data.to(device)

# Track gradients on the input features so that the train() call below also
# leaves a gradient with respect to data.x (inspected on the last line).
data.x.requires_grad_(True)
optimizer = torch.optim.Adam(gnn_model.parameters(), lr=0.001)
loss = train(gnn_model, data, optimizer)
# Shape of the gradient of the loss with respect to the node features.
data.x.grad.shape

torch.Size([19717, 500])

In [17]:
# Average the input-feature gradient over the first axis, giving one value
# per feature column.
data.x.grad.mean(dim=0)

tensor([ 1.1385e-05,  2.2168e-05, -1.6015e-05, -1.8941e-06, -9.6117e-06,
        -4.6854e-06, -8.3772e-06, -1.0540e-05,  1.8001e-05, -7.0987e-06,
        -2.2527e-06, -1.5540e-07,  1.2920e-05,  6.0613e-06,  1.7150e-05,
        -4.0121e-06, -1.1031e-05, -9.1760e-06, -7.0928e-06,  1.7955e-06,
         1.2213e-05, -1.6607e-05,  3.1364e-06,  1.4918e-05, -9.0528e-06,
         2.2131e-06, -6.3234e-06,  1.7106e-05,  7.9889e-06, -5.8155e-06,
         6.2206e-06,  2.4634e-06,  1.3613e-05, -5.9173e-07,  3.7671e-06,
        -2.2502e-06,  8.5266e-06,  1.8880e-05,  3.7765e-06, -4.8494e-07,
         1.8441e-06,  1.1724e-05, -1.0196e-05, -3.8486e-06,  6.4225e-06,
         1.5954e-05, -1.3176e-06,  7.6001e-07,  6.3990e-06,  1.1572e-05,
        -6.5945e-06,  7.0943e-06, -3.2693e-06, -1.7289e-05,  6.2373e-06,
         1.6362e-06, -9.7330e-06,  7.6515e-06,  8.0297e-07, -1.1037e-05,
        -1.5831e-05,  2.1412e-06, -9.0578e-06,  9.8477e-06, -5.3620e-06,
        -6.0833e-06, -4.7885e-08,  5.4665e-07, -5.5