In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
import dgl
from dgl.nn import GATConv

In [2]:
import os
import pandas as pd
import numpy as np

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [4]:
DATA_HOME = "/lyceum/jhk1c21/msc_project/data"
FILTERED_PATH = os.path.join(DATA_HOME, "graph", "filtered")

In [5]:
# Load the data
titles = np.load(os.path.join(FILTERED_PATH, 'filtered_title.npy'))
abstracts = np.load(os.path.join(FILTERED_PATH, 'filtered_abstract.npy'))
keywords = np.load(os.path.join(FILTERED_PATH, 'filtered_keyword.npy'))
domains = np.load(os.path.join(FILTERED_PATH, 'filtered_domain.npy'))

ids = np.load(os.path.join(FILTERED_PATH, "filtered_id.npy"))
edges = np.load(os.path.join(FILTERED_PATH, 'filtered_edges.npy'))

In [6]:
df = pd.DataFrame()
df['src'] = edges[:, 0]
df['des'] = edges[:, 1]

# convert id from str to numbers
id_to_int = {original_id: i for i, original_id in enumerate(ids)}
int_to_id = {i: original_id for original_id, i in id_to_int.items()}

df['src'] = df['src'].apply(lambda x: id_to_int[x])
df['des'] = df['des'].apply(lambda x: id_to_int[x])

In [7]:
# Create a DGL graph
citation_network = dgl.graph( (df['src'], df['des']) )

citation_network.ndata['title'] = torch.FloatTensor(titles)
citation_network.ndata['abstract'] = torch.FloatTensor(abstracts)
citation_network.ndata['keywords'] = torch.FloatTensor(keywords)
citation_network.ndata['domain'] = torch.FloatTensor(domains)

In [None]:
# GAT Layer
class GATLayer(nn.Module):
    def __init__(self, in_dim, out_dim, num_heads):
        super(GATLayer, self).__init__()
        self.gatconv = GATConv(in_dim, out_dim, num_heads, allow_zero_in_degree=True)
        
    def forward(self, g, h):
        h = self.gatconv(g, h)
        return h.squeeze(1)

# GAT Model
class GATModel(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, num_heads=1):
        super(GATModel, self).__init__()
        self.layer1 = GATLayer(in_dim, hidden_dim, num_heads)
        self.layer2 = GATLayer(hidden_dim*num_heads, out_dim, 1)
        
    def forward(self, g, h):
        h = F.dropout(g, p=0.6, training=self.training)
        h = self.layer1()
        h = F.relu(self.layer1(g, h))
        h = self.layer2(g, h)
        return h

In [14]:
with open(os.path.join(DATA_HOME, "embedding", "tmp", "tmp_abstract_embedding_list_50.npy")) as tmp:
    res = []
    for rl in tmp.readlines():
        res.extend(eval(rl.strip()))
eval(res)

TypeError: eval() arg 1 must be a string, bytes or code object

'/lyceum/jhk1c21/msc_project/data/embedding/tmp/tmp_abstract_embedding_list_50.npy'