In [3]:
from RGCN.layers import RGCNBasisLayer
import time
import ProcessData
import random
import copy
import math
import torch
from conv import GraphConv
import torch.nn as nn
import dgl
import dgl.function as fn
import torch as th
import torch.nn.functional as F
import networkx as nx
import matplotlib.pyplot as plt
from dgl import DGLGraph
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from functools import partial
import pandas as pd
import numpy as np

In [4]:
#留给GPU的接口
def create_variable(tensor):
    return Variable(tensor)

In [5]:
class RNNEncoder(nn.Module):
    
    def __init__(self,input_size,hidden_size,output_size,n_layers=1,bidirectional=True):
        super(RNNEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = int(bidirectional) + 1
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        #self.gru = nn.GRU(hidden_size, hidden_size, n_layers,bidirectional=bidirectional)
        self.LSTM = nn.LSTM(hidden_size, hidden_size,num_layers = n_layers,bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_size, output_size)
        
    
    def forward(self, input, seq_lengths):
        # Note: we run this all at once (over the whole input sequence)
        # input shape: B x S (input size)
        # transpose to make S(sequence) x B (batch)
        input = input.t()
        batch_size = input.size(1)

        # Make a hidden
        #hidden = self._init_hidden(batch_size)
        h_0 = torch.zeros(self.n_layers * self.n_directions,
                             batch_size,self.hidden_size)
        c_0 = torch.zeros(self.n_layers * self.n_directions,
                             batch_size,self.hidden_size)
        # Embedding S x B -> S x B x I (embedding size)
        #print("s*b",input.size())
        embedded = self.embedding(input.long())
        #print("s*b*i",embedded.size())
        # Pack them up nicely
        #gru_input = pack_padded_sequence(embedded, seq_lengths.data.cpu().numpy())

        # To compact weights again call flatten_parameters().
        
        #self.gru.flatten_parameters()
        #output, hidden = self.gru(embedded, hidden)
        
        self.LSTM.flatten_parameters()
        
        output, (h_n, c_n) = self.LSTM(embedded, (h_0,c_0))
        # Use the last layer output as FC's input
        # No need to unpack, since we are going to use hidden
        fc_output = self.fc(h_n[-1,:])
        return fc_output
    def _init_hidden(self,batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions,
                            batch_size,self.hidden_size)
        return create_variable(hidden)
        

In [6]:
class BaseRGCN(nn.Module):
    def __init__(self, num_nodes, h_dim, out_dim, num_rels, num_bases=-1,
                 num_hidden_layers=1, dropout=0, use_cuda=False):
        super(BaseRGCN, self).__init__()
        self.num_nodes = num_nodes
        self.h_dim = h_dim
        self.out_dim = out_dim
        self.num_rels = num_rels
        self.num_bases = num_bases
        self.num_hidden_layers = num_hidden_layers
        self.dropout = dropout
        self.use_cuda = use_cuda

        # create rgcn layers
        self.build_model()

        # create initial features
        #self.features = self.create_features()

    def build_model(self):
        self.layers = nn.ModuleList()
        # i2h
        i2h = self.build_input_layer()
        if i2h is not None:
            self.layers.append(i2h)
        # h2h
        for idx in range(self.num_hidden_layers):
            h2h = self.build_hidden_layer(idx)
            self.layers.append(h2h)
        # h2o
        h2o = self.build_output_layer()
        if h2o is not None:
            self.layers.append(h2o)

    # initialize feature for each node
    def create_features(self):
        return None

    def build_input_layer(self):
        return None

    def build_hidden_layer(self, idx):
        raise NotImplementedError

    def build_output_layer(self):
        return None

    def forward(self, g, features):
        self.features = features
        if self.features is not None:
            g.ndata['h'] = self.features
        for layer in self.layers:
            layer(g)
        return g.ndata.pop('h')

In [7]:
class RGCN(BaseRGCN):
    def create_features(self):
        features = torch.arange(self.num_nodes)
        if self.use_cuda:
            features = features.cuda()
        return features

    def build_input_layer(self):
        return RGCNBasisLayer(self.num_nodes, self.h_dim, self.num_rels, self.num_bases, activation=F.relu, is_input_layer=True)

    def build_hidden_layer(self, idx):
        return RGCNBasisLayer(self.h_dim, self.h_dim, self.num_rels, self.num_bases,
                         activation=F.relu)

    def build_output_layer(self):
        return RGCNBasisLayer(self.h_dim, self.out_dim, self.num_rels,self.num_bases)

In [8]:
def Stratified_Sampling(labels):
    def typicalSampling(group, typicalFracDict):
        name = group.name
        frac = typicalFracDict[name]
        return group.sample(frac=frac)
    df = pd.DataFrame({
                      'labels': labels})
    typicalFracDict = {}
    for key in range(max(labels)+1):
         typicalFracDict[key] = 0.8
    result = df.groupby('labels', group_keys=False).apply(typicalSampling, typicalFracDict)
    train_indx=list(result.index)
    test_indx=[]
    for i in range(len(labels)):
        if i not in train_indx:
            test_indx.append(i)
    return train_indx,test_indx

In [9]:
def Comput_Accuracy(index,output,labels_y):
    test_result = output[index]
    pred = test_result.data.max(1, keepdim=True)[1]
    test_target = labels_y[index]
    correct = 0
    correct += pred.eq(test_target.data.view_as(pred)).cpu().sum()
    return float(correct)/len(test_result)

In [10]:
def Create_Graph(g,full_name,authors_split,jconf,organization):
    g.add_nodes(len(authors_split))
    edge_type = []
    for i in range(len(authors_split)):
        for j in range(len(authors_split)):
            if jconf[i] == jconf[j]:
                g.add_edges(i,j)
                edge_type.append(0)
            
            for k in range(len(authors_split[i])):
                if  authors_split[i][k] in authors_split[j] and authors_split[i][k] != full_name:
                    #print("name: ",authors_split[i][k],"i: ",i,"j: ",j)
                    g.add_edges(i,j)
                    edge_type.append(1)
                    break;
            if organization[i] == organization[j] and organization[i] != 'null':
                g.add_edges(i,j)
                edge_type.append(2)
    return edge_type
            

In [11]:
def Create_Edge_List(full_name,authors_split,jconf,organization):
    edge_type = []
    edge_list_src = []
    edge_list_dst = []
    for i in range(len(authors_split)):
        for j in range(len(authors_split)):
            if jconf[i] == jconf[j]:
                edge_type.append(0)
                edge_list_src.append(i)
                edge_list_dst.append(j)
            for k in range(len(authors_split[i])):
                if  authors_split[i][k] in authors_split[j] and authors_split[i][k] != full_name:
                    #print("name: ",authors_split[i][k],"i: ",i,"j: ",j)
                    edge_type.append(1)
                    edge_list_src.append(i)
                    edge_list_dst.append(j)
                    break;
            if organization[i] == organization[j] and organization[i] != 'null':
                edge_list_src.append(i)
                edge_type.append(2)
                edge_list_dst.append(j)
    return edge_type,edge_list_src,edge_list_dst
            

# <b> Model

In [12]:
class GCN_Plus_RNN(nn.Module):
    def __init__(self,RNN_input_size,RNN_hidden_size,RGCN_input_size,RGCN_hidden_size,Num_classes,Num_rels,Num_bases=-1,Num_hidden_layers=1,dropout=0):
        super(GCN_Plus_RNN, self).__init__()
        
        self.RNN = RNNEncoder(RNN_input_size,RNN_hidden_size,RGCN_input_size)
        
        self.RGCN = RGCN(RGCN_input_size,
                         RGCN_hidden_size,
                         Num_classes,
                         Num_rels,
                         Num_bases,
                         Num_hidden_layers,
                         dropout)

        
    def forward(self, g, inputs, sequence_length):
        features = self.RNN(inputs,sequence_length)      #RNN编码

        x = self.RGCN(g,features)             
        #x = self.gcn1(g, features) 
        return x

# <b> 读数据

In [13]:
title,labels,jconf,authors,FullName,organization = ProcessData.ProcessingRawData("Data\\Jing Zhang\\Jing Zhang.xml")
title = ProcessData.Wipe_off_Punctuation(title)
title_vocab,title_split = ProcessData.Split_Title(title)
title_one_hot,Max_Sequence_Len,vocab_size = ProcessData.One_hot_encoding(title_vocab,title_split)
title_one_hot_padding = ProcessData.Padding_One_hot(title_one_hot,Max_Sequence_Len)
author_vocab,authors_split = ProcessData.Split_Authors(authors)
# len(authors_split)
# len(title_one_hot_padding)
# # vocab_size

# <b> 构造图

In [14]:
edge_type,edge_list_src,edge_list_dst = Create_Edge_List(FullName,authors_split,jconf,organization)
np.save("Data\\Jing Zhang\\edge_type.npy",edge_type)
np.save("Data\\Jing Zhang\\edge_list_src.npy",edge_list_src)
np.save("Data\\Jing Zhang\\edge_list_dst.npy",edge_list_dst)
# edge_type = np.load("Data\\LeiWang\\edge_type.npy")
# edge_list_src = np.load("Data\\LeiWang\\edge_list_src.npy")
# edge_list_dst = np.load("Data\\LeiWang\\edge_list_dst.npy")
# test_idx = np.load("Data\\LeiWang\\test_index.npy")
# train_idx = np.load("Data\\LeiWang\\train_index.npy")

NameError: name 'np' is not defined

In [230]:
g = dgl.DGLGraph()
g.add_nodes(len(authors_split))
g.add_edges(edge_list_src,edge_list_dst)
edge_norm = [1 for i in range(len(edge_type))]
g.edata.update({'type': torch.LongTensor(edge_type), 'norm': torch.Tensor(edge_norm)})
#edge_type = Create_Graph(g,FullName,authors_split,jconf,organization)

In [231]:
print("Number of edges: ",len(g.edges()[0]))
print("Number of nodes: ",len(authors_split))
print("Number of class: ",max(labels)+1)
vocab_size

Number of edges:  2169
Number of nodes:  231
Number of class:  85


911

In [232]:
y_data = labels
inputs = create_variable(torch.Tensor(title_one_hot_padding))
labels_y = create_variable(torch.LongTensor(y_data))

In [233]:
# train_idx = random.sample(range(len(authors_split)),264)
# test_idx = []
# for i in range(len(authors_split)):
#      if i not in training_idx:
#         test_idx.append(i)

In [234]:
train_idx,test_idx =Stratified_Sampling(labels)
print(len(train_idx),len(test_idx))

200 31


In [235]:
RNN_input_size = vocab_size
RNN_hidden_size = 50
RGCN_input_size = 25
RGCN_hidden_size = 20
Num_classes = max(labels)+1
Num_rels = 3
Num_layers = 1
dropout = 0.6
activation = F.relu
sequence_length = inputs.size()[1]

In [244]:
def Training(Times,Name):
    
    Max_Accuracy = 0
    for i in range(Times):
        
        train_idx,test_idx =Stratified_Sampling(labels)
        model = GCN_Plus_RNN(RNN_input_size,
                         RNN_hidden_size,
                         RGCN_input_size,
                         RGCN_hidden_size,
                         Num_classes,
                         Num_rels,
                         Num_bases=-1,
                         Num_hidden_layers=0,
                         dropout=dropout)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
        criterion = nn.CrossEntropyLoss()
        model.train()
        for i in range(201):
            optimizer.zero_grad()
            output = model(g, inputs, sequence_length)
            loss = criterion(output[train_idx], labels_y[train_idx])
            loss.backward()
            optimizer.step()
            
        Accuracy = Comput_Accuracy(test_idx,output,labels_y)
        print(Accuracy,Max_Accuracy,Accuracy > Max_Accuracy)
        if Accuracy > Max_Accuracy:
            Max_Accuracy = Accuracy
            np.save("Data\\David E. Goldberg\\train_index.npy",train_idx)
            np.save("Data\\David E. Goldberg\\test_index.npy",test_idx)
        print("Max_Accuracy:",Max_Accuracy)
        print("Accuracy:",Accuracy)

In [245]:
Training(5)

0.8387096774193549 0 True
Max_Accuracy: 0.8387096774193549
Accuracy: 0.8387096774193549
0.7419354838709677 0.8387096774193549 False
Max_Accuracy: 0.8387096774193549
Accuracy: 0.7419354838709677
0.7419354838709677 0.8387096774193549 False
Max_Accuracy: 0.8387096774193549
Accuracy: 0.7419354838709677
0.8709677419354839 0.8387096774193549 True
Max_Accuracy: 0.8709677419354839
Accuracy: 0.8709677419354839
0.8064516129032258 0.8709677419354839 False
Max_Accuracy: 0.8709677419354839
Accuracy: 0.8064516129032258


In [307]:
def domodel(filename):

    xml = filename + ".xml"
    trace="Data\\"+filename+"\\"
    title,labels,jconf,authors,FullName,organization = ProcessData.ProcessingRawData(trace+xml)
    title = ProcessData.Wipe_off_Punctuation(title)
    title_vocab,title_split = ProcessData.Split_Title(title)
    title_one_hot,Max_Sequence_Len,vocab_size = ProcessData.One_hot_encoding(title_vocab,title_split)
    title_one_hot_padding = ProcessData.Padding_One_hot(title_one_hot,Max_Sequence_Len)
    author_vocab,authors_split = ProcessData.Split_Authors(authors)
    


    edge_type,edge_list_src,edge_list_dst = Create_Edge_List(FullName,authors_split,jconf,organization)
    np.save(trace+"edge_type.npy",edge_type)
    np.save(trace+"edge_list_src.npy",edge_list_src)
    np.save(trace+"edge_list_dst.npy",edge_list_dst)
    
    g = dgl.DGLGraph()
    g.add_nodes(len(authors_split))
    g.add_edges(edge_list_src,edge_list_dst)
    edge_norm = [1 for i in range(len(edge_type))]
    g.edata.update({'type': torch.LongTensor(edge_type), 'norm': torch.Tensor(edge_norm)})
    print("Number of edges: ",len(g.edges()[0]))
    print("Number of nodes: ",len(authors_split))
    print("Number of class: ",max(labels)+1)
    
    y_data = labels
    inputs = create_variable(torch.Tensor(title_one_hot_padding))
    labels_y = create_variable(torch.LongTensor(y_data))
    
    RNN_input_size = vocab_size
    RNN_hidden_size = 50
    RGCN_input_size = 25
    RGCN_hidden_size = 20
    Num_classes = max(labels)+1
    Num_rels = 3
    Num_layers = 1
    dropout = 0.6
    activation = F.relu
    sequence_length = inputs.size()[1]
    
    Max_Accuracy = 0
    for i in range(5):
        
        train_idx,test_idx =Stratified_Sampling(labels)
        if len(train_idx)==len(authors_split):
            print("sample error!")
            break;
        model = GCN_Plus_RNN(RNN_input_size,
                         RNN_hidden_size,
                         RGCN_input_size,
                         RGCN_hidden_size,
                         Num_classes,
                         Num_rels,
                         Num_bases=-1,
                         Num_hidden_layers=0,
                         dropout=dropout)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
        criterion = nn.CrossEntropyLoss()
        model.train()
        for i in range(201):
            optimizer.zero_grad()
            output = model(g, inputs, sequence_length)
            loss = criterion(output[train_idx], labels_y[train_idx])
            loss.backward()
            optimizer.step()
            
        Accuracy = Comput_Accuracy(test_idx,output,labels_y)
        print(Accuracy,Max_Accuracy,Accuracy > Max_Accuracy)
        if Accuracy > Max_Accuracy:
            Max_Accuracy = Accuracy
            np.save(trace+"train_index.npy",train_idx)
            np.save(trace+"test_index.npy",test_idx)
        print("Max_Accuracy:",Max_Accuracy)
        print("Accuracy:",Accuracy)
        if Max_Accuracy == 1:
            break

In [288]:
names

['Ajay Gupta',
 'Alok Gupta',
 'Barry Wilkinson',
 'Bin Li',
 'Bin Yu',
 'Bin Zhu',
 'Bing Liu',
 'Bo Liu',
 'Bob Johnson',
 'Charles Smith',
 'Cheng Chang',
 'Daniel Massey',
 'David Brown',
 'David C. Wilson',
 'David Cooper',
 'David E. Goldberg',
 'David Jensen',
 'David Levine',
 'David Nelson',
 'Eric Martin',
 'F. Wang',
 'Fan Wang',
 'Fei Su',
 'Feng Liu',
 'Feng Pan',
 'Frank Mueller',
 'Gang Chen',
 'Gang Luo',
 'Hao Wang',
 'Hiroshi Tanaka',
 'Hong Xie',
 'Hui Fang',
 'Hui Yu',
 'J. Guo',
 'J. Yin',
 'Jeffrey Parsons',
 'Ji Zhang',
 'Jianping Wang',
 'Jie Tang',
 'Jie Yu',
 'Jim Gray',
 'Jing Zhang',
 'John Collins',
 'John F. McDonald',
 'John Hale',
 'Jose M. Garcia',
 'Juan Carlos Lopez',
 'Kai Tang',
 'Kai Zhang',
 'Ke Chen',
 'Keith Edwards',
 'Koichi Furukawa',
 'Kuo Zhang',
 'Lei Chen',
 'Lei Fang',
 'Lei Jin',
 'Lei Wang',
 'Li Shen',
 'Lu Liu',
 'M. Rahman',
 'Manuel Silva',
 'Mark Davis',
 'Michael Lang',
 'Michael Siegel',
 'Michael Smith',
 'Michael Wagner',
 'Ni

In [287]:
for name in names:
    print(name)
    domodel(name)
    print("\n\n")

Ajay Gupta
Number of edges:  221
Number of nodes:  36
Number of class:  9




0.8333333333333334 0 True
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.8333333333333334 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.8333333333333334 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.8333333333333334 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.5 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.5



Alok Gupta
Number of edges:  532
Number of nodes:  57
Number of class:  2
1.0 0 True
Max_Accuracy: 1.0
Accuracy: 1.0
0.9090909090909091 1.0 False
Max_Accuracy: 1.0
Accuracy: 0.9090909090909091
0.9090909090909091 1.0 False
Max_Accuracy: 1.0
Accuracy: 0.9090909090909091
1.0 1.0 False
Max_Accuracy: 1.0
Accuracy: 1.0
1.0 1.0 False
Max_Accuracy: 1.0
Accuracy: 1.0



Barry Wilkinson
Number of edges:  163
Number of nodes:  28
Number of class:  1
1.0 0 True
Max_Accuracy: 1.0
Accuracy: 1.0
1.0 1.0 False
Max_Accuracy: 1

RuntimeError: cannot perform reduction function max on tensor with no elements because the operation does not have an identity

In [301]:
#'F. Wang',
names1=['F. Wang',
 'Fan Wang',
 'Fei Su',
 'Feng Liu',
 'Feng Pan',
 'Frank Mueller',
 'Gang Chen',
 'Gang Luo',
 'Hao Wang',
 'Hiroshi Tanaka',
 'Hong Xie',
 'Hui Fang',
 'Hui Yu',
 'J. Guo',
 'J. Yin',
 'Jeffrey Parsons',
 'Ji Zhang',
 'Jianping Wang',
 'Jie Tang',
 'Jie Yu',
 'Jim Gray',
 'Jing Zhang',
 'John Collins',
 'John F. McDonald',
 'John Hale',
 'Jose M. Garcia',
 'Juan Carlos Lopez',
 'Kai Tang',
 'Kai Zhang',
 'Ke Chen',
 'Keith Edwards',
 'Koichi Furukawa',
 'Kuo Zhang',
 'Lei Chen',
 'Lei Fang',
 'Lei Jin',
 'Lei Wang',
 'Li Shen',
 'Lu Liu',
 'M. Rahman',
 'Manuel Silva',
 'Mark Davis',
 'Michael Lang',
 'Michael Siegel',
 'Michael Smith',
 'Michael Wagner',
 'Ning Zhang',
 'Paul Brown',
 'Paul Wang',
 'Peter Phillips',
 'Philip J. Smith',
 'Ping Zhou',
 'Qiang shen',
 'R. Balasubramanian',
 'R. Cole',
 'R. Ramesh',
 'Rafael Alonso',
 'Rakesh Kumar',
 'Richard Taylor',
 'Robert Allen',
 'Robert Schreiber',
 'S. Huang',
 'Sanjay Jain',
 'Satoshi Kobayashi',
 'Shu lin',
 'Steve King',
 'Thomas D. Taylor',
 'Thomas Hermann',
 'Thomas Meyer',
 'Thomas Tran',
 'Thomas Wolf',
 'Thomas Zimmermann',
 'Wei Wang',
 'Wei Xu',
 'Wen Gao',
 'William H. Hsu',
 'X. Zhang',
 'Xiaoming Wang',
 'Xiaoyan Li',
 'Yan Tang',
 'Yang Wang',
 'Yang Yu',
 'Yi Deng',
 'Yong Chen',
 'Yoshio Tanaka',
 'Young Park',
 'Yu Zhang',
 'Yue Zhao',
 'Yun Wang',
 'Z. Wang']

In [302]:
for name in names1:
    print(name)
    domodel(name)
    print("\n\n")

F. Wang
Number of edges:  51
Number of nodes:  19
Number of class:  17
sample error!



Fan Wang
Number of edges:  596
Number of nodes:  56
Number of class:  14




1.0 0 True
Max_Accuracy: 1.0
Accuracy: 1.0



Fei Su
Number of edges:  794
Number of nodes:  37
Number of class:  4
0.8571428571428571 0 True
Max_Accuracy: 0.8571428571428571
Accuracy: 0.8571428571428571
1.0 0.8571428571428571 True
Max_Accuracy: 1.0
Accuracy: 1.0



Feng Liu
Number of edges:  1133
Number of nodes:  149
Number of class:  32
0.6296296296296297 0 True
Max_Accuracy: 0.6296296296296297
Accuracy: 0.6296296296296297
0.7037037037037037 0.6296296296296297 True
Max_Accuracy: 0.7037037037037037
Accuracy: 0.7037037037037037
0.6666666666666666 0.7037037037037037 False
Max_Accuracy: 0.7037037037037037
Accuracy: 0.6666666666666666
0.7407407407407407 0.7037037037037037 True
Max_Accuracy: 0.7407407407407407
Accuracy: 0.7407407407407407
0.5925925925925926 0.7407407407407407 False
Max_Accuracy: 0.7407407407407407
Accuracy: 0.5925925925925926



Feng Pan
Number of edges:  748
Number of nodes:  73
Number of class:  15
0.75 0 True
Max_Accuracy: 0.75
Accuracy: 0.75
0.8333333333333334 0.75 Tr

RuntimeError: index out of range at c:\a\w\1\s\tmp_conda_3.6_184213\conda\conda-bld\pytorch_1549565089053\work\aten\src\th\generic/THTensorEvenMoreMath.cpp:191

In [308]:
names2=['Lei Wang',
 'Li Shen',
 'Lu Liu',
 'M. Rahman',
 'Manuel Silva',
 'Mark Davis',
 'Michael Lang',
 'Michael Siegel',
 'Michael Smith',
 'Michael Wagner',
 'Ning Zhang',
 'Paul Brown',
 'Paul Wang',
 'Peter Phillips',
 'Philip J. Smith',
 'Ping Zhou',
 'Qiang shen',
 'R. Balasubramanian',
 'R. Cole',
 'R. Ramesh',
 'Rafael Alonso',
 'Rakesh Kumar',
 'Richard Taylor',
 'Robert Allen',
 'Robert Schreiber',
 'S. Huang',
 'Sanjay Jain',
 'Satoshi Kobayashi',
 'Shu lin',
 'Steve King',
 'Thomas D. Taylor',
 'Thomas Hermann',
 'Thomas Meyer',
 'Thomas Tran',
 'Thomas Wolf',
 'Thomas Zimmermann',
 'Wei Wang',
 'Wei Xu',
 'Wen Gao',
 'William H. Hsu',
 'X. Zhang',
 'Xiaoming Wang',
 'Xiaoyan Li',
 'Yan Tang',
 'Yang Wang',
 'Yang Yu',
 'Yi Deng',
 'Yong Chen',
 'Yoshio Tanaka',
 'Young Park',
 'Yu Zhang',
 'Yue Zhao',
 'Yun Wang',
 'Z. Wang']

In [309]:
for name in names2:
    print(name)
    domodel(name)
    print("\n\n")

Lei Wang
Number of edges:  2987
Number of nodes:  308
Number of class:  112




0.7045454545454546 0 True
Max_Accuracy: 0.7045454545454546
Accuracy: 0.7045454545454546
0.7272727272727273 0.7045454545454546 True
Max_Accuracy: 0.7272727272727273
Accuracy: 0.7272727272727273
0.6818181818181818 0.7272727272727273 False
Max_Accuracy: 0.7272727272727273
Accuracy: 0.6818181818181818
0.7727272727272727 0.7272727272727273 True
Max_Accuracy: 0.7727272727272727
Accuracy: 0.7727272727272727
0.75 0.7727272727272727 False
Max_Accuracy: 0.7727272727272727
Accuracy: 0.75



Li Shen
Number of edges:  749
Number of nodes:  68
Number of class:  9
0.8333333333333334 0 True
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.8333333333333334 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.9166666666666666 0.8333333333333334 True
Max_Accuracy: 0.9166666666666666
Accuracy: 0.9166666666666666
1.0 0.9166666666666666 True
Max_Accuracy: 1.0
Accuracy: 1.0



Lu Liu
Number of edges:  411
Number of nodes:  58
Number of class:  17
1.0 0 True


ValueError: max() arg is an empty sequence

In [310]:
names3=['Wei Xu',
 'Wen Gao',
 'William H. Hsu',
 'X. Zhang',
 'Xiaoming Wang',
 'Xiaoyan Li',
 'Yan Tang',
 'Yang Wang',
 'Yang Yu',
 'Yi Deng',
 'Yong Chen',
 'Yoshio Tanaka',
 'Young Park',
 'Yu Zhang',
 'Yue Zhao',
 'Yun Wang',
 'Z. Wang']

In [311]:
for name in names3:
    print(name)
    domodel(name)
    print("\n\n")

Wei Xu
Number of edges:  1835
Number of nodes:  153
Number of class:  48




0.7916666666666666 0 True
Max_Accuracy: 0.7916666666666666
Accuracy: 0.7916666666666666
0.6666666666666666 0.7916666666666666 False
Max_Accuracy: 0.7916666666666666
Accuracy: 0.6666666666666666
0.8333333333333334 0.7916666666666666 True
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.75 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.75
0.6666666666666666 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.6666666666666666



Wen Gao
Number of edges:  37470
Number of nodes:  484
Number of class:  10
0.968421052631579 0 True
Max_Accuracy: 0.968421052631579
Accuracy: 0.968421052631579
0.9789473684210527 0.968421052631579 True
Max_Accuracy: 0.9789473684210527
Accuracy: 0.9789473684210527
0.9789473684210527 0.9789473684210527 False
Max_Accuracy: 0.9789473684210527
Accuracy: 0.9789473684210527
0.968421052631579 0.9789473684210527 False
Max_Accuracy: 0.9789473684210527
Accuracy: 0.968421052631579
0.9894736842105263 0.9789473684210527 Tr

SAXParseException: Data\Yun Wang\Yun Wang.xml:227:0: no element found

In [312]:
names4=[ 'Yun Wang',
 'Z. Wang']

In [314]:
for name in names4:
    print(name)
    domodel(name)
    print("\n\n")

Yun Wang
Number of edges:  180
Number of nodes:  46
Number of class:  19




0.6666666666666666 0 True
Max_Accuracy: 0.6666666666666666
Accuracy: 0.6666666666666666
0.8333333333333334 0.6666666666666666 True
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.8333333333333334 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.8333333333333334 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.8333333333333334
0.6666666666666666 0.8333333333333334 False
Max_Accuracy: 0.8333333333333334
Accuracy: 0.6666666666666666



Z. Wang
Number of edges:  127
Number of nodes:  47
Number of class:  38
1.0 0 True
Max_Accuracy: 1.0
Accuracy: 1.0





In [1]:
names5 = ["Barry Wilkinson","Eric Martin","Ping Zhou"]
names5 = ["Lei Fang","Paul Brown","Steve King"]
names5 = ["Eric Martin"]

In [2]:
for name in names5:
    print(name)
    domodel(name)
    print("\n\n")

Eric Martin


NameError: name 'domodel' is not defined

In [204]:
model = GCN_Plus_RNN(RNN_input_size,
                     RNN_hidden_size,
                     RGCN_input_size,
                     RGCN_hidden_size,
                     Num_classes,
                     Num_rels,
                     Num_bases=-1,
                     Num_hidden_layers=0,
                     dropout=dropout)

In [205]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
criterion = nn.CrossEntropyLoss()

In [206]:
model.train()
for i in range(201):
    optimizer.zero_grad()
    output = model(g, inputs, sequence_length)
    loss = criterion(output[train_idx], labels_y[train_idx])
    loss.backward()
    optimizer.step()
    if i % 50 == 0:
        print(loss.data)
        print("Train Accuracy: ",Comput_Accuracy(train_idx))
        print("Validation Accuracy: ",Comput_Accuracy(test_idx))



tensor(5.5435)
Train Accuracy:  0.011363636363636364
Validation Accuracy:  0.015151515151515152
tensor(0.3368)
Train Accuracy:  0.9204545454545454
Validation Accuracy:  0.8636363636363636
tensor(0.0869)
Train Accuracy:  0.9734848484848485
Validation Accuracy:  0.9242424242424242
tensor(0.0593)
Train Accuracy:  0.9848484848484849
Validation Accuracy:  0.9090909090909091
tensor(0.0440)
Train Accuracy:  0.9886363636363636
Validation Accuracy:  0.9090909090909091
