In [1]:
import pandas as pd 
import numpy as np
import networkx as nx
import scipy.sparse as sp
from sklearn import preprocessing
import matplotlib.pyplot as plt
import csv
import torch
import itertools 

### Useful functions: 

In [2]:
def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

In [3]:
def threshold(output):
    output[output > 0.5] = 1
    output[output <= 0.5] = 0
    return output

In [4]:
def accuracy_sample_class(output, labels):
    """ 
    output is of shape (N,C)
    Labels is of shape (N,C)
    Result : acc gives the accuracy computed according to the sample-class view
    """
    N = labels.shape[0]
    C = labels.shape[1]
    corr = np.sum(np.equal(output, labels))
    # corr is the number of equal elements between labels and output and thus the number of correctly classified 
    # labels for each sample 
    acc = corr/(N*C)
    return acc

In [5]:
def micro_F1(output, labels):
    TP = np.array([])
    FN = np.array([])
    FP = np.array([])
    N = labels.shape[0]
    L = labels.shape[1]
    for l in range(L):
        tp = 0
        fn = 0
        fp = 0
        ol = output[:,l]
        ll = labels[:,l]
        for i in range(N):
            if ll[i] == 1 and ol[i] == 1:
                tp +=1
            if ol[i] == 0 and ll[i] == 1:
                fn +=1
            if ol[i] == 1 and ll[i] == 0:
                fp +=1
        TP = np.append(TP, tp)
        FN = np.append(FN, fn)
        FP = np.append(FP, fp)
    return np.sum(2*TP)/np.sum(2*TP + FN + FP)

In [35]:
a = np.array([[1,0,1],
              [1,0,1]])
b = np.array([[1,0,1],
              [1,0,1]])
micro_F1(a,b)

1.0

In [6]:
def macro_F1(output, labels):
    TP = np.array([])
    FN = np.array([])
    FP = np.array([])
    N = labels.shape[0]
    L = labels.shape[1]
    for l in range(L):
        tp = 0
        fn = 0
        fp = 0
        ol = output[:,l]
        ll = labels[:,l]
        for i in range(N):
            if ll[i] == 1 and ol[i] == 1:
                tp +=1
            if ol[i] == 0 and ll[i] == 1:
                fn +=1
            if ol[i] == 1 and ll[i] == 0:
                fp +=1
        TP = np.append(TP, tp)
        FN = np.append(FN, fn)
        FP = np.append(FP, fp)
    return np.sum(2*TP/(2*TP + FN + FP))

### Load Function:

In [7]:
# number of labels
m = 39

In [8]:
def load_data(data_name):
    edges_file = data_name + "/edges.csv"
    node_label_file = data_name + "/group-edges.csv"
    label_occ_file = data_name + "/label_co-occurences.csv"
    nnlg_file = data_name + "/edges_node_node_label.csv"
    llng_file = data_name + "/edges_label_label_node.csv"
    label_raw, nodes = [], []
    with open(node_label_file) as file_to_read: 
        while True:
            lines = file_to_read.readline()
            if not lines:
                break 
            node, label = lines.split(",")
            label_raw.append(int(label))
            nodes.append(int(node))
    label_raw = np.array(label_raw)
    nodes = np.array(nodes)
    unique_nodes = np.unique(nodes)
    labels = np.zeros((unique_nodes.shape[0], m))
    for l in range(1, m+1, 1):
        indices = np.argwhere(label_raw == l).reshape(-1)
        n_l = nodes[indices]
        for n in n_l:
            labels[n-1][l-1] = 1
            
    label_nodes = label_raw + unique_nodes.shape[0] 
    n_n_l_nodes = np.concatenate((unique_nodes, np.unique(label_nodes)))
    df = pd.DataFrame(list())
    df.to_csv(nnlg_file)
    f = open(nnlg_file, "r+")
    file_to_read = open(edges_file, "r")
    f.writelines(file_to_read.readlines())
    a = np.dstack((nodes, label_nodes)).reshape(label_nodes.shape[0],2)
    e = ["\n"] + [",".join(item)+"\n" for item in a.astype(str)]
    f.writelines(e)
    f.close()
    
    nnlg_file = "BlogCatalog/edges_node_node_label.csv"
    nnl_graph = nx.read_edgelist(nnlg_file, delimiter = ",", nodetype = int)
    E = nx.adjacency_matrix(nnl_graph, nodelist = n_n_l_nodes)
    main_graph = open(edges_file, "rb")
    G = nx.read_edgelist(main_graph, delimiter = ",", nodetype = int)
    A = nx.adjacency_matrix(G, nodelist = unique_nodes)
    A = sp.coo_matrix(A.todense())
    X = sp.csr_matrix(A)
    # Normalize the adjancency matrix with the added self loops
    A_tilde = normalize(A + sp.eye(A.shape[0]))
    
    # Label-label-node graph 
    edges = []
    list_edges = []
    for k in range(labels.shape[0]):
        indices = np.argwhere(labels[k] == 1).reshape(-1)
        if indices.shape[0]>1:
            for subset in itertools.combinations(indices, 2): 
                if (list(subset) not in list_edges) or ([subset[1], subset[0]] not in list_edges):
                    list_edges.append([subset[0]+labels.shape[0], subset[1]+labels.shape[0]])# check if the common nodes should be before or after the label nodes
                    edges.append(str(subset[0]+1 + labels.shape[0]) + "," + str(subset[1] +1 + labels.shape[0]) + "\n")
    df = pd.DataFrame(list())
    df.to_csv(label_occ_file)
    label_file = open(label_occ_file, "r+")
    label_file.writelines(edges)
    label_file.close()
    unique_label_ID = np.arange(1,40) + labels.shape[0]
    label_file = open(label_occ_file, "rb")
    label_graph = nx.read_edgelist(label_file, delimiter = ",", nodetype = int)
    C = nx.adjacency_matrix(label_graph, nodelist = unique_label_ID)
    label_file.close()
    # Normalize the co-occurence matrix with the added self loops
    C_tilde = normalize(C + sp.eye(C.shape[0]))
    Y = X[:m]
    labels_ind = label_raw + labels.shape[0]
    a_1 = np.dstack((labels_ind,nodes)).reshape(labels_ind.shape[0],2)
    e_1 = [",".join(item)+"\n" for item in a_1.astype(str)]
    file = open(label_occ_file, "r+")
    file.writelines(e_1)
    file.close()
    f_1 = open(label_occ_file, "rb")
    l_l_n_nodes = np.concatenate((np.unique(nodes),np.unique(labels_ind)))
    lln_graph = nx.read_edgelist(f_1, delimiter = ",", nodetype = int)
    F = nx.adjacency_matrix(lln_graph, nodelist = l_l_n_nodes)
    F = sp.coo_matrix(F.todense())
    f_1.close()
    # Normalize the adjancency matrix with the added self loops
    E = normalize(E + sp.eye(E.shape[0]))
    E_tilde = E[:len(unique_nodes)]
    # Normalize the adjancency matrix with the added self loops
    F = normalize(F + sp.eye(F.shape[0]))
    F_tilde = F[len(unique_nodes):]
    
    indices = np.arange(A.shape[0]).astype('int32') # should be shuffled
    np.random.shuffle(indices)
    
    idx_train = indices[:A.shape[0] // 3]
    idx_val = indices[A.shape[0] // 3: (2 * A.shape[0]) // 3]
    idx_test = indices[(2 * A.shape[0]) // 3:]
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)
    C_tilde = torch.FloatTensor(np.array(C_tilde.todense()))
    E_tilde = torch.FloatTensor(np.array(E_tilde.todense()))
    F_tilde = torch.FloatTensor(np.array(F_tilde.todense()))
    A_tilde = torch.FloatTensor(np.array(A_tilde.todense()))
    labels = torch.FloatTensor(labels)
    Y = torch.FloatTensor(np.array(Y.todense()))
    X = torch.FloatTensor(np.array(X.todense()))
    
    return X, Y, F_tilde, E_tilde, C_tilde,A_tilde, idx_train, idx_val,idx_test, labels

In [9]:
X, Y, F_tilde, E_tilde, C_tilde, A_tilde, idx_train, idx_val,idx_test, labels = load_data("BlogCatalog")

In [10]:
Y_star = torch.FloatTensor(sp.vstack((Y,X)).todense())
X_star = torch.FloatTensor(sp.vstack((X,Y)).todense())

### Let's get down to the training  

In [11]:
from Code.models import High_Layer, Low_Layer
import time
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim

In [12]:
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=300,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.02,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=0,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=400,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('-f')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

In [13]:
# Define some variables 
n = A_tilde.shape[0]
m = C_tilde.shape[0]

In [14]:
# Define the models (high layer and low layer) and the optimizers for each on of them
low_layer = Low_Layer(nfeat = A_tilde.shape[0],
                      nhid = args.hidden,
                      nclass = C_tilde.shape[0],
                      dropout = args.dropout)
optimizer_lowlayer = optim.SGD(low_layer.parameters(),lr = args.lr, weight_decay = args.weight_decay)
        
high_layer = High_Layer(nfeat = A_tilde.shape[0],
                        nhid = args.hidden,
                        nclass = C_tilde.shape[0],
                        dropout = args.dropout)
optimizer_highlayer = optim.SGD(high_layer.parameters(),lr = args.lr, weight_decay = args.weight_decay)

In [15]:
truth = torch.LongTensor(np.arange(m))

In [16]:
print(Y.shape)

torch.Size([39, 10312])


In [17]:
low_layer.gc2.weight.shape

torch.Size([400, 39])

In [18]:
low_layer.fc1.weight.shape

torch.Size([10312, 39])

In [19]:
def train(epoch, M, N):# Put M and N in args 
    
    X_star1 = X_star
    Y_star1 = Y_star
    t = time.time()
    # train the two layers
    low_layer.train()
    optimizer_lowlayer.zero_grad()
    high_layer.train()
    optimizer_highlayer.zero_grad()
    # The output of the two layers
    Y_new = high_layer(Y_star1, F_tilde, C_tilde)
    X_new = low_layer(X_star1, E_tilde, A_tilde)
    print(X_new.shape)
    # The train losses
    loss_train_hl = F.cross_entropy(torch.sigmoid(Y_new), truth)
    softmax_output = F.log_softmax(X_new, dim = 1)
    loss_train_ll = (1/m)*np.sum([F.binary_cross_entropy_with_logits(softmax_output[idx_train][:,i], labels[idx_train][:,i]) for i in range(m)])
    
    print(type(Y))
    print(type(X_new))
    acc_train = accuracy_sample_class(threshold(softmax_output.detach().numpy()[idx_train]), labels.detach().numpy()[idx_train])
    if epoch%M == 0:
        W_l = torch.transpose(high_layer.fc1.weight, 0, 1)
        Y_new1 = torch.mm(Y_new, W_l)
        Y_star1 = torch.cat((X, Y_new1), dim = 0)
    if epoch%N == 0:
        W_v = torch.transpose(low_layer.fc1.weight, 0, 1)
        Y_new1 = torch.mm(X_new, W_v)
        X_star1 = torch.cat((X,Y_new1), dim = 0)
        
    
    loss_train_ll.backward()
    loss_train_hl.backward()
    optimizer_lowlayer.step()
    optimizer_highlayer.step()
    
    
    loss_val = (1/39)*np.sum([F.binary_cross_entropy_with_logits(softmax_output[idx_val][:,i], labels[idx_val][:,i]) for i in range(C_tilde.shape[0])])
    acc_val = accuracy_sample_class(threshold(softmax_output.detach().numpy()[idx_val]), labels.detach().numpy()[idx_val])
    
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train_ll.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))

In [22]:
def train1(epoch, M, N):# Put M and N in args 
    
    X_star1 = X_star
    Y_star1 = Y_star
    t = time.time()
    # train the two layers
    low_layer.train()
    optimizer_lowlayer.zero_grad()
    high_layer.train()
    optimizer_highlayer.zero_grad()
    # The output of the two layers
    Y_new = high_layer(Y_star1, F_tilde, C_tilde)
    X_new = low_layer(X_star1, E_tilde, A_tilde)
    # The train losses
    loss_train_hl = F.cross_entropy(F.log_softmax(Y_new,dim = 1), truth)
    sigmoid_output = torch.sigmoid(X_new)
    loss_train_ll = (1/m)*np.sum([F.binary_cross_entropy_with_logits(sigmoid_output[idx_train][:,i], labels[idx_train][:,i]) for i in range(m)])
    
    
    acc_train = macro_F1(threshold(sigmoid_output.detach().numpy()[idx_train]), labels.detach().numpy()[idx_train])
    if epoch%M == 0:
        W_l = torch.transpose(high_layer.fc1.weight, 0, 1)
        Y_new1 = torch.mm(Y_new, W_l)
        Y_star1 = torch.cat((X, Y_new1), dim = 0)
    if epoch%N == 0:
        W_v = torch.transpose(low_layer.fc1.weight, 0, 1)
        Y_new1 = torch.mm(X_new, W_v)
        X_star1 = torch.cat((X,Y_new1), dim = 0)
        
    
    loss_train_ll.backward()
    loss_train_hl.backward()
    optimizer_lowlayer.step()
    optimizer_highlayer.step()
    
    
    loss_val = (1/m)*np.sum([F.binary_cross_entropy_with_logits(sigmoid_output[idx_val][:,i], labels[idx_val][:,i]) for i in range(C_tilde.shape[0])])
    acc_val = macro_F1(threshold(sigmoid_output.detach().numpy()[idx_val]), labels.detach().numpy()[idx_val])
    
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train_ll.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))

In [25]:
low_layer = Low_Layer(nfeat = A_tilde.shape[0],
                      nhid = args.hidden,
                      nclass = C_tilde.shape[0],
                      dropout = args.dropout)

high_layer = High_Layer(nfeat = A_tilde.shape[0],
                        nhid = args.hidden,
                        nclass = C_tilde.shape[0],
                        dropout = args.dropout)
params = list(high_layer.parameters()) + list(low_layer.parameters())
optimizer = optim.SGD(params,lr = args.lr, weight_decay = args.weight_decay)

In [38]:
def train2(epoch, M, N):# Put M and N in args 
    
    X_star1 = X_star
    Y_star1 = Y_star
    t = time.time()
    # train the two layers
    low_layer.train()
    high_layer.train()
    optimizer.zero_grad()
    # The output of the two layers
    Y_new = high_layer(Y_star1, F_tilde, C_tilde)
    X_new = low_layer(X_star1, E_tilde, A_tilde)
    # The train losses
    loss_train_hl = F.cross_entropy(F.log_softmax(Y_new,dim = 1), truth)
    sigmoid_output = torch.sigmoid(X_new)
    loss_train_ll = (1/m)*np.sum([F.binary_cross_entropy_with_logits(sigmoid_output[idx_train][:,i], labels[idx_train][:,i]) for i in range(m)])
    
    #print(threshold(sigmoid_output))
    acc_train = micro_F1(threshold(sigmoid_output.detach().numpy()[idx_train]), labels.detach().numpy()[idx_train])
    if epoch%M == 0:
        W_l = torch.transpose(high_layer.fc1.weight, 0, 1)
        Y_new1 = torch.mm(Y_new, W_l)
        Y_star1 = torch.cat((X, Y_new1), dim = 0)
    if epoch%N == 0:
        W_v = torch.transpose(low_layer.fc1.weight, 0, 1)
        Y_new1 = torch.mm(X_new, W_v)
        X_star1 = torch.cat((X,Y_new1), dim = 0)
        
    
    loss_train = loss_train_hl + loss_train_ll
    loss_train.backward()
    optimizer.step()
    
    
    loss_val = (1/m)*np.sum([F.binary_cross_entropy_with_logits(sigmoid_output[idx_val][:,i], labels[idx_val][:,i]) for i in range(C_tilde.shape[0])])
    acc_val = micro_F1(threshold(sigmoid_output.detach().numpy()[idx_val]), labels.detach().numpy()[idx_val])
    
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))

In [39]:
train2(1, 50, 50)

Epoch: 0002 loss_train: 4.6223 acc_train: 0.0592 loss_val: 0.9487 acc_val: 0.0594 time: 14.6953s


In [40]:
t_total = time.time()
for epoch in range(args.epochs):
    train2(epoch, 50, 50)

Epoch: 0001 loss_train: 4.6232 acc_train: 0.0605 loss_val: 0.9478 acc_val: 0.0616 time: 20.1691s
Epoch: 0002 loss_train: 4.6228 acc_train: 0.0583 loss_val: 0.9473 acc_val: 0.0587 time: 9.2118s
Epoch: 0003 loss_train: 4.6213 acc_train: 0.0579 loss_val: 0.9464 acc_val: 0.0599 time: 9.8017s
Epoch: 0004 loss_train: 4.6202 acc_train: 0.0593 loss_val: 0.9458 acc_val: 0.0606 time: 8.2963s
Epoch: 0005 loss_train: 4.6175 acc_train: 0.0593 loss_val: 0.9445 acc_val: 0.0607 time: 9.9459s
Epoch: 0006 loss_train: 4.6183 acc_train: 0.0600 loss_val: 0.9442 acc_val: 0.0608 time: 10.9338s
Epoch: 0007 loss_train: 4.6155 acc_train: 0.0602 loss_val: 0.9432 acc_val: 0.0599 time: 8.7221s
Epoch: 0008 loss_train: 4.6152 acc_train: 0.0589 loss_val: 0.9425 acc_val: 0.0603 time: 7.8554s
Epoch: 0009 loss_train: 4.6133 acc_train: 0.0591 loss_val: 0.9413 acc_val: 0.0603 time: 8.4021s
Epoch: 0010 loss_train: 4.6096 acc_train: 0.0590 loss_val: 0.9404 acc_val: 0.0610 time: 10.1922s
Epoch: 0011 loss_train: 4.6140 acc_tr

KeyboardInterrupt: 