In [1]:
from input.dataset import Dataset, SynDataset
from time import time
from algorithms import *
from evaluation.metrics import get_statistics
import utils.graph_utils as graph_utils
import random
import numpy as np
import torch
import argparse
import os
import pdb

import os
# Expose only the GPU with index "1" to CUDA for this process.
# NOTE(review): the device index is hard-coded for one machine, and this runs after
# `import torch` above — presumably it still takes effect because CUDA has not been
# initialised yet at this point; confirm on the target environment.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
def parse_args(argv=None):
    """Build the network-alignment argument parser and parse an argument list.

    The parser has three shared options (datasets, groundtruth, seed) and one
    sub-command per algorithm (FINAL, REGAL, ANAGCN); the chosen sub-command is
    stored in ``args.algorithm`` (``None`` when no sub-command is given).

    Args:
        argv: Optional list of command-line style strings to parse. When
            omitted (``None``), the notebook's default experiment is used:
            ANAGCN on the yale1/yale2 pair. ``sys.argv`` is never read, so the
            function is safe to call from inside a notebook kernel.

    Returns:
        argparse.Namespace holding the parsed options.

    Raises:
        SystemExit: raised by argparse when the arguments are invalid.
    """
    parser = argparse.ArgumentParser(description="Network alignment")
    parser.add_argument('--source_dataset', default="data/allmv_tmdb/allmv/graphsage/")
    parser.add_argument('--target_dataset', default="data/allmv_tmdb/tmdb/graphsage/")
    parser.add_argument('--groundtruth',    default="data/allmv_tmdb/dictionaries/groundtruth")
    parser.add_argument('--seed',           default=123,    type=int)
    subparsers = parser.add_subparsers(dest="algorithm", help='Choose 1 of the algorithm from: FINAL, REGAL, ANAGCN')

    # FINAL
    parser_final = subparsers.add_parser('FINAL', help='FINAL algorithm')
    parser_final.add_argument('--H',                   default=None, help="Priority matrix")
    parser_final.add_argument('--max_iter',            default=30, type=int, help="Max iteration")
    parser_final.add_argument('--alpha',               default=0.6, type=float)
    parser_final.add_argument('--tol',                 default=1e-2, type=float)
    parser_final.add_argument('--train_dict', default='data/allmv_tmdb/dictionaries/node,split=01.train.dict', type=str)

    # REGAL
    parser_regal = subparsers.add_parser('REGAL', help='REGAL algorithm')
    parser_regal.add_argument('--attrvals', type=int, default=2,
                        help='Number of attribute values. Only used if synthetic attributes are generated')
    parser_regal.add_argument('--dimensions', type=int, default=128,
                        help='Number of dimensions. Default is 128.')
    parser_regal.add_argument('--k', type=int, default=10,
                        help='Controls of landmarks to sample. Default is 10.')
    parser_regal.add_argument('--max_layer', type=int, default=2,
                        help='Calculation until the layer for xNetMF.')
    parser_regal.add_argument('--alpha', type=float, default=0.01, help="Discount factor for further layers")
    parser_regal.add_argument('--gammastruc', type=float, default=1, help="Weight on structural similarity")
    parser_regal.add_argument('--gammaattr', type=float, default=1, help="Weight on attribute similarity")
    parser_regal.add_argument('--num_top', type=int, default=10,
                        help="Number of top similarities to compute with kd-tree.  If 0, computes all pairwise similarities.")
    parser_regal.add_argument('--buckets', default=2, type=float, help="base of log for degree (node feature) binning")

    # ANAGCN
    parser_ANAGCN = subparsers.add_parser("ANAGCN", help="ANAGCN algorithm")
    parser_ANAGCN.add_argument('--cuda',                action="store_true")

    parser_ANAGCN.add_argument('--embedding_dim',       default=200,         type=int)
    parser_ANAGCN.add_argument('--emb_epochs',    default=20,        type=int)
    parser_ANAGCN.add_argument('--lr', default=0.01, type=float)
    parser_ANAGCN.add_argument('--num_GCN_blocks', type=int, default=2)
    parser_ANAGCN.add_argument('--act', type=str, default='tanh')
    parser_ANAGCN.add_argument('--log', action="store_true", help="Just to print loss")
    parser_ANAGCN.add_argument('--invest', action="store_true", help="To do some statistics")
    parser_ANAGCN.add_argument('--alpha0', type=float, default=1)
    parser_ANAGCN.add_argument('--alpha1', type=float, default=1)
    parser_ANAGCN.add_argument('--alpha2', type=float, default=1)
    # refinement
    parser_ANAGCN.add_argument('--refinement_epochs', default=10, type=int)
    # NOTE(review): store_false means args.refine defaults to True and *passing*
    # --refine sets it to False. The default experiment below passes --refine,
    # so it runs with refine=False — confirm this is the intended polarity.
    parser_ANAGCN.add_argument('--refine', action="store_false", help="wheather to use refinement step")
    parser_ANAGCN.add_argument('--threshold_refine', type=float, default=0.94, help="The threshold value to get stable candidates")
    # augmentation, let noise_level = 0 if dont want to use it
    parser_ANAGCN.add_argument('--noise_level', default=0.001, type=float, help="noise to add to augment graph")
    parser_ANAGCN.add_argument('--beta', default=0.2, type=float,
                    help="weight balance between graph structure and augment graph structure")
    parser_ANAGCN.add_argument('--coe_consistency', default=0.2, type=float, help="consistency weight")
    parser_ANAGCN.add_argument('--threshold', default=0.01, type=float,
                    help="Threshold of for sharpenning")

    if argv is None:
        # Default experiment for this notebook. Option names are spelled out in
        # full (the original used the prefix "--refinement_epoch", which only
        # worked through argparse's abbreviation matching).
        argv = [
            "--source_dataset", "data/yale1_yale2/yale1/graphsage/",
            "--target_dataset", "data/yale1_yale2/yale2/graphsage/",
            "--groundtruth", "data/yale1_yale2/dictionaries/groundtruth",
            "ANAGCN",
            "--embedding_dim", "200",
            "--emb_epochs", "200",
            "--lr", "0.01",
            "--num_GCN_blocks", "2",
            "--noise_level", "0.001",
            "--cuda",
            "--refinement_epochs", "50",
            "--refine",
            "--log",
        ]

    return parser.parse_args(argv)



In [2]:
# Parse the experiment configuration. Called without arguments, parse_args() uses the
# argument list hard-coded inside it rather than reading sys.argv.
args = parse_args()

In [3]:
# Wall-clock reference; the metrics cell reports time() - start_time as "Full_time".
start_time = time()

# Seed Python, NumPy and PyTorch with the same configured seed, in that order.
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(args.seed)

# Load both graphs, then the groundtruth alignment expressed through the two
# datasets' id2idx mappings ('dict' selects a dict-shaped result, as shown when
# `groundtruth` is displayed later).
source_dataset = Dataset(args.source_dataset)
target_dataset = Dataset(args.target_dataset)
groundtruth = graph_utils.load_gt(
    args.groundtruth,
    source_dataset.id2idx,
    target_dataset.id2idx,
    'dict',
)






data/yale1_yale2/yale1/graphsage/
Dataset info:
- Nodes:  8578
- Edges:  283815
data/yale1_yale2/yale2/graphsage/
Dataset info:
- Nodes:  8578
- Edges:  283815


In [20]:
# NOTE(review): this probe raises on execution — the recorded output shows that
# `groundtruth` is a plain dict, which has no `features` attribute.
groundtruth.features

AttributeError: 'dict' object has no attribute 'features'

In [21]:
# Display the source dataset's feature array (NumPy abbreviates the printed rows/columns).
source_dataset.features

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [6]:
# No earlier cell in this notebook defines `s_adj`, so on a fresh kernel the original
# cell failed with NameError. Build it here the same way `t_adj` is built for the
# target graph.
# NOTE(review): assumes get_adjacency_matrix() returns a dense 2-D array, which the
# `== 1` / np.where pattern below requires — confirm.
s_adj = source_dataset.get_adjacency_matrix()

# Row/column index arrays of every adjacency entry equal to 1.
s_edges_ = np.where(s_adj == 1)

# Pair the index arrays element-wise into (row, col) edge tuples.
s_edges = list(zip(s_edges_[0], s_edges_[1]))

In [7]:
# Adjacency matrix of the target graph; the next cell scans it for entries equal to 1.
t_adj = target_dataset.get_adjacency_matrix()

In [8]:
# Row/column index arrays of every target adjacency entry equal to 1.
t_edges_ = np.where(t_adj == 1)

# Pair the two index arrays element-wise into (row, col) edge tuples.
t_edges = list(zip(t_edges_[0], t_edges_[1]))


In [9]:
# Translate each source edge into target index space by looking up both endpoints
# in the groundtruth mapping.
s_edges_new = [(groundtruth[src], groundtruth[dst]) for src, dst in s_edges]

In [10]:
# Number of distinct source edges that, after mapping through groundtruth, also appear
# among the target edges.
len(set(s_edges_new)&set(t_edges))

397336

In [11]:
# Instantiate the aligner named by the parsed sub-command, then run it.
algorithm = args.algorithm

if algorithm == "FINAL":
    # An empty --train_dict leaves train_dict as None; otherwise the dictionary is
    # loaded with the same id2idx mappings used for the groundtruth.
    train_dict = (
        graph_utils.load_gt(args.train_dict, source_dataset.id2idx, target_dataset.id2idx, 'dict')
        if args.train_dict != ""
        else None
    )
    final_options = dict(
        H=args.H,
        alpha=args.alpha,
        maxiter=args.max_iter,
        tol=args.tol,
        train_dict=train_dict,
    )
    model = FINAL(source_dataset, target_dataset, **final_options)
elif algorithm == "REGAL":
    regal_options = dict(
        max_layer=args.max_layer,
        alpha=args.alpha,
        k=args.k,
        num_buckets=args.buckets,
        gammastruc=args.gammastruc,
        gammaattr=args.gammaattr,
        normalize=True,
        num_top=args.num_top,
    )
    model = REGAL(source_dataset, target_dataset, **regal_options)
elif algorithm == "ANAGCN":
    # ANAGCN receives the whole parsed namespace instead of individual options.
    model = ANAGCN(source_dataset, target_dataset, args)
else:
    raise Exception("Unsupported algorithm")

# Result of the alignment run; passed to get_statistics in the metrics cell.
S = model.align()

Structure learning epoch: 0
Loss: 0.7871
Structure Loss: 0.9839
Loss: 0.8124
Structure Loss: 1.0154
Loss: 0.8098
Structure Loss: 1.0123
Loss: 0.8193
Structure Loss: 1.0241
Loss: 0.8192
Structure Loss: 1.0240
Loss: 0.8192
Structure Loss: 1.0240
Structure learning epoch: 1
Loss: 0.8087
Structure Loss: 1.0109
Loss: 0.8071
Structure Loss: 1.0089
Loss: 0.8054
Structure Loss: 1.0067
Loss: 0.8182
Structure Loss: 1.0227
Loss: 0.8173
Structure Loss: 1.0215
Loss: 0.8167
Structure Loss: 1.0208
Structure learning epoch: 2
Loss: 0.8049
Structure Loss: 1.0061
Loss: 0.8037
Structure Loss: 1.0046
Loss: 0.8026
Structure Loss: 1.0032
Loss: 0.8160
Structure Loss: 1.0200
Loss: 0.8144
Structure Loss: 1.0180
Loss: 0.8138
Structure Loss: 1.0173
Structure learning epoch: 3
Loss: 0.8009
Structure Loss: 1.0012
Loss: 0.8004
Structure Loss: 1.0005
Loss: 0.7991
Structure Loss: 0.9989
Loss: 0.8122
Structure Loss: 1.0153
Loss: 0.8122
Structure Loss: 1.0152
Loss: 0.8109
Structure Loss: 1.0136
Structure learning epoch

Loss: 0.7469
Structure Loss: 0.9337
Structure learning epoch: 34
Loss: 0.7455
Structure Loss: 0.9318
Loss: 0.7452
Structure Loss: 0.9315
Loss: 0.7448
Structure Loss: 0.9310
Loss: 0.7470
Structure Loss: 0.9338
Loss: 0.7466
Structure Loss: 0.9332
Loss: 0.7462
Structure Loss: 0.9327
Structure learning epoch: 35
Loss: 0.7449
Structure Loss: 0.9311
Loss: 0.7447
Structure Loss: 0.9308
Loss: 0.7443
Structure Loss: 0.9304
Loss: 0.7463
Structure Loss: 0.9329
Loss: 0.7459
Structure Loss: 0.9323
Loss: 0.7455
Structure Loss: 0.9318
Structure learning epoch: 36
Loss: 0.7444
Structure Loss: 0.9306
Loss: 0.7442
Structure Loss: 0.9302
Loss: 0.7438
Structure Loss: 0.9298
Loss: 0.7455
Structure Loss: 0.9319
Loss: 0.7452
Structure Loss: 0.9314
Loss: 0.7447
Structure Loss: 0.9309
Structure learning epoch: 37
Loss: 0.7441
Structure Loss: 0.9301
Loss: 0.7438
Structure Loss: 0.9297
Loss: 0.7435
Structure Loss: 0.9293
Loss: 0.7450
Structure Loss: 0.9313
Loss: 0.7446
Structure Loss: 0.9307
Loss: 0.7442
Structu

Loss: 0.7380
Structure Loss: 0.9224
Loss: 0.7384
Structure Loss: 0.9230
Loss: 0.7380
Structure Loss: 0.9224
Loss: 0.7375
Structure Loss: 0.9219
Structure learning epoch: 68
Loss: 0.7385
Structure Loss: 0.9231
Loss: 0.7382
Structure Loss: 0.9227
Loss: 0.7378
Structure Loss: 0.9222
Loss: 0.7382
Structure Loss: 0.9228
Loss: 0.7378
Structure Loss: 0.9222
Loss: 0.7374
Structure Loss: 0.9217
Structure learning epoch: 69
Loss: 0.7384
Structure Loss: 0.9230
Loss: 0.7381
Structure Loss: 0.9226
Loss: 0.7377
Structure Loss: 0.9221
Loss: 0.7380
Structure Loss: 0.9226
Loss: 0.7376
Structure Loss: 0.9219
Loss: 0.7372
Structure Loss: 0.9215
Structure learning epoch: 70
Loss: 0.7384
Structure Loss: 0.9229
Loss: 0.7381
Structure Loss: 0.9226
Loss: 0.7377
Structure Loss: 0.9221
Loss: 0.7380
Structure Loss: 0.9225
Loss: 0.7376
Structure Loss: 0.9219
Loss: 0.7371
Structure Loss: 0.9214
Structure learning epoch: 71
Loss: 0.7382
Structure Loss: 0.9227
Loss: 0.7379
Structure Loss: 0.9223
Loss: 0.7375
Structu

Loss: 0.7356
Structure Loss: 0.9195
Structure learning epoch: 101
Loss: 0.7366
Structure Loss: 0.9207
Loss: 0.7364
Structure Loss: 0.9204
Loss: 0.7359
Structure Loss: 0.9199
Loss: 0.7363
Structure Loss: 0.9204
Loss: 0.7360
Structure Loss: 0.9199
Loss: 0.7356
Structure Loss: 0.9195
Structure learning epoch: 102
Loss: 0.7366
Structure Loss: 0.9207
Loss: 0.7363
Structure Loss: 0.9203
Loss: 0.7359
Structure Loss: 0.9198
Loss: 0.7365
Structure Loss: 0.9206
Loss: 0.7361
Structure Loss: 0.9201
Loss: 0.7356
Structure Loss: 0.9195
Structure learning epoch: 103
Loss: 0.7365
Structure Loss: 0.9206
Loss: 0.7363
Structure Loss: 0.9203
Loss: 0.7359
Structure Loss: 0.9198
Loss: 0.7363
Structure Loss: 0.9204
Loss: 0.7361
Structure Loss: 0.9200
Loss: 0.7357
Structure Loss: 0.9196
Structure learning epoch: 104
Loss: 0.7365
Structure Loss: 0.9206
Loss: 0.7363
Structure Loss: 0.9203
Loss: 0.7358
Structure Loss: 0.9198
Loss: 0.7361
Structure Loss: 0.9201
Loss: 0.7359
Structure Loss: 0.9198
Loss: 0.7355
Str

Loss: 0.7351
Structure Loss: 0.9188
Loss: 0.7355
Structure Loss: 0.9194
Loss: 0.7354
Structure Loss: 0.9191
Loss: 0.7349
Structure Loss: 0.9186
Structure learning epoch: 135
Loss: 0.7356
Structure Loss: 0.9195
Loss: 0.7354
Structure Loss: 0.9192
Loss: 0.7350
Structure Loss: 0.9187
Loss: 0.7355
Structure Loss: 0.9193
Loss: 0.7353
Structure Loss: 0.9190
Loss: 0.7349
Structure Loss: 0.9186
Structure learning epoch: 136
Loss: 0.7356
Structure Loss: 0.9195
Loss: 0.7355
Structure Loss: 0.9192
Loss: 0.7350
Structure Loss: 0.9188
Loss: 0.7354
Structure Loss: 0.9193
Loss: 0.7353
Structure Loss: 0.9190
Loss: 0.7349
Structure Loss: 0.9186
Structure learning epoch: 137
Loss: 0.7355
Structure Loss: 0.9194
Loss: 0.7354
Structure Loss: 0.9192
Loss: 0.7349
Structure Loss: 0.9187
Loss: 0.7353
Structure Loss: 0.9192
Loss: 0.7352
Structure Loss: 0.9189
Loss: 0.7348
Structure Loss: 0.9185
Structure learning epoch: 138
Loss: 0.7355
Structure Loss: 0.9194
Loss: 0.7354
Structure Loss: 0.9192
Loss: 0.7349
Str

Loss: 0.7344
Structure Loss: 0.9180
Structure learning epoch: 168
Loss: 0.7351
Structure Loss: 0.9188
Loss: 0.7350
Structure Loss: 0.9186
Loss: 0.7345
Structure Loss: 0.9181
Loss: 0.7349
Structure Loss: 0.9186
Loss: 0.7348
Structure Loss: 0.9184
Loss: 0.7344
Structure Loss: 0.9180
Structure learning epoch: 169
Loss: 0.7350
Structure Loss: 0.9188
Loss: 0.7350
Structure Loss: 0.9186
Loss: 0.7345
Structure Loss: 0.9181
Loss: 0.7349
Structure Loss: 0.9186
Loss: 0.7348
Structure Loss: 0.9184
Loss: 0.7344
Structure Loss: 0.9180
Structure learning epoch: 170
Loss: 0.7350
Structure Loss: 0.9187
Loss: 0.7349
Structure Loss: 0.9185
Loss: 0.7345
Structure Loss: 0.9181
Loss: 0.7348
Structure Loss: 0.9186
Loss: 0.7348
Structure Loss: 0.9184
Loss: 0.7344
Structure Loss: 0.9180
Structure learning epoch: 171
Loss: 0.7349
Structure Loss: 0.9186
Loss: 0.7348
Structure Loss: 0.9184
Loss: 0.7344
Structure Loss: 0.9180
Loss: 0.7348
Structure Loss: 0.9185
Loss: 0.7347
Structure Loss: 0.9183
Loss: 0.7343
Str

[0.33, 0.17, 0.5]
Acc layer 0 is: 0.0000, Acc layer 1 is: 0.0016, Acc layer 2 is: 0.0013, Final acc is: 0.0002
[0, 0.67, 0.33]
Acc layer 0 is: 0.0000, Acc layer 1 is: 0.0016, Acc layer 2 is: 0.0013, Final acc is: 0.0017
[0.67, 0, 0.33]
Acc layer 0 is: 0.0000, Acc layer 1 is: 0.0016, Acc layer 2 is: 0.0013, Final acc is: 0.0000
[0.33, 0.67, 0]
Acc layer 0 is: 0.0000, Acc layer 1 is: 0.0016, Acc layer 2 is: 0.0013, Final acc is: 0.0002
[0, 1, 0]
Acc layer 0 is: 0.0000, Acc layer 1 is: 0.0016, Acc layer 2 is: 0.0013, Final acc is: 0.0016
[0, 0, 1]
Acc layer 0 is: 0.0000, Acc layer 1 is: 0.0016, Acc layer 2 is: 0.0013, Final acc is: 0.0013
[1, 0, 0]
Acc layer 0 is: 0.0000, Acc layer 1 is: 0.0016, Acc layer 2 is: 0.0013, Final acc is: 0.0000


In [12]:
# Inspect the tensor stored on the model as `med_source_candidates`.
# NOTE(review): presumably source-side candidate nodes kept by the model — confirm against ANAGCN.
model.med_source_candidates

tensor([ 396, 2266, 2352, 4500, 4843, 5577, 6631, 7040, 7260])

In [13]:
# Inspect the tensor stored on the model as `med_target_candidates`; in the recorded
# run it is identical to `med_source_candidates`.
model.med_target_candidates

tensor([ 396, 2266, 2352, 4500, 4843, 5577, 6631, 7040, 7260])

In [14]:
# Score the alignment result S against the groundtruth with the full metric set.
acc, MAP, Hit, AUC, top5, top10, top20, top30, top50, top100 = get_statistics(S, groundtruth, get_all_metric=True)

# (format template, value) pairs, listed in the order they are printed.
metric_report = [
    ("MAP: {:.4f}", MAP),
    ("AUC: {:.4f}", AUC),
    ("Top_1: {:.4f}", acc),
    ("Top_5: {:.4f}", top5),
    ("Top_10: {:.4f}", top10),
    ("Top_20: {:.4f}", top20),
    ("Top_30: {:.4f}", top30),
    ("Top_50: {:.4f}", top50),
    ("Top_100: {:.4f}", top100),
]
for template, value in metric_report:
    print(template.format(value))

# Elapsed wall-clock time since start_time was set in the dataset-loading cell.
print("Full_time: {:.4f}".format(time() - start_time))


MAP: 0.0050
AUC: 0.6334
Top_1: 0.0002
Top_5: 0.0045
Top_10: 0.0098
Top_20: 0.0187
Top_30: 0.0276
Top_50: 0.0424
Top_100: 0.0698
Full_time: 213.6361


In [18]:
# Display the groundtruth mapping (a dict of integer index -> integer index, per the output below).
# NOTE(review): this prints every entry of the dict; consider previewing only a few items.
groundtruth

{1530: 0,
 634: 1,
 2130: 2,
 8479: 3,
 1265: 4,
 2679: 5,
 3056: 6,
 48: 7,
 7705: 8,
 719: 9,
 7527: 10,
 7500: 11,
 5296: 12,
 1204: 13,
 3016: 14,
 7770: 15,
 4077: 16,
 3181: 17,
 4937: 18,
 8034: 19,
 5334: 20,
 5544: 21,
 2431: 22,
 7923: 23,
 3794: 24,
 1333: 25,
 927: 26,
 424: 27,
 2398: 28,
 1861: 29,
 3526: 30,
 2357: 31,
 7145: 32,
 2074: 33,
 1604: 34,
 1248: 35,
 7388: 36,
 6050: 37,
 5095: 38,
 3099: 39,
 3442: 40,
 5180: 41,
 758: 42,
 1952: 43,
 4353: 44,
 4214: 45,
 2041: 46,
 49: 47,
 5172: 48,
 4923: 49,
 8321: 50,
 7776: 51,
 4524: 52,
 2999: 53,
 2313: 54,
 1279: 55,
 3942: 56,
 5390: 57,
 8516: 58,
 4965: 59,
 5535: 60,
 8461: 61,
 6272: 62,
 255: 63,
 7268: 64,
 6082: 65,
 6982: 66,
 7365: 67,
 8491: 68,
 7076: 69,
 6122: 70,
 1063: 71,
 5538: 72,
 8168: 73,
 8266: 74,
 6792: 75,
 1046: 76,
 8468: 77,
 3353: 78,
 4308: 79,
 810: 80,
 6614: 81,
 5305: 82,
 1884: 83,
 6304: 84,
 8400: 85,
 1251: 86,
 5490: 87,
 7980: 88,
 204: 89,
 6218: 90,
 4661: 91,
 8255: 92,