In [None]:
from models import *

## Node classification

In [None]:
# Load the PubMed graph together with its feature matrices, labels and the
# train/test/validation index splits returned by load_data.
(
    g,
    action_features,
    structural_features,
    labels,
    idx_train,
    idx_test,
    idx_val,
) = load_data('pubmed')

# Only the action features are moved to the GPU here, and only when CUDA is
# available; everything else stays where load_data put it.
use_gpu = torch.cuda.is_available()
if use_gpu:
    action_features = action_features.cuda()

## Untrained

In [None]:
# Untrained baseline: identical configuration to the "Only shuffling" run
# below, but with n_epochs=0.
untrained_accs, untrained_accs_sd = run_model_within(
    # --- graph and supervision ---
    g=g,
    features=action_features,
    labels=labels,
    train_idx_list=idx_train,
    test_idx_list=idx_test,
    # --- encoder ---
    model_class=MeanModel,
    emb_dim=64,
    hidden_dim=192,
    dropout=0.6,
    # --- neighbourhood / positive / negative sampling ---
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=5,
    n_neg_samples_rand=0,
    n_neg_samples_shuffle=20,
    # --- optimisation and repetition ---
    lr=0.01,
    n_epochs=0,
    batch_size=256,
    n_runs=20,
)

In [None]:
# Mean and standard deviation of the accuracies collected for the untrained baseline.
untrained_acc_mean = np.mean(untrained_accs)
untrained_acc_spread = np.std(untrained_accs)
print(untrained_acc_mean, untrained_acc_spread)

## Only shuffling

In [None]:
# Ablation: all 20 negatives come from the "shuffle" sampler, none from the
# random-node sampler.
shuffle_accs, shuffle_accs_sd = run_model_within(
    # --- graph and supervision ---
    g=g,
    features=action_features,
    labels=labels,
    train_idx_list=idx_train,
    test_idx_list=idx_test,
    # --- encoder ---
    model_class=MeanModel,
    emb_dim=64,
    hidden_dim=192,
    dropout=0.6,
    # --- neighbourhood / positive / negative sampling ---
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=5,
    n_neg_samples_rand=0,
    n_neg_samples_shuffle=20,
    # --- optimisation and repetition ---
    lr=0.01,
    n_epochs=200,
    batch_size=256,
    n_runs=20,
)

In [None]:
# Mean and standard deviation of the accuracies collected for the shuffle-only run.
shuffle_acc_mean = np.mean(shuffle_accs)
shuffle_acc_spread = np.std(shuffle_accs)
print(shuffle_acc_mean, shuffle_acc_spread)

## No shuffling

In [None]:
# Ablation: all 20 negatives come from the random-node sampler, none from the
# "shuffle" sampler.
no_shuffle_accs, no_shuffle_accs_sd = run_model_within(
    # --- graph and supervision ---
    g=g,
    features=action_features,
    labels=labels,
    train_idx_list=idx_train,
    test_idx_list=idx_test,
    # --- encoder ---
    model_class=MeanModel,
    emb_dim=64,
    hidden_dim=192,
    dropout=0.6,
    # --- neighbourhood / positive / negative sampling ---
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=5,
    n_neg_samples_rand=20,
    n_neg_samples_shuffle=0,
    # --- optimisation and repetition ---
    lr=0.01,
    n_epochs=200,
    batch_size=256,
    n_runs=20,
)

In [None]:
# Mean and standard deviation of the accuracies collected for the random-negatives-only run.
no_shuffle_acc_mean = np.mean(no_shuffle_accs)
no_shuffle_acc_spread = np.std(no_shuffle_accs)
print(no_shuffle_acc_mean, no_shuffle_acc_spread)

## Within

In [None]:
# "Within" objective with the negative budget split evenly: 10 random-node
# negatives and 10 "shuffle" negatives.
within_accs, within_accs_sd = run_model_within(
    # --- graph and supervision ---
    g=g,
    features=action_features,
    labels=labels,
    train_idx_list=idx_train,
    test_idx_list=idx_test,
    # --- encoder ---
    model_class=MeanModel,
    emb_dim=64,
    hidden_dim=192,
    dropout=0.6,
    # --- neighbourhood / positive / negative sampling ---
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=5,
    n_neg_samples_rand=10,
    n_neg_samples_shuffle=10,
    # --- optimisation and repetition ---
    lr=0.01,
    n_epochs=200,
    batch_size=256,
    n_runs=20,
)

In [None]:
# Mean and standard deviation of the accuracies collected for the "within" run.
within_acc_mean = np.mean(within_accs)
within_acc_spread = np.std(within_accs)
print(within_acc_mean, within_acc_spread)

## Between

In [None]:
# "Between" objective (run_model_between) using 20 random-node negatives and
# no "shuffle" negatives.
between_accs, between_accs_sd = run_model_between(
    # --- graph and supervision ---
    g=g,
    features=action_features,
    labels=labels,
    train_idx_list=idx_train,
    test_idx_list=idx_test,
    # --- encoder ---
    model_class=MeanModel,
    emb_dim=64,
    hidden_dim=192,
    dropout=0.6,
    # --- neighbourhood / positive / negative sampling ---
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=5,
    n_neg_samples_rand=20,
    n_neg_samples_shuffle=0,
    # --- optimisation and repetition ---
    lr=0.01,
    n_epochs=200,
    batch_size=256,
    n_runs=20,
)

In [None]:
# Mean and standard deviation of the accuracies collected for the "between" run.
between_acc_mean = np.mean(between_accs)
between_acc_spread = np.std(between_accs)
print(between_acc_mean, between_acc_spread)

## Both

In [None]:
# Combined objective (run_model_both) with 10 random-node negatives and 10
# "shuffle" negatives.
both_accs, both_accs_sd = run_model_both(
    # --- graph and supervision ---
    g=g,
    features=action_features,
    labels=labels,
    train_idx_list=idx_train,
    test_idx_list=idx_test,
    # --- encoder ---
    model_class=MeanModel,
    emb_dim=64,
    hidden_dim=192,
    dropout=0.6,
    # --- neighbourhood / positive / negative sampling ---
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=5,
    n_neg_samples_rand=10,
    n_neg_samples_shuffle=10,
    # --- optimisation and repetition ---
    lr=0.01,
    n_epochs=200,
    batch_size=256,
    n_runs=20,
)

In [None]:
# Mean and standard deviation of the accuracies collected for the combined run.
both_acc_mean = np.mean(both_accs)
both_acc_spread = np.std(both_accs)
print(both_acc_mean, both_acc_spread)

## Visualize

In [None]:
# Train one MeanModel inline (self-positives, "shuffle" + random-node
# negatives) so that its embeddings can be visualised in the cells below.
node_list = [x for x in g.node]
n_pos_samples = 5
n_neg_samples_rand = 10
n_neg_samples_shuffle = 10
lr = 0.01
n_epochs = 200
batch_size = 256

model1 = MeanModel(
    emb_dim=64,
    hidden_dim=192,
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    g=g,
    features=action_features,
    dropout=0.6,
)
optimizer1 = optim.Adam(
    model1.parameters(),
    lr=lr,
)
# model2 is an alias of model1. A second MeanModel used to be constructed here
# and was discarded on the very next line, so that dead construction is gone.
model2 = model1
# NOTE(review): optimizer2 wraps the same parameters as optimizer1, so each
# iteration applies two Adam updates from the same gradients. Kept as-is to
# preserve the training behaviour -- confirm this is intended.
optimizer2 = optim.Adam(
    model2.parameters(),
    lr=lr,
)
total_loss = 0
for epoch in range(n_epochs):
    model1 = model1.train()
    model2 = model2.train()

    # A fresh random mini-batch: shuffle all nodes and keep the first batch_size.
    random.shuffle(node_list)
    batch = node_list[:batch_size]
    optimizer1.zero_grad()
    optimizer2.zero_grad()
    emb_u = model1(batch)

    # Positives: every batch node paired with itself, n_pos_samples times.
    nbrs = [node for node in batch for _ in range(n_pos_samples)]
    emb_v = model2(nbrs).view(n_pos_samples * len(batch), -1)

    # "Shuffle" negatives: the node itself, later embedded with
    # randomize_features=True.
    neg_nodes_shuffle = [
        node for node in batch for _ in range(n_neg_samples_shuffle)
    ]
    # Random negatives: other nodes drawn (with replacement) from the batch.
    batch_set = set(batch)
    neg_nodes_rand = []
    for node in batch:
        batch_minus_ego = list(batch_set - {node})  # neighbours are NOT excluded
        for _ in range(n_neg_samples_rand):
            neg_nodes_rand.append(random.choice(batch_minus_ego))

    if neg_nodes_shuffle and neg_nodes_rand:
        emb_neg1 = model2(neg_nodes_shuffle, randomize_features=True)
        emb_neg2 = model2(neg_nodes_rand, randomize_features=False)
        total_neg_samples = n_neg_samples_rand + n_neg_samples_shuffle
        # Concatenating along dim=1 and re-viewing interleaves the two
        # negative sets row by row. torch.cat needs the remaining dimensions
        # to match, so this branch presumably relies on
        # n_neg_samples_shuffle == n_neg_samples_rand -- TODO confirm.
        emb_neg = torch.cat((emb_neg1, emb_neg2), dim=1).view(
            total_neg_samples * len(batch),
            -1,
        )
    elif neg_nodes_shuffle:
        emb_neg = model2(neg_nodes_shuffle, randomize_features=True)
    elif neg_nodes_rand:
        emb_neg = model2(neg_nodes_rand, randomize_features=False)
    else:
        # Previously emb_neg was simply left unbound (or stale) in this case.
        raise ValueError(
            "At least one of n_neg_samples_shuffle / n_neg_samples_rand "
            "must be positive."
        )

    # Weight positives by the negative-to-anchor element ratio.
    pos_weight = emb_neg.numel() / emb_u.numel()
    loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
    total_loss += loss.item()
    loss.backward()
    optimizer1.step()
    optimizer2.step()
model1 = model1.eval()

In [None]:
from sklearn.manifold import TSNE

# Embed every node with the trained model, then project the embeddings to
# two dimensions with t-SNE for plotting.
node_list = [x for x in g.node]

# Inference only: disable autograd so no computation graph is recorded for
# the per-node forward passes.
with torch.no_grad():
    emb_list = [
        model1([node]).detach().cpu().numpy().tolist()[0]
        for node in node_list
    ]

emb = np.array(emb_list)

# One row per node: 2-D t-SNE coordinates plus the node's label.
df = pd.DataFrame(
    TSNE(n_components=2).fit_transform(emb),
    columns=['x', 'y'],
)
df['label'] = [labels[x] for x in node_list]

In [None]:
# Scatter plot of the 2-D projection, coloured by label; no regression line,
# no legend, no background grid.
sns.set_style("whitegrid", {'axes.grid' : False})
sns.lmplot(
    data=df,
    x='x',
    y='y',
    hue='label',
    fit_reg=False,
    legend=False,
    scatter_kws={"s": 50},
    height=10,
    aspect=1.5,
)
plt.show()

In [None]:
# Silhouette score of the labels, measured on the 2-D t-SNE coordinates
# (not on the original embedding vectors).
tsne_coords = df[['x', 'y']]
node_labels = [labels[node] for node in node_list]
silhouette_score(tsne_coords, node_labels)