In [None]:
# Explicit imports for names the notebook uses directly. They come BEFORE the
# star imports so that anything the star imports already provide keeps its
# original binding.
import random

import torch
from torch import optim

from models import *
from graph_loaders import *
import networkx as nx

import numpy as np
import pandas as pd
import seaborn as sb
import seaborn as sns  # the notebook refers to seaborn as `sns` everywhere
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

import matplotlib.pyplot as plt
import graphwave.graphwave as graphwave
from graphwave.graphwave.shapes import build_graph
from graphwave.graphwave.graphwave import *

# Seed every random number generator the notebook draws from, so that
# Restart Kernel -> Run All reproduces the same numbers:
#   * NumPy    - used for the jitter added before some silhouette scores
#   * random   - used for negative-node and neighbour sampling
#   * PyTorch  - presumably used by MeanModel for parameter initialisation
#                (confirm in models.py)
SEED = 124
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Six-colour HLS palette; it becomes seaborn's default and is also indexed
# directly when node colours are assigned in the graph drawing below.
current_palette = sns.color_palette(palette="hls", n_colors=6)
sns.set_palette(palette=current_palette)

## Picture of house graph

In [None]:
# Build the house graph, give every edge unit weight, and draw it with one
# palette colour per node label.
g, features = generate_house()
for n1, n2, attr in g.edges(data=True):
    attr['weight'] = 1.0

# `Graph.node` was removed in NetworkX 2.4; `Graph.nodes` is the supported
# accessor on every 2.x release.
label_of = {node: g.nodes[node]['label'] for node in g.nodes}
labels = set(label_of.values())
# Map each distinct label to an integer index into `current_palette`.
colors = {label: idx for idx, label in enumerate(labels)}

pos = nx.kamada_kawai_layout(g)
nx.draw(
    g,
    pos,
    node_color=[current_palette[colors[label_of[node]]] for node in g.nodes],
)
plt.show()

## Simple version of model

In [None]:
# Train two MeanModel encoders on the house graph.
# Positive pairs: the same node embedded by model1 and by model2.
# Negative pairs: model2 on uniformly sampled nodes, plus model2 called with
# randomize_features=True (presumably feature-shuffled inputs - confirm in
# models.py).
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
n_nodes = len(node_list)

model1 = MeanModel(
    emb_dim=2,
    n_nbr_samples1=4,
    n_nbr_samples2=2,
    g=g,
    features=features,
)
optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
model2 = MeanModel(
    emb_dim=2,
    n_nbr_samples1=4,
    n_nbr_samples2=2,
    g=g,
    features=features,
)
optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

for _ in range(50):
    model1 = model1.train()
    model2 = model2.train()
    optimizer1.zero_grad()
    optimizer2.zero_grad()

    emb_u = model1(node_list)
    emb_v = model2(node_list)
    # Concatenate the two negative sets side by side, then fold them into
    # 2 * n_nodes rows (assumes each call returns one row per node).
    emb_neg = torch.cat(
        (
            model2([random.choice(node_list) for _ in range(n_nodes)]),
            model2(node_list, randomize_features=True),
        ),
        dim=1,
    ).view(2 * n_nodes, -1)

    # Ratio of negative to positive entries, passed to the loss as the
    # positive-class weight.
    pos_weight = emb_neg.numel() / emb_u.numel()
    loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
    print(loss.item())

    loss.backward()
    optimizer1.step()
    optimizer2.step()

In [None]:
# Put model1 in eval mode and average five forward passes over all nodes
# (presumably to smooth out stochastic neighbour sampling - confirm).
model1 = model1.eval()
emb = torch.stack([model1(node_list) for _ in range(5)], dim=1).mean(dim=1)

In [None]:
# Project the averaged embeddings to 2-D and scatter them coloured by label.
# The embeddings are standardised before PCA so this figure uses the same
# preprocessing as every other plot and silhouette computation in the notebook.
output = emb.detach().numpy()
pca = PCA(n_components=2)
output = pd.DataFrame(
    pca.fit_transform(StandardScaler().fit_transform(output)),
    columns=['x', 'y'],
)
# `Graph.node` was removed in NetworkX 2.4; use `Graph.nodes`.
output['color'] = [colors[g.nodes[idx]['label']] for idx in node_list]

sns.lmplot(
    x='x',
    y='y',
    data=output,
    fit_reg=False,  # scatter only, no regression line
    hue='color',
    legend=False,
    height=5,
    aspect=1.5,
    scatter_kws={"s": 250},
)
plt.show()

In [None]:
# Repeat the same-node experiment on the house graph 50 times (100 training
# steps each) and report the mean and standard deviation of the silhouette
# score of the PCA-projected embeddings with respect to the node labels.
# Negatives here are two independent model2 passes with
# randomize_features=True.
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
n_nodes = len(node_list)
node_colors = [colors[g.nodes[idx]['label']] for idx in node_list]
shs = []

for run in range(50):
    model1 = MeanModel(
        emb_dim=2,
        n_nbr_samples1=4,
        n_nbr_samples2=2,
        g=g,
        features=features,
    )
    optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
    model2 = MeanModel(
        emb_dim=2,
        n_nbr_samples1=4,
        n_nbr_samples2=2,
        g=g,
        features=features,
    )
    optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

    for step in range(100):
        model1 = model1.train()
        model2 = model2.train()
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        emb_u = model1(node_list)
        emb_v = model2(node_list)
        emb_neg = torch.cat(
            (
                model2(node_list, randomize_features=True),
                model2(node_list, randomize_features=True),
            ),
            dim=1,
        ).view(2 * n_nodes, -1)
        pos_weight = emb_neg.numel() / emb_u.numel()
        loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
        loss.backward()
        optimizer1.step()
        optimizer2.step()

    # Average five eval-mode passes before scoring.
    model1 = model1.eval()
    emb = torch.stack([model1(node_list) for _ in range(5)], dim=1).mean(dim=1)

    output = emb.detach().numpy()
    pca = PCA(n_components=2)
    output = pd.DataFrame(
        pca.fit_transform(StandardScaler().fit_transform(output)),
        columns=['x', 'y'],
    )
    output['color'] = node_colors
    shs.append(silhouette_score(output[['x', 'y']], output['color']))

print(np.mean(shs), np.std(shs))

## Between: positive pairs drawn from graph neighbours

In [None]:
# Train two MeanModel encoders on the house graph with neighbour positives.
# Positive pairs: each node (model1) against one randomly chosen neighbour
# (model2). Negative pairs: model2 on 2 * n_nodes uniformly sampled nodes.
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
n_nodes = len(node_list)

model1 = MeanModel(
    emb_dim=2,
    n_nbr_samples1=4,
    n_nbr_samples2=2,
    g=g,
    features=features,
)
optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
model2 = MeanModel(
    emb_dim=2,
    n_nbr_samples1=4,
    n_nbr_samples2=2,
    g=g,
    features=features,
)
optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

for _ in range(50):
    model1 = model1.train()
    model2 = model2.train()
    optimizer1.zero_grad()
    optimizer2.zero_grad()

    emb_u = model1(node_list)
    # g[x] is the adjacency view of x; pick one neighbour per node.
    emb_v = model2([random.choice(list(g[x])) for x in node_list])
    # 2 * n_nodes negatives; computed directly instead of materialising a
    # doubled copy of node_list just to take its length.
    emb_neg = torch.cat(
        (
            model2([random.choice(node_list) for _ in range(2 * n_nodes)]),
        ),
        dim=1,
    ).view(2 * n_nodes, -1)

    pos_weight = emb_neg.numel() / emb_u.numel()
    loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
    print(loss.item())

    loss.backward()
    optimizer1.step()
    optimizer2.step()

In [None]:
# Put model1 in eval mode and average five forward passes over all nodes
# (presumably to smooth out stochastic neighbour sampling - confirm).
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4

model1 = model1.eval()
emb = torch.stack([model1(node_list) for _ in range(5)], dim=1).mean(dim=1)

In [None]:
# Sanity check: display the shape of the averaged embedding tensor.
emb.shape

In [None]:
# Standardise, project to 2-D with PCA, and scatter the neighbour-trained
# house-graph embeddings coloured by node label.
output = emb.detach().numpy()
pca = PCA(n_components=2)
output = pd.DataFrame(
    pca.fit_transform(StandardScaler().fit_transform(output)),
    columns=['x', 'y'],
)
# `Graph.node` was removed in NetworkX 2.4; use `Graph.nodes`.
output['color'] = [colors[g.nodes[idx]['label']] for idx in node_list]

sns.lmplot(
    x='x',
    y='y',
    data=output,
    fit_reg=False,  # scatter only, no regression line
    hue='color',
    legend=False,
    height=5,
    aspect=1.5,
    scatter_kws={"s": 200},
)
plt.show()

In [None]:
# Repeat the neighbour-positive experiment on the house graph 50 times
# (100 training steps each) and report the mean and standard deviation of
# the silhouette score of the PCA-projected embeddings w.r.t. node labels.
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
n_nodes = len(node_list)
node_colors = [colors[g.nodes[idx]['label']] for idx in node_list]
shs = []

for run in range(50):
    model1 = MeanModel(
        emb_dim=2,
        n_nbr_samples1=4,
        n_nbr_samples2=2,
        g=g,
        features=features,
    )
    optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
    model2 = MeanModel(
        emb_dim=2,
        n_nbr_samples1=4,
        n_nbr_samples2=2,
        g=g,
        features=features,
    )
    optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

    for step in range(100):
        model1 = model1.train()
        model2 = model2.train()
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        emb_u = model1(node_list)
        # Positive partner: one random neighbour of each node.
        emb_v = model2([random.choice(list(g[x])) for x in node_list])
        # Negatives: 2 * n_nodes uniformly sampled nodes.
        emb_neg = torch.cat(
            (
                model2([random.choice(node_list) for _ in range(2 * n_nodes)]),
            ),
            dim=1,
        ).view(2 * n_nodes, -1)
        pos_weight = emb_neg.numel() / emb_u.numel()
        loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
        loss.backward()
        optimizer1.step()
        optimizer2.step()

    # Average five eval-mode passes before scoring.
    model1 = model1.eval()
    emb = torch.stack([model1(node_list) for _ in range(5)], dim=1).mean(dim=1)

    output = emb.detach().numpy()
    pca = PCA(n_components=2)
    output = pd.DataFrame(
        pca.fit_transform(StandardScaler().fit_transform(output)),
        columns=['x', 'y'],
    )
    output['color'] = node_colors
    shs.append(silhouette_score(output[['x', 'y']], output['color']))

print(np.mean(shs), np.std(shs))

## Graphwave

In [None]:
# Run GraphWave on the house graph, passing 25 evenly spaced values in
# [0, 100] as the second argument and letting the library pick the scales.
sample_points = np.linspace(0, 100, 25)
chi, heat_print, taus = graphwave_alg(g, sample_points, taus='auto', verbose=True)

In [None]:
# Standardise the GraphWave signatures, then reduce them to two principal
# components for plotting.
chi_scaled = StandardScaler().fit_transform(chi)
pca = PCA(n_components=2)
graphwave_emb = pca.fit_transform(chi_scaled)

In [None]:
# Scatter the 2-D GraphWave embedding of the house graph, coloured by label.
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
output = pd.DataFrame(graphwave_emb, columns=['x', 'y'])
output['color'] = [colors[g.nodes[idx]['label']] for idx in node_list]

sns.lmplot(
    x='x',
    y='y',
    data=output,
    fit_reg=False,  # scatter only, no regression line
    hue='color',
    legend=False,
    height=5,
    aspect=1.5,
    scatter_kws={"s": 200},
)
plt.show()

## Node2vec / DeepWalk

In [None]:
from node2vec.src import node2vec
from gensim.models import Word2Vec

In [None]:
# node2vec / DeepWalk baseline on the house graph.
# Give every edge unit weight before computing transition probabilities.
for n1, n2, data in g.edges(data=True):
    data.update(weight=1.0)

# p = q = 1.0 on an undirected graph.
G = node2vec.Graph(g, is_directed=False, p=1.0, q=1.0)
G.preprocess_transition_probs()

# Arguments are presumably (num_walks, walk_length) - confirm in node2vec.src.
# Word2Vec expects string tokens, so node ids are stringified.
walks = [list(map(str, walk)) for walk in G.simulate_walks(10, 80)]

# NOTE(review): `size` and `iter` are the gensim 3.x keyword names; gensim 4
# renamed them to `vector_size` and `epochs`.
model = Word2Vec(
    walks,
    size=8,
    window=10,
    min_count=0,
    sg=1,
    workers=1,
    iter=10,
)
wvs = model.wv

In [None]:
# Look up the word vectors for every node (keys are stringified node ids),
# standardise, project to 2-D with PCA, and scatter coloured by node label.
output = wvs[[str(node) for node in node_list]]
pca = PCA(n_components=2)
output = pd.DataFrame(
    pca.fit_transform(StandardScaler().fit_transform(output)),
    columns=['x', 'y'],
)
# `Graph.node` was removed in NetworkX 2.4; use `Graph.nodes`.
output['color'] = [colors[g.nodes[idx]['label']] for idx in node_list]

sns.lmplot(
    x='x',
    y='y',
    data=output,
    fit_reg=False,  # scatter only, no regression line
    hue='color',
    legend=False,
    height=5,
    aspect=1.5,
    scatter_kws={"s": 200},
)
plt.show()

## Picture of barbell graph

In [None]:
# Switch to a seven-colour palette, build the barbell graph with unit edge
# weights, and draw it with one palette colour per node label.
# NOTE: this rebinds `g`, `features`, `labels` and `colors`, so every cell
# below operates on the barbell graph.
current_palette = sns.color_palette("hls", 7)
sns.set_palette(current_palette)

g, features = generate_barbell()
for n1, n2, attr in g.edges(data=True):
    attr['weight'] = 1.0

# `Graph.node` was removed in NetworkX 2.4; `Graph.nodes` is the supported
# accessor on every 2.x release.
label_of = {node: g.nodes[node]['label'] for node in g.nodes}
labels = set(label_of.values())
# Map each distinct label to an integer index into `current_palette`.
colors = {label: idx for idx, label in enumerate(labels)}

pos = nx.kamada_kawai_layout(g)
nx.draw(
    g,
    pos,
    node_color=[current_palette[colors[label_of[node]]] for node in g.nodes],
)
plt.show()

## Simple version of model

In [None]:
# Train two MeanModel encoders on the barbell graph.
# Positive pairs: the same node embedded by model1 and by model2.
# Negative pairs: two independent model2 passes with randomize_features=True
# (presumably feature-shuffled inputs - confirm in models.py).
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
n_nodes = len(node_list)

model1 = MeanModel(
    emb_dim=2,
    n_nbr_samples1=4,
    n_nbr_samples2=2,
    g=g,
    features=features,
)
optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
model2 = MeanModel(
    emb_dim=2,
    n_nbr_samples1=4,
    n_nbr_samples2=2,
    g=g,
    features=features,
)
optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

for _ in range(50):
    model1 = model1.train()
    model2 = model2.train()
    optimizer1.zero_grad()
    optimizer2.zero_grad()

    emb_u = model1(node_list)
    emb_v = model2(node_list)
    # Concatenate the two negative sets side by side, then fold them into
    # 2 * n_nodes rows (assumes each call returns one row per node).
    emb_neg = torch.cat(
        (
            model2(node_list, randomize_features=True),
            model2(node_list, randomize_features=True),
        ),
        dim=1,
    ).view(2 * n_nodes, -1)

    pos_weight = emb_neg.numel() / emb_u.numel()
    loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
    print(loss.item())

    loss.backward()
    optimizer1.step()
    optimizer2.step()

In [None]:
# Eval-mode embedding for the barbell graph: mean of five forward passes of
# model1 over every node.
model1 = model1.eval()
forward_passes = [model1(node_list) for _ in range(5)]
emb = torch.stack(forward_passes, dim=1).mean(dim=1)

In [None]:
# Standardise, project to 2-D with PCA, and scatter the same-node-trained
# barbell-graph embeddings coloured by node label.
output = emb.detach().numpy()
pca = PCA(n_components=2)
output = pd.DataFrame(
    pca.fit_transform(StandardScaler().fit_transform(output)),
    columns=['x', 'y'],
)
# `Graph.node` was removed in NetworkX 2.4; use `Graph.nodes`.
output['color'] = [colors[g.nodes[idx]['label']] for idx in node_list]

sns.lmplot(
    x='x',
    y='y',
    data=output,
    fit_reg=False,  # scatter only, no regression line
    hue='color',
    legend=False,
    height=5,
    aspect=1.5,
    scatter_kws={"s": 200},
)
plt.show()

In [None]:
# Repeat the same-node experiment on the barbell graph 50 times (100 training
# steps each) and report the mean and standard deviation of the silhouette
# score of the PCA-projected embeddings with respect to the node labels.
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
n_nodes = len(node_list)
node_colors = [colors[g.nodes[idx]['label']] for idx in node_list]
shs = []

for run in range(50):
    model1 = MeanModel(
        emb_dim=2,
        n_nbr_samples1=4,
        n_nbr_samples2=2,
        g=g,
        features=features,
    )
    optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
    model2 = MeanModel(
        emb_dim=2,
        n_nbr_samples1=4,
        n_nbr_samples2=2,
        g=g,
        features=features,
    )
    optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

    for step in range(100):
        model1 = model1.train()
        model2 = model2.train()
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        emb_u = model1(node_list)
        emb_v = model2(node_list)
        emb_neg = torch.cat(
            (
                model2(node_list, randomize_features=True),
                model2(node_list, randomize_features=True),
            ),
            dim=1,
        ).view(2 * n_nodes, -1)
        pos_weight = emb_neg.numel() / emb_u.numel()
        loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
        loss.backward()
        optimizer1.step()
        optimizer2.step()

    # Average five eval-mode passes before scoring.
    model1 = model1.eval()
    emb = torch.stack([model1(node_list) for _ in range(5)], dim=1).mean(dim=1)

    output = emb.detach().numpy()
    pca = PCA(n_components=2)
    output = pd.DataFrame(
        pca.fit_transform(StandardScaler().fit_transform(output)),
        columns=['x', 'y'],
    )
    output['color'] = node_colors
    # Small Gaussian jitter (sd 0.01) on the projected coordinates -
    # presumably to separate coincident points before scoring; confirm.
    output[['x', 'y']] = output[['x', 'y']] + 0.01 * np.random.normal(0, 1, size=output[['x', 'y']].shape)
    shs.append(silhouette_score(output[['x', 'y']], output['color']))

print(np.mean(shs), np.std(shs))

## Between: positive pairs drawn from graph neighbours (barbell graph)

In [None]:
# Train two MeanModel encoders on the barbell graph with neighbour positives.
# Positive pairs: each node (model1) against one randomly chosen neighbour
# (model2). Negative pairs: model2 on 2 * n_nodes uniformly sampled nodes.
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
n_nodes = len(node_list)

model1 = MeanModel(
    emb_dim=2,
    n_nbr_samples1=4,
    n_nbr_samples2=2,
    g=g,
    features=features,
)
optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
model2 = MeanModel(
    emb_dim=2,
    n_nbr_samples1=4,
    n_nbr_samples2=2,
    g=g,
    features=features,
)
optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

for _ in range(50):
    model1 = model1.train()
    model2 = model2.train()
    optimizer1.zero_grad()
    optimizer2.zero_grad()

    emb_u = model1(node_list)
    # g[x] is the adjacency view of x; pick one neighbour per node.
    emb_v = model2([random.choice(list(g[x])) for x in node_list])
    # 2 * n_nodes negatives; computed directly instead of materialising a
    # doubled copy of node_list just to take its length.
    emb_neg = torch.cat(
        (
            model2([random.choice(node_list) for _ in range(2 * n_nodes)]),
        ),
        dim=1,
    ).view(2 * n_nodes, -1)

    pos_weight = emb_neg.numel() / emb_u.numel()
    loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
    print(loss.item())

    loss.backward()
    optimizer1.step()
    optimizer2.step()

In [None]:
# Eval-mode embedding after neighbour-positive training: mean of five
# forward passes of model1 over every node.
model1 = model1.eval()
forward_passes = [model1(node_list) for _ in range(5)]
emb = torch.stack(forward_passes, dim=1).mean(dim=1)

In [None]:
# Sanity check: display the shape of the averaged embedding tensor.
emb.shape

In [None]:
# Standardise, project to 2-D with PCA, print the silhouette score of the
# projection with respect to the node labels, and scatter it by label.
output = emb.detach().numpy()
pca = PCA(n_components=2)
output = pd.DataFrame(
    pca.fit_transform(StandardScaler().fit_transform(output)),
    columns=['x', 'y'],
)
# `Graph.node` was removed in NetworkX 2.4; use `Graph.nodes`.
output['color'] = [colors[g.nodes[idx]['label']] for idx in node_list]

print(silhouette_score(output[['x', 'y']], output['color']))

sns.lmplot(
    x='x',
    y='y',
    data=output,
    fit_reg=False,  # scatter only, no regression line
    hue='color',
    legend=False,
    height=5,
    aspect=1.5,
    scatter_kws={"s": 200},
)
plt.show()

In [None]:
# Repeat the neighbour-positive experiment on the barbell graph 50 times
# (100 training steps each) and report the mean and standard deviation of
# the silhouette score of the PCA-projected embeddings w.r.t. node labels.
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
n_nodes = len(node_list)
node_colors = [colors[g.nodes[idx]['label']] for idx in node_list]
shs = []

for run in range(50):
    model1 = MeanModel(
        emb_dim=2,
        n_nbr_samples1=4,
        n_nbr_samples2=2,
        g=g,
        features=features,
    )
    optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
    model2 = MeanModel(
        emb_dim=2,
        n_nbr_samples1=4,
        n_nbr_samples2=2,
        g=g,
        features=features,
    )
    optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

    for step in range(100):
        model1 = model1.train()
        model2 = model2.train()
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        emb_u = model1(node_list)
        # Positive partner: one random neighbour of each node.
        emb_v = model2([random.choice(list(g[x])) for x in node_list])
        # Negatives: 2 * n_nodes uniformly sampled nodes.
        emb_neg = torch.cat(
            (
                model2([random.choice(node_list) for _ in range(2 * n_nodes)]),
            ),
            dim=1,
        ).view(2 * n_nodes, -1)
        pos_weight = emb_neg.numel() / emb_u.numel()
        loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
        loss.backward()
        optimizer1.step()
        optimizer2.step()

    # Average five eval-mode passes before scoring.
    model1 = model1.eval()
    emb = torch.stack([model1(node_list) for _ in range(5)], dim=1).mean(dim=1)

    output = emb.detach().numpy()
    pca = PCA(n_components=2)
    output = pd.DataFrame(
        pca.fit_transform(StandardScaler().fit_transform(output)),
        columns=['x', 'y'],
    )
    output['color'] = node_colors
    # Small Gaussian jitter (sd 0.01) on the projected coordinates -
    # presumably to separate coincident points before scoring; confirm.
    output[['x', 'y']] = output[['x', 'y']] + 0.01 * np.random.normal(0, 1, size=output[['x', 'y']].shape)
    shs.append(silhouette_score(output[['x', 'y']], output['color']))

print(np.mean(shs), np.std(shs))

## Graphwave

In [None]:
import networkx as nx 
import numpy as np
import pandas as pd
import seaborn as sb
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt
import graphwave.graphwave as graphwave
from graphwave.graphwave.shapes import build_graph
from graphwave.graphwave.graphwave import *


# Re-seed NumPy's global RNG (seed 123) before the barbell GraphWave section.
np.random.seed(seed=123)

In [None]:
# Run GraphWave on the barbell graph, passing 25 evenly spaced values in
# [0, 100] as the second argument and letting the library pick the scales.
sample_points = np.linspace(0, 100, 25)
chi, heat_print, taus = graphwave_alg(g, sample_points, taus='auto', verbose=True)

In [None]:
# Standardise the GraphWave signatures, then reduce them to two principal
# components for plotting.
chi_scaled = StandardScaler().fit_transform(chi)
pca = PCA(n_components=2)
graphwave_emb = pca.fit_transform(chi_scaled)

In [None]:
# Scatter the 2-D GraphWave embedding of the barbell graph, coloured by label.
node_list = list(g.nodes)  # `Graph.node` was removed in NetworkX 2.4
output = pd.DataFrame(graphwave_emb, columns=['x', 'y'])
output['color'] = [colors[g.nodes[idx]['label']] for idx in node_list]

sns.lmplot(
    x='x',
    y='y',
    data=output,
    fit_reg=False,  # scatter only, no regression line
    hue='color',
    legend=False,
    height=5,
    aspect=1.5,
    scatter_kws={"s": 200},
)
plt.show()

## Node2vec / DeepWalk

In [None]:
from node2vec.src import node2vec
from gensim.models import Word2Vec

In [None]:
# node2vec / DeepWalk baseline on the barbell graph.
# Give every edge unit weight before computing transition probabilities.
for n1, n2, data in g.edges(data=True):
    data.update(weight=1.0)

# p = q = 1.0 on an undirected graph.
G = node2vec.Graph(g, is_directed=False, p=1.0, q=1.0)
G.preprocess_transition_probs()

# Arguments are presumably (num_walks, walk_length) - confirm in node2vec.src.
# Word2Vec expects string tokens, so node ids are stringified.
walks = [list(map(str, walk)) for walk in G.simulate_walks(10, 80)]

# NOTE(review): `size` and `iter` are the gensim 3.x keyword names; gensim 4
# renamed them to `vector_size` and `epochs`.
model = Word2Vec(
    walks,
    size=8,
    window=10,
    min_count=0,
    sg=1,
    workers=1,
    iter=10,
)
wvs = model.wv


In [None]:
# Look up the word vectors for every node (keys are stringified node ids),
# standardise, project to 2-D with PCA, and scatter coloured by node label.
output = wvs[[str(node) for node in node_list]]
pca = PCA(n_components=2)
output = pd.DataFrame(
    pca.fit_transform(StandardScaler().fit_transform(output)),
    columns=['x', 'y'],
)
# `Graph.node` was removed in NetworkX 2.4; use `Graph.nodes`.
output['color'] = [colors[g.nodes[x]['label']] for x in node_list]

sns.lmplot(
    x='x',
    y='y',
    data=output,
    fit_reg=False,  # scatter only, no regression line
    hue='color',
    legend=False,
    height=5,
    aspect=1.5,
    scatter_kws={"s": 200},
)
plt.show()