## Graph Embedding

Embedding: a graph's nodes and edges are represented as vectors.
Source: https://www.youtube.com/watch?v=h08RW3AIBVg

In [20]:
import numpy as np
import networkx as nx
import random
import matplotlib.pyplot as plt
from typing import List
from tqdm import tqdm
from gensim.models.word2vec import Word2Vec
from IPython.display import display
from PIL import Image
from collections import defaultdict

In [13]:
def compute_probabilities(graph, probs, p, q):
    """
    Compute normalised second-order transition probabilities for biased walks.

    For every ordered pair (source_node, current_node) of adjacent nodes, one
    unnormalised weight is computed per neighbour of current_node:
      * edge weight * (1/p)  if the neighbour is source_node itself,
      * edge weight          if the neighbour is also adjacent to source_node,
      * edge weight * (1/q)  otherwise.
    Edges without a 'weight' attribute count as weight 1. The weights are then
    divided by their sum.

    graph: input graph; must provide nodes(), neighbors(node) and graph[u][v]
           returning the edge-attribute mapping
    probs: mapping that is filled in place; a missing node entry or a missing
           'probabilities' sub-dict is created on demand, so an empty dict is
           accepted
    p: return parameter
    q: in-out parameter

    Returns probs, where probs[source]['probabilities'][current] is a numpy
    array ordered like graph.neighbors(current).
    """
    for source_node in graph.nodes():
        # Built once per source node: the membership test below runs for every
        # (current, destination) pair and would otherwise rescan the
        # neighbour iterator each time.
        source_neighbors = set(graph.neighbors(source_node))
        source_table = probs.setdefault(source_node, {}).setdefault('probabilities', {})

        for current_node in graph.neighbors(source_node):
            weights = []
            for destination in graph.neighbors(current_node):
                weight = graph[current_node][destination].get('weight', 1)
                if source_node == destination:
                    weight = weight * (1 / p)
                elif destination not in source_neighbors:
                    weight = weight * (1 / q)
                weights.append(weight)

            weights = np.asarray(weights, dtype=float)
            source_table[current_node] = weights / weights.sum()

    return probs

In [16]:
def generate_random_walks(graph, probs, max_walks, walk_len):
    """
    Generate biased random walks starting from every node of the graph.

    graph: input graph; must provide nodes() and graph[node] iterating over
           the neighbours of node
    probs: mapping where probs[previous]['probabilities'][current] holds the
           probabilities of moving from current to each of its neighbours, in
           the same order as graph[current]
    max_walks: number of walks started from each node
    walk_len: number of nodes per walk; a walk is shorter only if it reaches a
              node without neighbours

    Returns the walks in shuffled order, each walk being a list of the visited
    nodes converted to str. Nodes without neighbours start no walk.
    """
    walks = []
    for start_node in graph.nodes():
        first_options = list(graph[start_node])
        if not first_options:
            # Isolated node: no walk can leave it, so none is recorded.
            continue

        for _ in range(max_walks):
            walk = [start_node]
            if walk_len > 1:
                # No previous node exists yet, so the first step is uniform.
                # An index is sampled (not the node itself) so nodes of any
                # hashable type, e.g. tuples, are kept as they are.
                walk.append(first_options[np.random.choice(len(first_options))])

            for _ in range(walk_len - 2):
                options = list(graph[walk[-1]])
                if not options:
                    break
                probabilities = probs[walk[-2]]['probabilities'][walk[-1]]
                walk.append(options[np.random.choice(len(options), p=probabilities)])
            walks.append(walk)

    np.random.shuffle(walks)
    return [list(map(str, walk)) for walk in walks]

In [26]:
def Node2Vec(generated_walks, window_size, embedding_vector_size, min_count=1, **word2vec_kwargs):
    """
    Train a Word2Vec model on random walks and return its node vectors.

    generated_walks: list of walks, each a list of node names (str)
    window_size: Word2Vec context window
    embedding_vector_size: dimensionality of the node vectors
    min_count: minimum number of occurrences a node needs to receive a vector;
               defaults to 1 so that rarely visited nodes are not dropped
               (gensim's own default is 5)
    word2vec_kwargs: further keyword arguments forwarded unchanged to
                     Word2Vec (for example sg=1 to train with skip-gram)

    Returns the trained model's keyed vectors (model.wv).
    """
    model = Word2Vec(
        sentences=generated_walks,
        window=window_size,
        vector_size=embedding_vector_size,
        min_count=min_count,
        **word2vec_kwargs,
    )
    return model.wv

In [18]:
# Example graph used by the cells below: the karate club graph shipped with networkx.
G = nx.karate_club_graph()

In [21]:
# One empty 'probabilities' table per node of G, to be filled by compute_probabilities.
probs = defaultdict(dict, ((node, {'probabilities': {}}) for node in G.nodes()))

In [22]:
# Display the freshly initialised structure (every node maps to an empty table).
probs

defaultdict(dict,
            {0: {'probabilities': {}},
             1: {'probabilities': {}},
             2: {'probabilities': {}},
             3: {'probabilities': {}},
             4: {'probabilities': {}},
             5: {'probabilities': {}},
             6: {'probabilities': {}},
             7: {'probabilities': {}},
             8: {'probabilities': {}},
             9: {'probabilities': {}},
             10: {'probabilities': {}},
             11: {'probabilities': {}},
             12: {'probabilities': {}},
             13: {'probabilities': {}},
             14: {'probabilities': {}},
             15: {'probabilities': {}},
             16: {'probabilities': {}},
             17: {'probabilities': {}},
             18: {'probabilities': {}},
             19: {'probabilities': {}},
             20: {'probabilities': {}},
             21: {'probabilities': {}},
             22: {'probabilities': {}},
             23: {'probabilities': {}},
             24: {'probabilities

In [24]:
# Transition probabilities with p = q = 1, then 5 walks of 10 nodes per start node.
cp = compute_probabilities(G, probs, p=1, q=1)
walks = generate_random_walks(G, cp, max_walks=5, walk_len=10)

In [27]:
# Train the embeddings: context window of 20, 20-dimensional node vectors.
n2v_emb = Node2Vec(walks, window_size=20, embedding_vector_size=20)