# Structures of Networks - workgroup session 3

The structure of this notebook follows that of the associated problem sheet.
Code blocks (cells) are already available for (sub)problems that involve coding, but you can always add more.
Some of these code blocks contain coding-related tips, in the form of comments.

## 1.a.

In [4]:
import networkx as ntx
import random as rn
import numpy as np
import math as mt
import matplotlib.pyplot as plt

# seed both the standard-library and the NumPy random number generators with the fixed value 1
# (presumably so that results are reproducible between runs of the notebook)
rn.seed(1)
np.random.seed(1)

# finds a random path in graph G, between distinct nodes i and j
# the path is empty if running into a dead end, because of lack of connectivity or single-neighbor nodes
def random_path(G, i, j):
    """Return a random, non-revisiting walk in G that leads from node i to node j.

    Starting at i, the walk stops as soon as j is a neighbor of the current
    node. Otherwise it moves to a randomly chosen neighbor that is not yet on
    the path.

    Parameters:
        G: graph object exposing ``neighbors(node)``.
        i: start node.
        j: target node, expected to differ from i.

    Returns:
        The list of nodes from i to j, or an empty list if the walk reaches a
        node all of whose neighbors have already been visited (dead end).
    """
    visited = [i]
    current = i

    while True:
        adjacent = list(G.neighbors(current))

        # the target is one step away: close the path and stop
        if any(node == j for node in adjacent):
            visited.append(j)
            return visited

        # neighbors of the current node that are not on the path yet
        candidates = list(set(adjacent).difference(set(visited)))
        if not candidates:
            # dead end: signalled to the caller by an empty path
            return []

        current = rn.choice(candidates)
        visited.append(current)



## 1.b.

In [41]:
# if you have a list of density values called dens, you can plot an associated histogram using:
# plt.hist(dens, bins=10)
# plt.show()

## 1.c.

## 1.d.

## 1.e.


## 2.a.

In [None]:

# randomly generates a list of feature vectors (LFV), containing n vectors of F entries each, where each entry is a binary random (unbiased) variable
def random_LFV(n, F):
    """Build a random list of binary feature vectors.

    Parameters:
        n: number of feature vectors to generate.
        F: number of entries in each vector.

    Returns:
        A list of n lists, each holding F integers drawn with
        ``rn.randint(0, 1)``. Entries are drawn vector by vector, in order.
    """
    return [[rn.randint(0, 1) for _ in range(F)] for _ in range(n)]

# computes the (Hamming) similarity between two (binary) vectors of equal lengths
def similarity(x1, x2):
    """Return the fraction of positions at which x1 and x2 hold equal entries.

    Parameters:
        x1: first vector; its length determines how many positions are compared.
        x2: second vector, indexed at the same positions as x1.

    Returns:
        A float between 0.0 and 1.0: the number of matching positions divided
        by ``len(x1)``.

    Raises:
        ZeroDivisionError: if x1 is empty.
        IndexError: if x2 is shorter than x1.
    """
    length = len(x1)
    matches = sum(int(x1[pos] == x2[pos]) for pos in range(length))
    return float(matches) / length

# generates a random graph from a list of feature vectors x, using an h-power-adjusted Hamming similarity, which is interpreted as link probability
def LFV_graph(x, h):
    """Generate a random graph whose link probabilities come from feature similarity.

    One node is created per feature vector. Each unordered pair of nodes (u, v)
    is linked with probability ``similarity(x[u], x[v]) ** h``.

    Parameters:
        x: list of feature vectors; the node with index u is associated with x[u].
        h: exponent applied to the similarity before it is used as a probability.

    Returns:
        A networkx graph with ``len(x)`` nodes and the randomly generated links.
    """
    num_nodes = len(x)
    graph = ntx.empty_graph(num_nodes)

    for u in range(num_nodes - 1):
        for v in range(u + 1, num_nodes):
            link_prob = similarity(x[u], x[v]) ** h
            # exactly one random draw per pair, whether or not the link is created
            if rn.random() < link_prob:
                graph.add_edge(u, v)

    return graph

# finds a path in graph G, between distinct nodes i and j, guided by list of feature vectors x, where each vector in x is associated to one node in G
# returns an empty path if running into a dead end, because of lack of connectivity or single-neighbor nodes
def guided_path(G, x, i, j):
    """Return a similarity-guided, non-revisiting walk in G from node i to node j.

    Starting at i, the walk stops as soon as j is a neighbor of the current
    node. Otherwise it moves to the unvisited neighbor whose feature vector is
    most similar to that of j; on ties, the first such neighbor in the
    candidate list is taken.

    Parameters:
        G: graph object exposing ``neighbors(node)``.
        x: list of feature vectors, where x[v] belongs to node v.
        i: start node.
        j: target node, expected to differ from i.

    Returns:
        The list of nodes from i to j, or an empty list if the walk reaches a
        node all of whose neighbors have already been visited (dead end).
    """
    visited = [i]
    current = i

    while True:
        adjacent = list(G.neighbors(current))

        # the target is one step away: close the path and stop
        if any(node == j for node in adjacent):
            visited.append(j)
            return visited

        # neighbors of the current node that are not on the path yet
        candidates = list(set(adjacent).difference(set(visited)))
        if not candidates:
            # dead end: signalled to the caller by an empty path
            return []

        # step to the candidate most similar to the target node
        current = max(candidates, key=lambda node: similarity(x[node], x[j]))
        visited.append(current)


## 2.b.

In [None]:
# example of a parameter choice and associated network generation:
# n = 1000
# F = 30
# h = 3.0
# x = random_LFV(n, F)
# LFVRG = LFV_graph(x, h)


## 2.c.

## 2.d.

## 3.a.

In [11]:
# computes the mean of the h-adjusted (Hamming) similarities between all pairs of feature vectors in x
def mean_adjusted_similarity(x, h):
    """Return the average of ``similarity(a, b) ** h`` over all unordered pairs in x.

    Parameters:
        x: list of feature vectors.
        h: exponent applied to each pairwise similarity.

    Returns:
        The sum of the adjusted similarities over the n*(n-1)/2 pairs, divided
        by that number of pairs, where n = len(x).

    Raises:
        ZeroDivisionError: if x holds fewer than two vectors.
    """
    count = len(x)
    total = 0.0

    for first in range(count - 1):
        anchor = x[first]
        for second in range(first + 1, count):
            total += similarity(anchor, x[second]) ** h

    # dividing by n*(n-1)/2 pairs, written as two successive divisions
    return 2.0 * total / count / (count - 1)

## 3.b.

In [13]:
# example of a parameter choice and associated generation of two networks, one for each model, with h-to-p matching in place:
# n = 1000
# F = 30
# h = 3.0
# x = random_LFV(n, F)
# LFVRG = LFV_graph(x, h)
# ERRG = ntx.erdos_renyi_graph(n, mean_adjusted_similarity(x, h))


## 3.c.

## 3.d.