# Basic network import and representation

Based on nb02_data_import_and_networks.ipynb

but here I look at the CollegeMsg (college messages) dataset from the SNAP repository

This dataset contains 3-tuples (source, target, timestamp); I flatten the time dimension by ignoring the timestamp and keeping only the (source, target) pairs

We analyze the dataset 'CollegeMsg' available from the SNAP repository: http://snap.stanford.edu/data/index.html

Downloaded from here: https://snap.stanford.edu/data/CollegeMsg.html


In [None]:
#check if polygraph is correctly installed
!pip install polygraphs

In [1]:
#simulation
#path to configuration file
conf_path= '/Users/mariafedericanorelli/Desktop/polygraphs/configs/test.yaml'

#simulate
!polygraphs -f "{conf_path}"

[MON] step 0001 Ksteps/s   0.00 A/B 0.56/0.44
[MON] step 0049 Ksteps/s   0.65 A/B 0.00/1.00
 INFO polygraphs> Sim #0001:     49 steps    0.11s; action: B undefined: 0 converged: 1 polarized: 0 
[MON] step 0001 Ksteps/s   0.00 A/B 0.56/0.44
[MON] step 0073 Ksteps/s   0.85 A/B 0.00/1.00
 INFO polygraphs> Sim #0002:     73 steps    0.09s; action: B undefined: 0 converged: 1 polarized: 0 
[MON] step 0001 Ksteps/s   0.00 A/B 0.56/0.44
[MON] step 0100 Ksteps/s   0.85 A/B 0.00/1.00
[MON] step 0107 Ksteps/s   0.84 A/B 0.00/1.00
 INFO polygraphs> Sim #0003:    107 steps    0.13s; action: B undefined: 0 converged: 1 polarized: 0 
[MON] step 0001 Ksteps/s   0.00 A/B 0.44/0.56
[MON] step 0064 Ksteps/s   0.85 A/B 0.00/1.00
 INFO polygraphs> Sim #0004:     64 steps    0.08s; action: B undefined: 0 converged: 1 polarized: 0 
[MON] step 0001 Ksteps/s   0.00 A/B 0.62/0.38
[MON] step 0093 Ksteps/s   0.84 A/B 0.00/1.00
 INFO polygraphs> Sim #0005:     93 steps    0.11s; action: B undefined: 0 converged: 

In [None]:
import sys, math

In [None]:
%pylab inline

In [None]:
import collections as col

We use a dictionary that associates a key (node) to a list of nodes (neighbours)

In [None]:
# Adjacency map for outgoing links: node id -> list of target node ids.
# defaultdict(list) creates an empty list the first time a key is accessed,
# so links can be appended without initialising each node first.
links_out = col.defaultdict(list)
print(links_out)

We open the file containing the network and read each line

In [None]:
# Edge-list file; a relative path, so it is resolved against the current working directory.
filepath = "CollegeMsg.txt"

In [None]:
# Open the edge list for reading. The handle stays open across the next cells
# and is closed by fh.close() at the end of the parsing cell.
fh = open(filepath, "r")

In [None]:
# Display the file handle (name, mode, encoding).
fh

In [None]:
# Read the whole file into memory as a list of lines (trailing "\n" included).
s = fh.readlines()

In [None]:
# Show how many lines were read instead of dumping the whole list into the
# cell output; the next cell previews the first few lines.
len(s)

In [None]:
# Peek at the first four raw lines.
s[:4]

In [None]:
# Example of the parsing step on the line at index 10: strip the newline,
# then split on whitespace into a list of string tokens.
s[10].strip().split()

In [None]:
# Start from an empty adjacency map so that re-running this cell does not
# append every link a second time.
links_out.clear()

for line in s:
    # remove "\n" characters (.strip()) and split the line at blank spaces (.split())
    t = line.strip().split()
    # skip blank lines and comment lines (first token starts with "#")
    if not t or t[0].startswith("#"):
        continue
    # first token is the origin node, second the destination node;
    # any further token on the line is ignored
    origin = int(t[0])
    dest = int(t[1])
    links_out[origin].append(dest)

# close the file
fh.close()

In [None]:
# Out-degree of node 1001. Use .get() rather than links_out[1001]: indexing a
# defaultdict with a missing key silently inserts that key, which would add a
# node with no links and inflate any later len(links_out).
len(links_out.get(1001, []))

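How many nodes are in the network? Note that `links_out` only has a key for nodes with at least one outgoing link, so the number below counts the nodes that appear as the origin of at least one link.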

In [None]:
# Number of keys in links_out, i.e. the number of distinct origin nodes.
# It is used below as the normalisation for the out-degree distribution.
tot_nodes = len(links_out)
print(tot_nodes)

We calculate the out-degree distribution of the network.

In [None]:
# Histogram of out-degrees: degree_out[k] is the number of nodes whose
# list of outgoing links has length k.
degree_out = {}

for i in links_out:
    deg_out = len(links_out[i])
    # dict.get supplies 0 the first time a given degree is seen
    degree_out[deg_out] = degree_out.get(deg_out, 0) + 1

In [None]:
# Observed out-degree values in increasing order (iterating a dict yields its keys).
print(sorted(degree_out))

In [None]:
# Display the full degree -> node-count mapping.
degree_out

We export the degree distribution to an output file.

In [None]:
# Sorted list of the distinct out-degree values (the keys of degree_out).
s_deg = sorted(degree_out)

In [None]:
# Write one "<degree>  <relative frequency>" line per observed out-degree,
# in increasing order of degree. The with-block closes the file even if a
# write fails part-way through.
with open("CollegeMsg-degout-distri.txt", "w") as fout:
    for d in s_deg:
        # fraction of the tot_nodes nodes that have out-degree d
        deg_freq = float(degree_out[d]) / tot_nodes

        fout.write(str(d) + "  " + str(deg_freq) + "\n")

In [None]:
# Print every (degree, node count) pair on its own line, in dict order.
for degree_and_count in degree_out.items():
    print(degree_and_count)

In [None]:
from operator import itemgetter

In [None]:
# x: out-degree values in increasing order
# y: fraction of the tot_nodes nodes having that out-degree
x, y = [], []

for deg, count in sorted(degree_out.items(), key=itemgetter(0)):
    x.append(deg)
    y.append(float(count) / tot_nodes)

In [None]:
# Out-degree distribution on log-log axes.
plt.figure(figsize=(10, 7))

# markers joined by lines: one point per observed out-degree value
plt.plot(x, y, "o-")

plt.xlabel("$k_{out}$", fontsize=24)
plt.ylabel("$P(k_{out})$", fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
# switch both axes to logarithmic scale
plt.yscale("log")
plt.xscale("log")

Let's have a look at the in-degree distribution.

In [None]:
# Adjacency map for incoming links: destination node id -> list of origin
# node ids, one entry per link read from the file.
links_in = col.defaultdict(list)

# The with-block closes the file even if parsing fails part-way through.
with open(filepath, "r") as fh:
    # iterate over the file line by line
    for line in fh:
        # remove "\n" characters (.strip()) and split the line at blank spaces (.split())
        # The tokens go into `fields` rather than `s`, so the list of lines
        # stored in `s` by the earlier cell is not overwritten.
        fields = line.strip().split()
        # skip blank lines and comment lines (first token starts with "#")
        if not fields or fields[0].startswith("#"):
            continue
        origin = int(fields[0])
        dest = int(fields[1])
        links_in[dest].append(origin)

In [None]:
# Histogram of in-degrees: degree_in[k] is the number of nodes whose list of
# incoming links has length k. defaultdict(int) starts every counter at 0.
degree_in = col.defaultdict(int)
for sources in links_in.values():
    degree_in[len(sources)] += 1

# number of keys in links_in, i.e. nodes with at least one incoming link
tot_nodes_in = len(links_in)
print(tot_nodes_in)

How does the in-degree distribution differ from an exponential distribution?

In [None]:
def f(t, rate=0.5):
    """Exponential reference curve exp(-rate * t).

    Parameters
    ----------
    t : scalar or numpy array
        Point(s) at which to evaluate the curve.
    rate : float, optional
        Decay rate of the exponential. Defaults to 0.5, the value that was
        previously hard-coded.

    Returns
    -------
    Whatever np.exp returns for the input: a numpy scalar for a scalar `t`,
    an array of the same shape for an array `t`.
    """
    return np.exp(-rate * t)


# x: in-degree values in increasing order
# y: fraction of the tot_nodes_in nodes having that in-degree
x, y = [], []
for deg, count in sorted(degree_in.items(), key=itemgetter(0)):
    x.append(deg)
    y.append(float(count) / tot_nodes_in)

# In-degree distribution against the exponential reference curve, log-log axes.
plt.figure(figsize=(10, 7))

# Label the empirical curve as well, so the legend identifies both lines.
plt.plot(np.array(x), np.array(y), label="Empirical")
# reference curve f evaluated at the same degree values
plt.plot(np.array(x), f(np.array(x)), label="Exponential")
plt.xlabel("$k_{in}$", fontsize=24)
plt.ylabel("$P(k_{in})$", fontsize=24)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.yscale("log")
plt.xscale("log")
# fixed viewing window: [xmin, xmax, ymin, ymax]
plt.axis([1, 10000, 0.00001, 1])
plt.legend()
plt.show()