# **CLUE API for Networks.Skewed.De**

On the website https://networks.skewed.de, there are plenty of dynamical models that come from graphs. There are two main options when considering a dynamical system over a graph:

* Considering the adjacency system: $x' = A x$.
* Considering the laplacian system: $x' = L x$, where $L$ is the Laplacian matrix $D - A$.

This notebook aims to provide a simple interface from CLUE to retrieve models from this website without any input from the user.
Combined with the ERODE output for a CLUE system, this should be equivalent to the use of ERODE to produce `.ode` files.

In [54]:
import requests, zipfile, io, pandas, sys, time
from contextlib import redirect_stdout
from numpy import ndarray, array
from sympy import QQ

sys.path.insert(0, "..") # clue is here
from clue import FODESystem, SparsePolynomial, SparseVector, SparseRowMatrix

class Graph:
    '''
    Weighted graph stored as a list of vertices plus, for each vertex, a
    dictionary mapping the position of a target vertex to the value of the edge.

    Vertices can be referred to either by the object given at creation or by
    their integer position in the list of vertices. An integer is *always*
    interpreted as a position, never as a vertex label.

    Parameters:
      * ``vertices``: list/tuple with the vertices of the graph (``None`` means no vertices).
      * ``edges``: list/tuple of edges. Each edge is a list/tuple ``(src, trg[, value])``;
        when ``value`` is not given, 1 is used.
      * ``final``: if ``True``, the graph can not be modified once it is built.
      * ``undirected``: if ``True``, each edge is also added in the opposite direction.
      * ``multi``: if ``True``, the values of repeated edges are added together;
        otherwise a repeated edge raises a ``KeyError``.

    Raises:
      * ``TypeError``: if ``vertices``/``edges`` are not lists or tuples, if an edge is not
        a list/tuple with at least 2 components, or if an endpoint is neither an
        integer nor a vertex of the graph.
      * ``ValueError``: if a vertex is repeated.
      * ``IndexError``: if an integer endpoint is out of range.
    '''
    def __init__(self, vertices = None, edges = None, final = False, undirected = False, multi = True):
        vertices = [] if vertices is None else vertices
        edges = [] if edges is None else edges

        if not isinstance(vertices, (list, tuple)):
            raise TypeError("Only list or tuples are valid to provide info for vertices")

        if not isinstance(edges, (list,tuple)):
            raise TypeError("Edges must be given as a list of triplets")

        self.__vertices = []
        self.__edges = []
        # The graph must stay mutable while it is being built; the requested
        # value of ``final`` is only set at the very end.
        self.__final = False
        for vertex in vertices:
            self.add_vertex(vertex)
        for edge in edges:
            # Either condition makes the edge invalid (the original ``and``
            # let short tuples and non-sequences slip through).
            if not isinstance(edge, (list,tuple)) or len(edge) < 2:
                raise TypeError("An edge must have 2 or 3 components at least")
            src = self._index_of(edge[0])
            trg = self._index_of(edge[1])

            value = edge[2] if len(edge) >= 3 else 1
            self.add_edge(src, trg, value, add=multi, undirected=undirected)
        self.__final = final

    def _index_of(self, vertex):
        '''
        Return the position of an edge endpoint.

        Integers are returned unchanged (checking them first avoids scanning the
        whole list of vertices for each integer endpoint). Any other object is
        looked up in the list of vertices; a ``TypeError`` is raised if it is not there.
        '''
        if isinstance(vertex, int):
            return vertex
        try:
            return self.__vertices.index(vertex)
        except ValueError:
            raise TypeError("A vertex in an edge must be a valid structure or an integer") from None

    def add_vertex(self, vertex):
        '''
        Append a new vertex (with no edges) to the graph.

        Raises ``ValueError`` if the graph is final or the vertex already exists.
        '''
        if self.__final: raise ValueError("The graph is define as final. Can not be changed")

        if vertex in self.__vertices:
            raise ValueError(f"Repeated vertex wanted to be added to the graph ({vertex})")
        self.__vertices.append(vertex)
        self.__edges.append(dict())

    def add_edge(self, src, trg, value, add = True, undirected = False):
        '''
        Add the edge ``src -> trg`` (given as positions) with the given value.

        If the edge already exists, ``value`` is added to the stored value when
        ``add`` is ``True``; otherwise a ``KeyError`` is raised. When ``undirected``
        is ``True`` the edge ``trg -> src`` is added too.

        NOTE(review): with ``undirected=True`` a self-loop (``src == trg``) is
        processed twice, so its value is counted twice (or a ``KeyError`` is raised
        when ``add`` is ``False``). Confirm this is the intended convention.

        Raises ``ValueError`` if the graph is final and ``IndexError`` if a
        position is out of range.
        '''
        if self.__final: raise ValueError("The graph is define as final. Can not be changed")

        # Valid positions are 0, ..., n-1: ``n`` itself is already out of range.
        nvertices = len(self.__vertices)
        if src < 0 or src >= nvertices:
            raise IndexError("A vertex is given out or range")
        if trg < 0 or trg >= nvertices:
            raise IndexError("A vertex is given out or range")

        if trg in self.__edges[src]:
            if not add: raise KeyError("Requested not to add value to an already existing edge")
            self.__edges[src][trg] += value
        else:
            self.__edges[src][trg] = value

        # The reverse edge is added as a directed one, so the recursion stops here.
        if undirected: self.add_edge(trg, src, value, add)

    def adjacency_matrix(self,field=QQ) -> SparseRowMatrix:
        '''
        Build the matrix whose entry ``(i, k)`` is the value of the edge ``i -> k``,
        converted to ``field``.
        '''
        n = len(self)
        matrix = SparseRowMatrix(n,field)
        for i in range(n):
            for k,v in self.__edges[i].items():
                matrix.increment(i,k, field.convert(v))
        return matrix

    def degree(self, vertex) -> int:
        '''
        Return the sum of the values of the edges leaving ``vertex``.

        ``vertex`` can be a position or a vertex of the graph (in the latter case
        ``ValueError`` is raised if it is not in the graph).
        '''
        vertex = self.__vertices.index(vertex) if not isinstance(vertex, int) else vertex
        return sum(self.__edges[vertex].values())

    def laplacian_matrix(self, field=QQ) -> SparseRowMatrix:
        '''
        Build the adjacency matrix with the degree of each vertex subtracted from
        its diagonal entry, i.e., the matrix `A - D`.
        '''
        n = len(self)
        matrix = self.adjacency_matrix(field)
        for i in range(n):
            deg = field.convert(-self.degree(i))
            matrix.increment(i,i, deg)
        return matrix

    def __len__(self) -> int:
        '''Number of vertices of the graph.'''
        return len(self.__vertices)

In [56]:
def FromNetwork(network, name=None, undirected=None, adjacency=True):
    '''
    Build a linear differential system from a network of https://networks.skewed.de.

    Parameters:
      * ``network``: name of the entry in the website.
      * ``name``: name of the particular net inside the entry. If ``None``, the
        first net listed by the website is used.
      * ``undirected``: whether the edges are also added in the opposite direction.
        If ``None``, the ``is_directed`` flag reported by the website is used.
      * ``adjacency``: if ``True`` the system is built from the adjacency matrix of
        the graph, otherwise from its ``laplacian_matrix``.

    Returns the object created by ``FODESystem.LinearSystem`` using the second
    column of ``nodes.csv`` as names of the variables.

    Raises ``ValueError`` if ``name`` is not a net of ``network`` and
    ``requests.HTTPError`` if the website answers with an error status.
    '''
    print(f"[FromNetwork] Reaching the website for the model {network}_{name}...", end=" ", flush=True)
    response = requests.get(f"https://networks.skewed.de/api/net/{network}")
    # Fail here with the HTTP status instead of later with an unrelated error.
    response.raise_for_status()
    r = response.json()
    print("Done", flush=True)
    name = r["nets"][0] if name is None else name
    if name not in r["nets"]:
        raise ValueError(f"{name} is not valid for the class {network}")

    ## Getting other inputs (undirected and multigraph)
    if undirected is None:
        # With only one net the analyses are given directly; otherwise they are indexed by net.
        analyses = r["analyses"] if len(r["nets"]) == 1 else r["analyses"][name]
        undirected = not analyses["is_directed"]

    print("[FromNetwork] Reaching the website for the csv description...", end=" ", flush=True)
    download = requests.get(f"https://networks.skewed.de/net/{network}/files/{name}.csv.zip")
    download.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(download.content)) as graph_zip:
        print("Done", flush=True)

        print("[FromNetwork] Building graph...", end=" ", flush=True)
        vertices = pandas.read_csv(io.BytesIO(graph_zip.read("nodes.csv")), delimiter=",")
        print("Read vertices...", end=" ", flush=True)
        edges = pandas.read_csv(io.BytesIO(graph_zip.read("edges.csv")), delimiter=",")
        print("Read edges...", end=" ", flush=True)
    varnames = vertices[vertices.columns[1]].tolist()
    print("Created variable names...", end=" ", flush=True)
    G = Graph(
        vertices = varnames,
        # Only the first (up to) three columns of the edges are used as (src, trg[, value]).
        edges = list(edges[edges.columns[:3]].itertuples(index=False, name=None)),
        final = True,
        undirected = undirected
    )
    print("Done", flush=True)
    print(f"[FromNetwork] Building {'adjacency' if adjacency else 'laplacian'} matrix...", end=" ", flush=True)
    A = G.adjacency_matrix() if adjacency else G.laplacian_matrix()
    print("Done")
    print("[FromNetwork] Building differential system...", end=" ", flush=True)
    system = FODESystem.LinearSystem(A, variables=varnames)
    print("Done", flush=True)
    return system

In [57]:
# Build (and time) the adjacency system of the net "FR_2013" from the entry
# "eu_procurements_alt", forcing the graph to be undirected.
%time system = FromNetwork("eu_procurements_alt", name="FR_2013", undirected=True)

[FromNetwork] Reaching the website for the model eu_procurements_alt_FR_2013... Done
[FromNetwork] Reaching the website for the csv description... Done
[FromNetwork] Building graph... Read vertices... Read edges... Created variable names... Done
[FromNetwork] Building adjacency matrix... Done
[FromNetwork] Building differential system... Done
CPU times: user 2min 1s, sys: 5.18 ms, total: 2min 1s
Wall time: 2min 2s


## Performance of the API

In [37]:
# Retrieve the names of all the entries of the website. The answer is decoded
# with the JSON parser of ``requests`` instead of splitting the raw text by hand,
# which breaks with any whitespace or escaped character in the answer.
list_of_models_response = requests.get("https://networks.skewed.de/api/nets")
list_of_models_bytes = list_of_models_response.content
all_models = list_of_models_response.json()
print(f"Number of total models: {len(all_models)}")
def find_valid_100K(model, max_vertices=100_000):
    '''
    Return the pairs ``(model, net)`` of an entry of https://networks.skewed.de
    whose number of vertices is strictly smaller than ``max_vertices``.

    Parameters:
      * ``model``: name of the entry in the website.
      * ``max_vertices``: strict upper bound for the number of vertices (100000 by default).

    Entries flagged as ``restricted`` return an empty list.

    Raises ``RuntimeError`` if the (non-restricted) entry lists no nets.
    '''
    info = requests.get(f"https://networks.skewed.de/api/net/{model}").json()
    if info.get("restricted", False):
        return []

    networks = info.get("nets", [])
    if not networks:
        raise RuntimeError(f"The entry {model} does not list any net")

    analyses = info["analyses"]
    # With only one net the analyses are given directly; otherwise they are indexed by net.
    single = len(networks) == 1
    valid = []
    for network in networks:
        nvertices = analyses["num_vertices"] if single else analyses[network]["num_vertices"]
        if nvertices < max_vertices:
            valid.append((model, network))
    return valid
            
# Collect the pairs (entry, net) of every entry of the website that are small enough.
upTo100K = []
for model in all_models:
    upTo100K.extend(find_valid_100K(model))
print(f"Found {len(upTo100K)} models with given characteristics")

def time_test(model, name):
    '''
    Measure the time needed by ``FromNetwork`` to build the system for ``(model, name)``.

    Returns the pair ``(system.size, elapsed)`` where ``elapsed`` is given in seconds.
    '''
    # ``perf_counter`` is monotonic, so the measure is not affected by clock updates.
    start = time.perf_counter()
    system = FromNetwork(model, name)
    elapsed = time.perf_counter() - start
    return system.size, elapsed

Number of total models: 279
Found 163257 models with given characteristics


In [None]:
# Maps the size of each system to the list of times needed to build a system of that size.
times = {}
# Everything printed while building the systems is sent to the log file.
with open("./log.log", "w") as log, redirect_stdout(log):
    for model, name in upTo100K:
        n, t = time_test(model, name)
        times.setdefault(n, []).append(t)
print("Computed all times for building the system")

In [45]:
# Metadata (as a dictionary) of the entry "advogato", kept for manual inspection.
r = requests.get(f"https://networks.skewed.de/api/net/advogato").json()

In [52]:
%debug

> [0;32m/home/anton/git/clue/clue/linalg.py[0m(569)[0;36mincrement[0;34m()[0m
[0;32m    567 [0;31m            [0mwhole[0m [0mzero[0m [0mrow[0m[0;34m.[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    568 [0;31m        '''
[0m[0;32m--> 569 [0;31m        [0mself[0m[0;34m[[0m[0mi[0m[0;34m,[0m [0mj[0m[0;34m][0m [0;34m=[0m [0mself[0m[0;34m[[0m[0mi[0m[0;34m,[0m [0mj[0m[0;34m][0m [0;34m+[0m [0mextra[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    570 [0;31m[0;34m[0m[0m
[0m[0;32m    571 [0;31m    [0;31m#--------------------------------------------------------------------------[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  print(extra)


0.8


ipdb>  self.field


QQ


ipdb>  self.field.convert(extra)


MPQ(4,5)


ipdb>  exit


In [46]:
# Show the analyses reported by the website for the entry stored in ``r``.
r["analyses"]

{'num_edges': 51127,
 'num_vertices': 6541,
 'is_directed': True,
 'average_degree': 7.8163889313560615,
 'degree_std_dev': 34.12941084115283,
 'is_bipartite': False,
 'global_clustering': 0.11104023204960285,
 'degree_assortativity': -0.052796485761516326,
 'largest_component_fraction': 0.7708301482953677,
 'edge_reciprocity': 0.3851585268057973,
 'transition_gap': 0.9528608197737135,
 'mixing_time': 20.70975207197159,
 'hashimoto_radius': 68.60542306178223,
 'diameter': 9,
 'edge_properties': [['weight', 'double']],
 'vertex_properties': [['meta', 'string'], ['_pos', 'vector<double>']],
 'knn_proj_1': 4.117164545800367,
 'knn_proj_2': 2.5417987237312554}