In [29]:
import os
import weakref
from dataclasses import dataclass
from itertools import takewhile
from typing import Dict, List

import numpy as np
import pandas as pd
from numpy.typing import ArrayLike

class Node:
    """A graph vertex together with the ids of its adjacent vertices."""

    def __init__(self, u_id: int, neighbors: ArrayLike, skip_unique: bool = False):
        """Create a node.

        Args:
            u_id: Identifier of this node.
            neighbors: Identifiers of the adjacent nodes.
            skip_unique: When true, ``neighbors`` is trusted to be already
                sorted and duplicate-free and the ``np.unique`` pass is skipped.
        """
        self._u_id = u_id
        if skip_unique:
            # Still normalise to an ndarray so `deg` and the merge-based helper
            # used by `subgraph` always receive the same container type.
            self.neighbors = np.asarray(neighbors)
        else:
            # np.unique removes duplicates and returns a sorted array.
            self.neighbors = np.unique(neighbors)

    @property
    def u_id(self):
        """Read-only identifier of the node."""
        return self._u_id

    @property
    def deg(self):
        """Number of stored neighbours."""
        return len(self.neighbors)

    def subgraph(self, other_nodes: np.ndarray):
        """Return a new node with the same id whose neighbours are limited to ``other_nodes``.

        ``other_nodes`` has to be sorted ascending: ``get_intersection`` walks
        both arrays with a two-pointer merge. The result of that merge is
        already sorted and unique, hence ``skip_unique=True``.
        """
        return Node(self.u_id, get_intersection(self.neighbors, other_nodes), skip_unique=True)

class Network:
    """Temporal edge list read from a whitespace-separated text file.

    Each data row is read as ``fr to weight timestamp``; leading lines that
    start with ``%`` are treated as comments and skipped.
    """

    def __init__(self, path: str):
        """Load the edge list stored at ``path``.

        Args:
            path: Location of the edge-list file.

        Raises:
            OSError: If ``path`` is not an existing regular file.
        """
        if not os.path.isfile(path):
            raise OSError("wrong graph path")

        # Count the leading "%" comment lines so pandas can skip exactly those.
        with open(path) as f:
            skip_rows = sum(1 for _ in takewhile(lambda s: s.startswith("%"), f))

        # The weight column is discarded; rows are ordered chronologically.
        self.data = (
            pd.read_csv(path, sep=r'\s+', header=None,
                        names=["fr", "to", "weight", "timestamp"], skiprows=skip_rows)
            .drop(columns=["weight"])
            .sort_values(by="timestamp")
        )

        # np.sort works along the last axis, so every row becomes
        # (smaller id, larger id): one canonical orientation per undirected edge.
        self.edges = np.sort(self.data[["fr", "to"]].values)
        # Largest node id seen in the file.
        # NOTE(review): this equals the node count only if ids are exactly 1..N — confirm.
        self.total_nodes = self.edges.max()
        self.timestamps = self.data.timestamp.values

    def __str__(self):
        """Render the loaded rows as tab-separated text in chronological order.

        The header lists only the columns that are actually kept (the weight
        column is dropped on load).
        """
        rows = zip(self.data["fr"].values, self.data["to"].values, self.timestamps)
        lines = ["from\tto\ttimestamp\n"]
        lines.extend(f"{fr}\t{to}\t{ts}\n" for fr, to, ts in rows)
        # A single join avoids the quadratic cost of repeated string concatenation.
        return "".join(lines)


class Graph:
    """Undirected graph described by an edge array and per-node adjacency lists."""

    def __init__(self, network: "Network", edges: np.ndarray, nodes: "dict[int, Node]"):
        """Store the graph.

        Args:
            network: The network this graph was taken from (kept for reference).
            edges: Array of node-id pairs, one row per edge; rows may repeat.
            nodes: Mapping from node id to its ``Node``.
        """
        self.network = network
        self.edges = edges
        # Distinct rows of `edges`.
        self.edges_set: np.ndarray = np.unique(self.edges, axis=0)
        self.nodes = nodes

    def get_subgraph(self, nodes_ids: ArrayLike) -> "Graph":
        """Return the subgraph induced by ``nodes_ids``.

        Every id must be a key of ``self.nodes``; a missing id raises ``KeyError``.
        """
        nodes_ids = np.unique(nodes_ids)
        new_nodes = {u_id: self.nodes[u_id].subgraph(nodes_ids)
                     for u_id in nodes_ids}
        # Adjacency lists mention every undirected edge from both endpoints.
        # Storing each pair as (smaller id, larger id) lets `edges_set` collapse
        # the two mentions into one row, so `density` counts the edge once.
        pairs = [
            (min(u_id, neighbor), max(u_id, neighbor))
            for u_id, node in new_nodes.items()
            for neighbor in node.neighbors
        ]
        if pairs:
            new_edges = np.array(pairs)
        else:
            # Keep the two-column layout even when there are no edges.
            new_edges = np.empty((0, 2), dtype=nodes_ids.dtype)

        return Graph(self.network, new_edges, new_nodes)

    @property
    def density(self):
        """Fraction of possible node pairs that are connected: ``2E / (V * (V - 1))``.

        Returns ``nan`` when the graph has fewer than two nodes.
        """
        if len(self.nodes) > 1:
            return 2 * len(self.edges_set) / len(self.nodes) / (len(self.nodes) - 1)
        return np.nan

class StaticGraph(Graph):
    """A ``Graph`` built from the edges of a network that fall into a time window."""

    def __init__(self, timestamps, *args, **kwargs):
        """Store ``timestamps`` and forward the remaining arguments to ``Graph``.

        Args:
            timestamps: Timestamps that belong to the edges of this graph.
            *args, **kwargs: Passed unchanged to ``Graph.__init__``.
        """
        self.timestamps = timestamps
        super().__init__(*args, **kwargs)

    @staticmethod
    def from_time_slice(network, quantile_end, quantile_start=0) -> "StaticGraph":
        """Build the graph of all edges whose timestamp lies between two quantiles.

        Args:
            network: Object exposing aligned ``edges`` and ``timestamps`` arrays.
            quantile_end: Upper quantile of the timestamp distribution, in [0, 1].
            quantile_start: Lower quantile, in [0, ``quantile_end``].

        Returns:
            A ``StaticGraph`` whose ``edges`` and ``timestamps`` are aligned
            row by row (both are limited to the selected window).
        """
        assert 0 <= quantile_start <= quantile_end <= 1, "Incorrect quantiles"

        timestamps = network.timestamps
        left, right = np.quantile(timestamps, [quantile_start, quantile_end])
        # One mask for both arrays keeps edge i and timestamp i in step.
        in_slice = (left <= timestamps) & (timestamps <= right)
        edges = network.edges[in_slice]

        # Mirror every edge so each endpoint lists the other one as a neighbour.
        undirected = np.vstack([edges, edges[:, ::-1]])
        adj_lists = pd.DataFrame(undirected, columns=["v1", "v2"])\
            .groupby("v1")\
            .v2.apply(np.array)

        nodes = {u_id: Node(u_id, neighbors) for u_id, neighbors in adj_lists.items()}

        return StaticGraph(timestamps[in_slice], network, edges, nodes)

In [17]:
import numba
from numba import njit

@njit
def get_intersection(first, second):
    """Return the values present in both ascending-sorted arrays.

    Two cursors walk the inputs in step (merge style); the output keeps the
    order of the inputs and has the dtype of ``first``.
    """
    n_first, n_second = first.size, second.size
    # The result can never be longer than the shorter input.
    common = np.empty(min(n_first, n_second), dtype=first.dtype)
    pos_first = pos_second = found = 0
    while pos_first < n_first and pos_second < n_second:
        left = first[pos_first]
        right = second[pos_second]
        if left != right:
            # Move the cursor that points at the smaller value.
            if left < right:
                pos_first += 1
            else:
                pos_second += 1
            continue
        common[found] = left
        found += 1
        pos_first += 1
        pos_second += 1
    return common[:found]

@njit
def get_union(first, second):
    """Merge two ascending-sorted arrays, writing a value found in both only once.

    The output is ascending and has the dtype of ``first``.
    """
    n_first, n_second = first.size, second.size
    merged = np.empty(n_first + n_second, dtype=first.dtype)
    pos_first = pos_second = filled = 0
    while pos_first < n_first and pos_second < n_second:
        left = first[pos_first]
        right = second[pos_second]
        if left < right:
            merged[filled] = left
            filled += 1
            pos_first += 1
        elif left > right:
            merged[filled] = right
            filled += 1
            pos_second += 1
        else:
            # Shared value: write it unless it is already the last one written.
            if filled == 0 or merged[filled - 1] != left:
                merged[filled] = left
                filled += 1
            pos_first += 1
            pos_second += 1
    # At most one input still has elements left; copy them over unchanged.
    for idx in range(pos_first, n_first):
        merged[filled] = first[idx]
        filled += 1
    for idx in range(pos_second, n_second):
        merged[filled] = second[idx]
        filled += 1
    return merged[:filled]

In [4]:
# Load the temporal edge list; the relative path is resolved against the
# notebook's working directory.
network = Network("data/opsahl-ucsocial/out.opsahl-ucsocial")
# Upper timestamp quantile handed to StaticGraph.from_time_slice in the next cell.
time_quantile = .8

In [30]:
# Static graph of the edges whose timestamp lies at or below the `time_quantile`
# quantile (the lower quantile defaults to 0).
graph = StaticGraph.from_time_slice(network, time_quantile)

In [6]:
# Number of nodes that have at least one edge inside the time slice.
num_vertices = len(graph.nodes)
# Distinct node pairs only: repeated rows of `graph.edges` are counted once.
num_edges = len(graph.edges_set)
# Graph.density applies the same 2E / (V * (V - 1)) formula and returns nan
# instead of dividing by zero when there are fewer than two vertices.
density = graph.density

In [31]:
from queue import deque

def get_connected_comps(graph: "Graph") -> list[list[int]]:
    """Split the graph into connected components with a breadth-first search.

    Args:
        graph: Object whose ``nodes`` maps a node id to an object exposing
            the ids of its neighbours as ``neighbors``.

    Returns:
        One list of node ids per component. Components appear in the order
        their first node occurs in ``graph.nodes``; inside a component the ids
        are in BFS order. Every node id appears exactly once overall.
    """
    visited = set()
    conn_comps: list[list[int]] = []

    for root_u_id in graph.nodes:
        if root_u_id in visited:
            continue

        # Mark a node when it is enqueued (the root included) so it can never
        # be queued twice, and expand every node that is popped.
        visited.add(root_u_id)
        queue = deque([root_u_id])
        component = []

        while queue:
            u_id = queue.popleft()
            component.append(u_id)
            for neighbor_id in graph.nodes[u_id].neighbors:
                if neighbor_id not in visited:
                    visited.add(neighbor_id)
                    queue.append(neighbor_id)

        conn_comps.append(component)
    return conn_comps

In [32]:
# Groups of node ids returned by get_connected_comps for the static graph.
conn_comps = get_connected_comps(graph)
# Subgraph induced by the largest group (largest by number of ids).
conn_comp = graph.get_subgraph(max(conn_comps, key=len))
# Share of the graph's vertices inside that subgraph; `num_vertices` is set in an earlier cell.
max_conn_comp_fraction =  len(conn_comp.nodes) / num_vertices


In [33]:
from scipy.stats import pearsonr 

def get_avg_cluster_coeff(graph: Graph) -> float:
    """Average, over all nodes, of the density of the subgraph induced by a node's neighbours.

    Nodes whose neighbourhood density is ``nan`` are left out of the mean
    (``np.nanmean`` ignores them).
    """
    local_coeffs = []
    for vertex in graph.nodes.values():
        neighbourhood = graph.get_subgraph(vertex.neighbors)
        local_coeffs.append(neighbourhood.density)
    return np.nanmean(local_coeffs)

def get_deg_assortivity(graph: "Graph") -> float:
    """Degree assortativity: Pearson correlation of the degrees at the two ends of an edge.

    Every distinct edge contributes both orientations, (deg u, deg v) and
    (deg v, deg u). The edges are undirected, so using only the stored
    orientation would make the result depend on how the nodes are numbered.

    Args:
        graph: Object with ``edges_set`` (rows of node-id pairs) and ``nodes``
            (node id -> object exposing ``deg``).

    Returns:
        The correlation coefficient, in [-1, 1].
    """
    edge_ends = np.asarray(graph.edges_set).reshape(-1, 2)
    degs = np.array(
        [[graph.nodes[u].deg, graph.nodes[v].deg] for u, v in edge_ends],
        dtype=float,
    ).reshape(-1, 2)
    # Interleave the two orientations: x = [a0, b0, a1, b1, ...], y = [b0, a0, b1, a1, ...].
    x = degs.ravel()
    y = degs[:, ::-1].ravel()
    # The result unpacks as (statistic, pvalue).
    statistic, _ = pearsonr(x, y)
    return statistic

In [34]:
from itertools import combinations

In [51]:
def get_static_features_raw(graph):
    """Compute four neighbourhood-based similarity features for every pair of nodes.

    For a pair taken in the iteration order of ``graph.nodes``, the entry
    ``features[node_1.u_id, node_2.u_id]`` holds:

    0. number of common neighbours,
    1. Adamic-Adar index: sum of ``1 / log(deg)`` over the common neighbours,
    2. Jaccard coefficient: ``|common| / |union|`` (0 when both nodes have no neighbours),
    3. preferential attachment: product of the two degrees.

    Only that one orientation of each pair is filled; all other entries stay 0.

    Args:
        graph: Graph whose node ids are non-negative integers (they are used
            directly as array indices).

    Returns:
        Float array of shape ``(size, size, 4)``. ``size`` is
        ``graph.network.total_nodes``, enlarged only if a node id in the graph
        would otherwise fall outside the array.
    """
    size = graph.network.total_nodes
    if graph.nodes:
        # `total_nodes` is the largest node id, so indexing by that id needs
        # one more row. Growing only when required keeps the shape unchanged
        # for every input that worked before.
        size = max(size, max(node.u_id for node in graph.nodes.values()) + 1)
    features = np.zeros((size, size, 4))

    for node_1, node_2 in combinations(graph.nodes.values(), 2):
        intersection = get_intersection(node_1.neighbors, node_2.neighbors)
        union = get_union(node_1.neighbors, node_2.neighbors)
        n_common = len(intersection)
        n_union = len(union)
        features[node_1.u_id, node_2.u_id] = [
            n_common,
            (1 / np.log([graph.nodes[node].deg for node in intersection])).sum(),
            # Two isolated nodes have an empty union; define their similarity as 0.
            n_common / n_union if n_union else 0.0,
            node_1.deg * node_2.deg,
        ]
    return features

In [52]:
%%time
# NOTE(review): `get_static_features` is not defined in the visible cells — only
# `get_static_features_raw` is. Confirm the intended callee; on a fresh kernel
# this call needs that name to exist.
# The trailing semicolon suppresses the display of the returned value.
get_static_features(graph);

CPU times: user 10.1 s, sys: 59.5 ms, total: 10.1 s
Wall time: 9.98 s
