In [1]:
import os
import weakref
from dataclasses import dataclass
from itertools import takewhile
from typing import Dict, List

import numpy as np
import pandas as pd
from numpy.typing import ArrayLike

class Node:
    """A graph vertex together with the ids of its neighbors.

    Attributes:
        neighbors: neighbor ids. Unless ``skip_unique`` is set they are
            passed through ``np.unique`` and are therefore a sorted,
            duplicate-free ndarray.
    """

    def __init__(self, u_id: int, neighbors: ArrayLike, skip_unique: bool = False):
        """
        Args:
            u_id: identifier of this vertex.
            neighbors: ids of adjacent vertices.
            skip_unique: store ``neighbors`` as given instead of running
                ``np.unique`` on it. Only use this when the caller already
                guarantees a sorted, duplicate-free array (``subgraph`` does).
        """
        self._u_id = u_id
        # np.unique both removes duplicates and sorts the ids.
        self.neighbors = neighbors if skip_unique else np.unique(neighbors)

    @property
    def u_id(self):
        """Identifier of this vertex (read-only)."""
        return self._u_id

    @property
    def deg(self):
        """Number of stored neighbors."""
        return len(self.neighbors)

    def subgraph(self, other_nodes: np.ndarray):
        """Return a copy of this node whose neighbors are restricted to ``other_nodes``.

        ``other_nodes`` is handed to ``get_intersection`` together with
        ``self.neighbors``; that helper walks both arrays in lockstep, so
        ``other_nodes`` is expected to be sorted like ``self.neighbors``.
        """
        return Node(self.u_id, get_intersection(self.neighbors, other_nodes), skip_unique=True)
        

class Network:
    """Temporal edge list loaded from a whitespace-separated text file.

    Leading lines that start with ``%`` are treated as a comment header and
    skipped. Every remaining row is read as ``fr to weight timestamp``; the
    weight column is dropped and rows are ordered by timestamp.

    Attributes:
        data: DataFrame with columns ``fr``, ``to``, ``timestamp``, sorted by
            ``timestamp``.
        edges: ``(n_events, 2)`` array of endpoints, each row sorted so the
            smaller id comes first.
        total_nodes: the largest node id that appears in ``edges`` (callers
            in this notebook size arrays as ``total_nodes + 1``).
        timestamps: timestamps aligned row-by-row with ``edges``.
    """

    def __init__(self, path: str):
        """Read the edge list stored at ``path``.

        Raises:
            OSError: if ``path`` is not an existing file.
        """
        if not os.path.isfile(path):
            raise OSError("wrong graph path")

        # Count the '%' header lines so that pandas can skip exactly those.
        with open(path) as f:
            skip_rows = len(list(
                takewhile(lambda s: s.startswith("%"), f)
            ))

        self.data = (
            pd.read_csv(path, sep=r'\s+', header=None,
                        names=["fr", "to", "weight", "timestamp"], skiprows=skip_rows)
            .drop(columns=["weight"])
            .sort_values(by="timestamp")
        )

        # Row-wise sort: every edge is stored as (smaller id, larger id).
        self.edges = np.sort(self.data[["fr", "to"]].values)
        self.total_nodes = self.edges.max()
        self.timestamps = self.data.timestamp.values

    def __str__(self):
        """Render the events as tab-separated ``from, to, timestamp`` lines.

        Built from ``edges`` and ``timestamps``; the weight column is not
        kept by ``__init__`` and therefore is not printed.
        """
        lines = ["from\tto\ttimestamp\n"]
        for (fr, to), timestamp in zip(self.edges, self.timestamps):
            lines.append(f"{fr}\t{to}\t{timestamp}\n")
        return "".join(lines)


class Graph:
    """Undirected graph view: an edge list plus per-node adjacency.

    Attributes:
        network: the object this graph was derived from (stored as-is).
        edges: ``(n, 2)`` array of edges, possibly with repeated rows.
        edges_set: the distinct rows of ``edges``.
        nodes: mapping from node id to its node object.
    """

    def __init__(self, network: "Network", edges: np.ndarray, nodes: "dict[int, Node]"):
        self.network = network
        self.edges = edges
        # Distinct rows only; repeated events between the same pair collapse.
        self.edges_set: np.ndarray = np.unique(self.edges, axis=0)
        self.nodes = nodes

    def get_subgraph(self, nodes_ids: ArrayLike) -> "Graph":
        """Return the subgraph induced by ``nodes_ids``.

        Every listed id must be a key of ``self.nodes``. Each node is
        restricted to the listed ids through its ``subgraph`` method and the
        edge list is rebuilt from the restricted adjacency.

        Adjacency is symmetric, so a pair (u, v) is reachable from both
        endpoints. Only the ``u <= v`` orientation is emitted so that every
        undirected edge is counted once and ``density`` is not doubled.
        """
        nodes_ids = np.unique(nodes_ids)
        new_nodes = {u_id: self.nodes[u_id].subgraph(nodes_ids)
                     for u_id in nodes_ids}
        pairs = [
            (u_id, neighbor)
            for u_id, node in new_nodes.items()
            for neighbor in node.neighbors
            if u_id <= neighbor
        ]
        # reshape keeps the (n, 2) layout even when no edge survives.
        new_edges = np.array(pairs, dtype=nodes_ids.dtype).reshape(-1, 2)

        return Graph(self.network, new_edges, new_nodes)

    @property
    def density(self):
        """``2 * |edges_set| / (n * (n - 1))``; NaN when there are fewer than two nodes."""
        if len(self.nodes) > 1:
            return 2 * len(self.edges_set) / len(self.nodes) / (len(self.nodes) - 1)
        return np.nan

class StaticGraph(Graph):
    """A ``Graph`` that also remembers the timestamp of each of its edges."""

    def __init__(self, timestamps, *args, **kwargs):
        """Store ``timestamps`` and forward everything else to ``Graph``."""
        super().__init__(*args, **kwargs)
        self.timestamps = timestamps

    @staticmethod
    def from_time_slice(network, quantile_end, quantile_start=0) -> "Graph":
        """Build a graph from the events whose timestamp lies in a quantile window.

        Args:
            network: object exposing aligned ``timestamps`` and ``edges`` arrays.
            quantile_end: upper quantile of the timestamp distribution (inclusive).
            quantile_start: lower quantile of the timestamp distribution (inclusive).

        Returns:
            A ``StaticGraph`` holding the selected edges, their timestamps and
            one ``Node`` per vertex that occurs in those edges.
        """
        assert 0 <= quantile_start <= quantile_end <= 1, "Incorrect quantiles"

        all_times = network.timestamps
        lower, upper = np.quantile(all_times, [quantile_start, quantile_end])
        in_window = (all_times >= lower) & (all_times <= upper)
        window_edges = network.edges[in_window]
        window_times = all_times[in_window]

        # Append the reversed edges so each endpoint lists the other one.
        both_directions = np.vstack([window_edges, window_edges[:, ::-1]])
        neighbor_lists = (
            pd.DataFrame(both_directions, columns=["v1", "v2"])
            .groupby("v1")
            .v2.apply(np.array)
        )

        nodes = {}
        for u_id, neighbors in neighbor_lists.items():
            nodes[u_id] = Node(u_id, neighbors)

        return StaticGraph(window_times, network, window_edges, nodes)

In [2]:
import numba
from numba import njit

@njit
def get_intersection(first, second):
    """Return the values present in both arrays, in the order they occur.

    Both arrays are walked in lockstep with one cursor each, which is only
    correct when they are sorted in ascending order (the callers in this
    notebook pass ``np.unique`` outputs). The result uses ``first``'s dtype.
    """
    n_first = first.size
    n_second = second.size
    # The overlap cannot be longer than the shorter input.
    common = np.empty(min(n_first, n_second), dtype=first.dtype)

    pos_first = 0
    pos_second = 0
    n_common = 0
    while pos_first < n_first and pos_second < n_second:
        left = first[pos_first]
        right = second[pos_second]
        if left == right:
            common[n_common] = left
            n_common += 1
            pos_first += 1
            pos_second += 1
        elif left < right:
            pos_first += 1
        else:
            pos_second += 1

    return common[:n_common]

In [3]:
# Load the temporal edge list stored at this relative path.
network = Network("data/opsahl-ucsocial/out.opsahl-ucsocial")
# Upper timestamp quantile handed to StaticGraph.from_time_slice below.
time_quantile = .8

In [4]:
# Graph built from the events whose timestamp is within the first
# `time_quantile` share of the timestamp distribution (quantile_start defaults to 0).
graph = StaticGraph.from_time_slice(network, time_quantile)

In [5]:
# Basic size statistics of the sliced graph.
num_vertices = len(graph.nodes)
num_edges = len(graph.edges_set)  # distinct edges only
# Same formula as Graph.density, written out inline.
density = 2 * num_edges / num_vertices / (num_vertices - 1)

In [6]:
from queue import deque

def get_connected_comps(graph: "Graph") -> list[list[int]]:
    """Split ``graph`` into connected components with a breadth-first search.

    Args:
        graph: object whose ``nodes`` maps each node id to an object with a
            ``neighbors`` sequence of node ids (all of them keys of ``nodes``).

    Returns:
        One list of node ids per component. Every node id appears exactly
        once overall; components are listed in the order their first node is
        met while iterating ``graph.nodes``.
    """
    visited = {u_id: False for u_id in graph.nodes}
    conn_comps: list[list[int]] = []

    for root_u_id in graph.nodes:
        if visited[root_u_id]:
            continue

        # Mark nodes when they are enqueued so none is queued twice, and
        # expand every dequeued node so the search goes past the root's
        # direct neighbors.
        visited[root_u_id] = True
        queue = deque([root_u_id])
        component: list[int] = []

        while queue:
            u_id = queue.popleft()
            component.append(u_id)
            for neighbor_id in graph.nodes[u_id].neighbors:
                if not visited[neighbor_id]:
                    visited[neighbor_id] = True
                    queue.append(neighbor_id)

        conn_comps.append(component)

    return conn_comps

In [7]:
# Find the components and keep the subgraph induced by the longest one.
conn_comps = get_connected_comps(graph)
conn_comp = graph.get_subgraph(max(conn_comps, key=len))
# Share of all vertices that belong to that component.
max_conn_comp_fraction =  len(conn_comp.nodes) / num_vertices


In [8]:
from scipy.stats import pearsonr 

def get_avg_cluster_coeff(graph: Graph) -> float:
    """Average, over all nodes, of the density of the subgraph induced by the node's neighbors.

    Nodes whose neighborhood density is NaN are ignored by the mean.
    """
    local_coeffs = []
    for node in graph.nodes.values():
        neighborhood = graph.get_subgraph(node.neighbors)
        local_coeffs.append(neighborhood.density)
    return np.nanmean(local_coeffs)

def get_deg_assortivity(graph: "Graph") -> float:
    """Degree assortativity: Pearson correlation of the degrees at both ends of an edge.

    The graph is undirected, so every edge of ``graph.edges_set`` is used in
    both orientations. Correlating a single stored orientation (smaller id
    first) would make the result depend on how nodes happen to be numbered.

    Args:
        graph: object with ``edges_set`` (rows of two node ids) and ``nodes``
            mapping node id to an object with a ``deg`` attribute.

    Returns:
        The correlation coefficient, or NaN when there are no edges.
    """
    edges = np.asarray(graph.edges_set).reshape(-1, 2)
    if edges.shape[0] == 0:
        return np.nan

    degree_of = {u_id: node.deg for u_id, node in graph.nodes.items()}
    end_degs = np.array([[degree_of[u], degree_of[v]] for u, v in edges], dtype=float)

    # (u, v) followed by (v, u): the correlation becomes orientation-free.
    first = np.concatenate([end_degs[:, 0], end_degs[:, 1]])
    second = np.concatenate([end_degs[:, 1], end_degs[:, 0]])
    return pearsonr(first, second).statistic

In [9]:
from itertools import combinations

In [10]:
def get_static_features(graph):
    """Compute four unweighted pairwise scores for every pair of node ids.

    Returns an array of shape ``(size, size, 4)`` with
    ``size = graph.network.total_nodes + 1``; the last axis holds, in order:
    common-neighbor count, Adamic-Adar score, Jaccard ratio and the product
    of the two degrees.
    """
    n = graph.network.total_nodes + 1
    common = np.zeros((n, n))
    adamic_adar = np.zeros((n, n))
    degrees = np.zeros(n)

    for node in graph.nodes.values():
        degrees[node.u_id] = node.deg
        if node.deg <= 1:
            # A single neighbor forms no pair, and log(1) would be zero.
            continue
        # This node is a shared neighbor of every pair of its own neighbors.
        pair_block = np.ix_(node.neighbors, node.neighbors)
        common[pair_block] += 1
        adamic_adar[pair_block] += 1 / np.log(node.deg)

    deg_col = degrees.reshape(-1, 1)
    deg_row = degrees.reshape(1, -1)

    # Jaccard denominator; empty unions are set to 1 to avoid dividing by zero.
    union_sizes = deg_col + deg_row - common
    union_sizes[union_sizes == 0] = 1

    return np.dstack([
        common,
        adamic_adar,
        common / union_sizes,
        deg_col * deg_row,
    ])

In [11]:
@njit
def post_event_agg(weights: np.ndarray):
    """Summarise an array of event weights as eight statistics.

    The returned array is laid out as: the 0, 0.25, 0.5, 0.75 and 1
    quantiles (i.e. minimum, quartiles and maximum), followed by the sum,
    the mean and the variance (``weights.var()``) of ``weights``.
    """
    return np.array([
        *np.quantile(weights, [0, .25, .5, .75, 1]),
        weights.sum(),
        weights.mean(),
        weights.var()
    ])

In [12]:
EPS = 1e-6  # keeps logarithms and denominators away from zero


def get_features(graph: StaticGraph, lower_bound: float):
    """Compute time-weighted pairwise link-prediction features.

    Every event (row of ``graph.edges``) gets three weights in
    ``[lower_bound, 1]`` derived from its normalised timestamp (linear,
    exponential and square-root schemes). The events of each node pair are
    summarised per scheme by ``post_event_agg`` (8 statistics), giving 24
    values per edge. From these, four 24-channel pairwise scores are built.

    Args:
        graph: graph exposing ``timestamps`` aligned with ``edges``,
            ``nodes`` and ``network.total_nodes``.
        lower_bound: weight assigned to the oldest event.

    Returns:
        Array of shape ``(size, size, 96)`` with
        ``size = graph.network.total_nodes + 1``; the last axis is the
        concatenation of the AA, CN, JC and PA blocks (24 channels each).
        Note that several dense ``size x size x 24`` arrays are allocated.
    """
    t = graph.timestamps
    span = t.max() - t.min()
    if span > 0:
        dt = (t - t.min()) / span
    else:
        # All events share one timestamp: treat each as the most recent one
        # instead of producing 0/0 = NaN.
        dt = np.ones(t.shape, dtype=float)
    weights = lower_bound + (1 - lower_bound) * np.vstack([
        dt,
        (np.exp(3 * dt) - 1) / (np.exp(3) - 1),
        np.sqrt(dt)
    ]).T

    weight_cols = ["w1", "w2", "w3"]
    # Store each pair as (smaller id, larger id) so that both orientations
    # of an undirected edge end up in the same group.
    events = pd.concat([
        pd.DataFrame(np.sort(graph.edges, axis=1), columns=["v1", "v2"]),
        pd.DataFrame(weights, columns=weight_cols)
    ], axis=1)

    pair_ids = []
    pair_stats = []
    for (v1, v2), group in events.groupby(["v1", "v2"]):
        pair_ids.append((v1, v2))
        pair_stats.append(np.concatenate([
            post_event_agg(np.ascontiguousarray(group[col].to_numpy(dtype=float)))
            for col in weight_cols
        ]))
    pair_ids = np.array(pair_ids, dtype=np.intp)
    pair_stats = np.vstack(pair_stats)
    n_stats = pair_stats.shape[1]

    size = graph.network.total_nodes + 1
    features = np.zeros((size, size, n_stats))
    summs = np.zeros((size, size, n_stats))
    aa = np.zeros((size, size, n_stats))
    outgoing_sums = np.zeros((size, n_stats))

    # The graph is undirected: fill both [u, v] and [v, u], otherwise the
    # per-node lookups below read zeros for neighbors with a smaller id.
    features[pair_ids[:, 0], pair_ids[:, 1]] = pair_stats
    features[pair_ids[:, 1], pair_ids[:, 0]] = pair_stats

    for node in graph.nodes.values():
        node_features = features[node.u_id, node.neighbors]
        # Needed for every node (JC and PA use it), not only for deg > 1.
        outgoing_sums[node.u_id] = node_features.sum(axis=0)

        if node.deg > 1:
            # pair_sums[i, j] = w(node, neighbor_i) + w(node, neighbor_j)
            pair_sums = node_features[None, :] + node_features[:, None]
            pair_block = np.ix_(node.neighbors, node.neighbors)
            # Accumulate over all common neighbors of a pair instead of
            # keeping only the last one visited.
            summs[pair_block] += pair_sums
            aa[pair_block] += pair_sums / np.log(1 + outgoing_sums[node.u_id] + EPS)

    out_features = np.dstack([
        aa, # AA
        summs, # CN
        summs / (outgoing_sums[None, :] + outgoing_sums[:, None] + EPS), # JC
        outgoing_sums[None, :] * outgoing_sums[:, None], # PA
    ])

    return out_features

In [13]:
# Weighted pairwise features with lower_bound = .2 for the oldest events.
t = get_features(graph, .2);

In [351]:
# Sanity check of the feature tensor dimensions.
t.shape

(1900, 1900, 96)