In [1]:
# Uncomment to install pathpyG into the environment of the running kernel:
# %pip install git+https://github.com/pathpy/pathpyG.git

In [2]:
import torch
from torch_geometric.data import TemporalData
import numpy as np
import pathpyG as pp

# Set the torch device in pathpyG's configuration to the CPU.
pp.config['torch']['device'] = 'cpu'

In [3]:
import pandas as pd
from datetime import datetime
from tqdm import tqdm

In [4]:
def load_edgelist(path='./edges.tsv'):
    """Load time-stamped subreddit hyperlink edges from a tab-separated file.

    Parameters
    ----------
    path : str or path-like, default './edges.tsv'
        Tab-separated file with a header row that contains at least the
        columns ``SOURCE_SUBREDDIT``, ``TARGET_SUBREDDIT`` and ``TIMESTAMP``.
        ``TIMESTAMP`` values must be formatted as ``%Y-%m-%d %H:%M:%S``.

    Returns
    -------
    list of tuple
        One ``(source, target, timestamp)`` tuple per row, in file order,
        where ``timestamp`` is an ``int`` number of seconds since the epoch.

    Raises
    ------
    KeyError
        If one of the three required columns is missing.
    ValueError
        If a ``TIMESTAMP`` string does not match the expected format.
    """
    def date_to_timestamp(date_string: str) -> int:
        # NOTE: strptime yields a naive datetime and .timestamp() interprets
        # naive datetimes in the machine's local time zone, so the resulting
        # integers depend on where the notebook is executed.
        return int(datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S").timestamp())

    reddit = pd.read_csv(path, delimiter='\t')

    # Walk the three columns in lockstep instead of relabelling a slice of the
    # frame (which triggers pandas' chained-assignment warning) and looping
    # over it with the slow, row-wise iterrows().
    return [
        (source, target, date_to_timestamp(timestamp))
        for source, target, timestamp in zip(
            reddit['SOURCE_SUBREDDIT'],
            reddit['TARGET_SUBREDDIT'],
            reddit['TIMESTAMP'],
        )
    ]

In [5]:
# Read every (source, target, timestamp) tuple from the edge file.
edges = load_edgelist()

In [6]:
# Turn each edge into a plain list before it is handed to the graph constructor.
edges = list(map(list, edges))

In [81]:
# Show only the first few events instead of dumping the whole edge list,
# which would flood the notebook output for a larger input file.
edges[:10]

[['subreddit1', 'subreddit2', 1388504358],
 ['subreddit2', 'subreddit3', 1388504368],
 ['subreddit2', 'subreddit3', 1389770565],
 ['subreddit3', 'subreddit1', 1391339430]]

In [8]:
# Build a pathpyG temporal graph from the [source, target, timestamp] lists
# and print its textual summary.
g = pp.TemporalGraph.from_edge_list(edges)
print(g)

Temporal Graph with 3 nodes 3 edges and 4 time-stamped events in [1388504358, 1391339430]

Node attributes
	node_id		<class 'list'>

Graph attributes
	num_nodes		<class 'int'>
	dst		<class 'torch.Tensor'> -> torch.Size([4])
	src		<class 'torch.Tensor'> -> torch.Size([4])
	t		<class 'torch.Tensor'> -> torch.Size([4])



In [92]:
# Convert the temporal graph into an event DAG.
# NOTE(review): delta is presumably the maximum time difference (in timestamp
# units, i.e. seconds here) for two events to be linked — confirm against the
# pathpyG documentation.
dag = pp.algorithms.temporal_graph_to_event_dag(g, delta = 1)
# Extract the causal trees from the DAG and print the resulting dictionary.
x = pp.algorithms.extract_causal_trees(dag)
print(x)
# Build the path data object from the same DAG.
paths = pp.PathData.from_temporal_dag(dag)

{'subreddit1-1388504358': tensor([[0],
        [1]], dtype=torch.int32), 'subreddit2-1388504368': tensor([[2],
        [3]], dtype=torch.int32), 'subreddit2-1389770565': tensor([[4],
        [5]], dtype=torch.int32), 'subreddit3-1391339430': tensor([[6],
        [7]], dtype=torch.int32)}


In [93]:
# Report how many paths the PathData object built from the DAG holds.
print(paths.num_paths)

4
