# Building the Event Graphs for Study

In [1]:
import pandas as pd 
from eventgraphs import EventGraph
import numpy as np
import json
import pickle

## 1. SocioPatterns - School (Hypergraph)

In [2]:
import json

# Read the SocioPatterns primary-school event list from its JSON file.
with open('./data/sociopatterns-primaryschool.json') as handle:
    data = json.loads(handle.read())

In [6]:
# Create an EventGraph from the loaded event records using the 'teg' rule set.
EG = EventGraph.from_dict_eventlist(data, graph_rules='teg')
# Override the rule set's 'delta_cutoff' before the graph is built.
# NOTE(review): presumably the largest inter-event time at which two events
# are still linked, so 1e9 would be effectively "no cutoff" -- confirm
# against the eventgraphs documentation.
EG.event_graph_rules['delta_cutoff'] = 1e9
EG.build(verbose=True)



In [8]:
# Run eventgraphs' edge-motif calculation on the graph built in the previous cell.
EG.calculate_edge_motifs()

In [9]:
# Write the event graph to disk; the Statistics section reloads every file
# found under ./data/eventgraphs/.
EG.save('./data/eventgraphs/sociopatterns-primary.json')

## 2. UC Irvine Social Network (Pairwise)

In [19]:
# Load the UC Irvine message list: space-separated rows without a header,
# read as (source, target, time).
# The time column is deliberately NOT passed to `parse_dates`: it is treated
# below as integer epoch seconds and converted explicitly with `unit='s'`.
# Asking read_csv to parse it as a date string is at best a no-op and, if it
# ever succeeded, would make `astype(int)` return nanoseconds instead.
data = pd.read_csv('../[Paper] Temporal Event Graph/messages.txt',
                   sep=' ',
                   header=None,
                   names=['source', 'target', 'time'],
                  )

data.time = data.time.astype(int)
# Keep a datetime copy of each event time alongside the numeric one.
data['timestamp'] = pd.to_datetime(data.time, unit='s', utc=False)
# Re-base the numeric times so the first row of the file sits at t = 0
# (positional lookup, so it does not depend on the index labels).
data.time = data.time - data.time.iloc[0]

In [21]:
# Create an EventGraph from the pandas event list using the 'teg' rule set.
EG = EventGraph.from_pandas_eventlist(data, graph_rules='teg')
# Override the rule set's 'delta_cutoff' before the graph is built.
# NOTE(review): presumably the largest inter-event time at which two events
# are still linked, so 1e9 would be effectively "no cutoff" -- confirm
# against the eventgraphs documentation.
EG.event_graph_rules['delta_cutoff'] = 1e9
EG.build(verbose=True)



In [22]:
# Run eventgraphs' edge-motif calculation on the graph built in the previous cell.
EG.calculate_edge_motifs()

In [23]:
# Write the event graph to disk; the Statistics section reloads every file
# found under ./data/eventgraphs/.
EG.save('./data/eventgraphs/social-ucirvine.json')

## 3. Twitter - Emirates (Hypergraph)

In [14]:
# Read the Twitter (Emirates) hyperevent list from its JSON file.
with open('/scratch/mellor/twitter_data/emirates_sunday_hyperevents.json') as handle:
    data = json.loads(handle.read())

In [15]:
# Create an EventGraph from the loaded event records using the 'teg' rule set.
EG = EventGraph.from_dict_eventlist(data, graph_rules='teg')
# Override the rule set's 'delta_cutoff' before the graph is built.
# NOTE(review): presumably the largest inter-event time at which two events
# are still linked, so 1e9 would be effectively "no cutoff" -- confirm
# against the eventgraphs documentation.
EG.event_graph_rules['delta_cutoff'] = 1e9
EG.build(verbose=True)



In [16]:
# Run eventgraphs' edge-motif calculation on the graph built in the previous cell.
EG.calculate_edge_motifs()

In [17]:
# Write the event graph to disk; the Statistics section reloads every file
# found under ./data/eventgraphs/.
EG.save('./data/eventgraphs/twitter-emirates.json')

## 4. Random Graph (Pairwise)

In [2]:
# Synthetic pairwise event stream: M events among N nodes. Each event joins
# two distinct nodes drawn uniformly at random, and successive events are
# separated by exponentially distributed waiting times.
N = 500
M = 50000
nodes = np.arange(N)
events = np.zeros((M, 3))

t = 0
for row in events:
    # `row` is a view into `events`, so writing to it fills the array in place.
    row[:2] = np.random.choice(nodes, size=2, replace=False)
    row[2] = t
    t += np.random.exponential()

In [3]:
# Tabulate the generated events; node ids were stored as floats in the event
# array, so cast the two endpoint columns back to integers.
endpoint_dtypes = {'source': int, 'target': int}
data = pd.DataFrame(events, columns=['source','target','time']).astype(endpoint_dtypes)

In [4]:
# `data` is a pandas DataFrame at this point, so use the DataFrame
# constructor (as the UC Irvine section does) rather than the one meant for
# dict/record event lists.
EG = EventGraph.from_pandas_eventlist(data, graph_rules='teg')
# Override the rule set's 'delta_cutoff' before the graph is built.
# NOTE(review): presumably the largest inter-event time at which two events
# are still linked, so 1e9 would be effectively "no cutoff" -- confirm
# against the eventgraphs documentation.
EG.event_graph_rules['delta_cutoff'] = 1e9
EG.build(verbose=True)



In [5]:
# Run eventgraphs' edge-motif calculation on the graph built in the previous cell.
EG.calculate_edge_motifs()

In [6]:
# Write the event graph to disk; the Statistics section reloads every file
# found under ./data/eventgraphs/.
# NOTE(review): the recorded Statistics output lists this dataset as
# 'random-er', not 'random-complete' -- confirm which filename is intended.
EG.save('./data/eventgraphs/random-complete.json')

## 5. ArXiv Postings (Hypergraph)

In [3]:
# Read the pickled arXiv metadata dictionary.
# NOTE: unpickling executes arbitrary code, so only load files from a
# trusted source.
with open('/scratch/mellor/data/metadata_dic.pkl', 'rb') as handle:
    data = pickle.loads(handle.read())

In [6]:
# Turn every arXiv metadata record into one hyperevent: all author names form
# the source set, the target set is empty, and the event time is the record's
# 'updated' timestamp.
events = []
for key, value in data.items():
    try:
        source = [x['name'] for x in value['authors']]
        time = pd.to_datetime(value['updated'])
    except (KeyError, TypeError, ValueError):
        # Best effort: report records with missing/malformed authors or an
        # unparseable date and skip them. Only data-shape errors are caught,
        # so KeyboardInterrupt and genuine bugs are no longer swallowed.
        print(key,value)
        continue
    events.append({'source': source,
                   'target': [],
                   'time': time})

In [9]:
# Put the hyperevents in chronological order with a fresh 0..n-1 index, then
# express each time as the whole number of days since the earliest record.
data = pd.DataFrame(events)
data = data.sort_values('time').reset_index(drop=True)
data['time'] = (data['time'] - data.loc[0, 'time']).dt.days

Timedelta('10444 days 17:55:52')

In [63]:
# `data` is a pandas DataFrame at this point, so use the DataFrame
# constructor (as the UC Irvine section does) rather than the one meant for
# dict/record event lists.
EG = EventGraph.from_pandas_eventlist(data, graph_rules='teg')
# Override the rule set's 'delta_cutoff' before the graph is built.
# NOTE(review): presumably the largest inter-event time at which two events
# are still linked, so 1e9 would be effectively "no cutoff" -- confirm
# against the eventgraphs documentation.
EG.event_graph_rules['delta_cutoff'] = 1e9
EG.build(verbose=True)



In [64]:
# Run eventgraphs' edge-motif calculation on the graph built in the previous cell.
EG.calculate_edge_motifs()

In [65]:
# Write the event graph to disk; the Statistics section reloads every file
# found under ./data/eventgraphs/.
EG.save('./data/eventgraphs/academic-coauthors.json')

# Statistics

In [3]:
from glob import glob

# Reload each saved event graph and print its headline statistics: the
# dataset label (file name up to the first dot) followed by the graph's
# N, M and D attributes.
for path in glob('./data/eventgraphs/*'):
    basename = path.rsplit('/', 1)[-1]
    label = basename.partition('.')[0]
    print(label)
    EG = EventGraph.from_file(path)
    print("Nodes:", EG.N)
    print("Events:", EG.M)
    print("Duration:", EG.D, end='\n\n')

twitter-emirates
Nodes: 53251
Events: 167664
Duration: 86398

random-er
Nodes: 500
Events: 50000
Duration: 50162.9154966

academic-coauthors
Nodes: 30927
Events: 54177
Duration: 10444

sociopatterns-primary
Nodes: 242
Events: 38923
Duration: 116920

social-ucirvine
Nodes: 1899
Events: 59835
Duration: 16736181

