In [2]:
%cd /scratch/bruingjde/SNAM2021-code/

from constants import *

/scratch/bruingjde/SNAM2021-code


In [3]:
# Build the per-network metadata frame that later feeds the dataset table.
df = networks.copy()
df['index'] = df.index
# Networks whose index is in `hypergraph_indices` are labelled 'E', all others 'P'.
df['type'] = (
  df['index'].isin(hypergraph_indices).replace({True: 'E', False: 'P'}))
# Raw f-string: in a normal string '\c' is an invalid escape sequence, which
# Python warns about and will eventually reject. The produced text is unchanged.
df['source'] = df['source'].apply(lambda x: rf'\cite{{{x}}}')
df['cat'] = df['category'].replace(
  {'social': 'S', 'information': 'I', 'technological': 'T'})
# Reassign instead of mutating in place so the statement reads as a plain transform.
df = df.drop(columns=['old_category', 'category'])

In [8]:
def get_size(network: int):
  """Return the node and edge counts of one network.

  The edge list is unpickled from ``data/<network>/edgelist.pkl`` (the network
  number is zero-padded to two digits) and loaded into a networkx graph.

  Returns a ``pd.Series`` with the entries ``nodes`` and ``edges``, named after
  ``network``.
  """
  edges_frame = pd.read_pickle(f'data/{network:02}/edgelist.pkl')
  network_graph = nx.from_pandas_edgelist(edges_frame)
  counts = {
    'nodes': network_graph.number_of_nodes(),
    'edges': network_graph.number_of_edges(),
  }
  return pd.Series(counts, name=network)

# Collect the node/edge counts of every network via tlp.ProgressParallel.
# Each job returns a Series named after its network, so the resulting frame
# has one row per network and the columns 'nodes' and 'edges'.
size = pd.DataFrame(
  tlp.ProgressParallel(n_jobs=network_count, total=network_count)(
    joblib.delayed(get_size)(network) for network in network_indices
  )
)
# NOTE: the former `size.rename(dict(nodes='n', edges='m'), inplace=True)` was
# removed. Without `columns=` the mapping is applied to the row labels, so it
# never renamed anything, and later cells select the 'nodes'/'edges' columns.

  0%|          | 0/26 [00:00<?, ?it/s]

In [13]:
# Read the per-network statistics from data/<network>/stats.json.
stats = {}
for network in tqdm(network_indices):
  with open(f'data/{network:02}/stats.json') as handle:
    stats[network] = json.load(handle)

# One row per network; shorten the density column name and drop the
# 'nodes'/'edges' columns from this frame.
stats = (
  pd.DataFrame.from_dict(stats, orient='index')
  .rename(columns={'density (nx.Graph)': 'density'})
  .drop(columns=['nodes', 'edges'])
)

  0%|          | 0/26 [00:00<?, ?it/s]

In [10]:
# Wrap whatever get_diameter() returns in a Series named 'diameter'.
diameter = pd.Series(
  data=get_diameter(),
  name='diameter',
)

  0%|          | 0/26 [00:00<?, ?it/s]

In [11]:
# Place metadata, statistics, sizes and diameters side by side, aligned on the index.
result = pd.concat(
  objs=[df, stats, size, diameter],
  axis='columns',
)

In [19]:
# Display the contents of `simplegraph_indices` for reference
# (not defined in this notebook; presumably provided by `constants` — confirm).
simplegraph_indices

{4, 8, 9, 10, 11, 16, 18, 20, 21, 24}

In [17]:
# Display the 'avg events per pair' column in ascending order.
result.loc[:, 'avg events per pair'].sort_values(ascending=True)

4      1.000000
8      1.000000
11     1.009546
9      1.015235
24     1.019506
21     1.050273
10     1.118615
16     1.209968
6      1.353547
25     1.438842
5      1.593639
20     1.722823
18     1.746068
23     1.838463
22     1.898487
2      1.944224
28     2.257692
29     2.460012
14     2.533092
1      3.533745
3      3.836883
13     4.795904
7      7.082018
19     8.793729
30    20.688123
12    25.508151
Name: avg events per pair, dtype: float64

In [None]:
def convert_int_to_short(x: int):
  """Abbreviate a count with a K (thousands) or M (millions) suffix.

  Values above 2,000,000 or 2,000 are shown without decimals ('8M', '17K');
  smaller values within a unit keep one decimal ('1.8M', '1.5K'). Values up
  to and including 1,000 are returned as plain digits.

  Returns the abbreviated number as a string.
  """
  if x > 2000000:
    return f'{x/1000000:.0f}M'
  # From 999,500 upwards the thousands form would round to '1000K' (and exactly
  # one million used to land there as well), so switch to millions here.
  if x >= 999500:
    return f'{x/1000000:.1f}M'
  if x > 2000:
    return f'{x/1000:.0f}K'
  if x > 1000:
    return f'{x/1000:.1f}K'
  return str(x)
  
def scientific_notation(x):
  r"""Format a number as LaTeX scientific notation with a one-digit coefficient.

  Example: ``5e-06`` becomes ``$5 \times 10^{-6}$``.

  The exponent is taken in full, including its sign, and wrapped in braces so
  that negative or multi-digit exponents typeset correctly. Values without an
  exponent part in their ``.0e`` rendering (``nan``, ``inf``) are returned as
  that plain rendering.
  """
  formatted = f'{x:.0e}'
  coefficient, separator, exponent = formatted.partition('e')
  if not separator:
    # Nothing to split for 'nan' / 'inf'.
    return formatted
  # Raw f-string: in a normal string '\t' of '\times' would become a TAB.
  # int() strips the '+' sign and leading zeros of e.g. '+03' / '-06'.
  return rf'${coefficient} \times 10^{{{int(exponent)}}}$'

# Shorten the long statistic names to the abbreviations used in the table.
# NOTE(review): these two mapping entries were dangling outside any dict, which
# is a syntax error. Applying them to `result` is an assumption based on the
# 'da'/'acc' columns selected for the final table — confirm.
result = result.rename(columns={
  'degree assortativity (nx.Graph)': 'da',
  'average clustering coefficient': 'acc'})

In [7]:
# Keep only complete rows, store the diameter as an integer and renumber the
# rows consecutively.
result = (
  result
  .dropna()
  .astype({'diameter': int})
  .reset_index(drop=True)
)
# Shift the fresh 0-based index so that row numbering starts at 1.
result.index = result.index + 1

In [8]:
# Columns of the dataset table, in display order.
columns = [
  'label',
  'cat',
  'type',
  'nodes',
  'edges',
  'density',
  'da',
  'acc',
  'diameter',
  'source',
]

In [9]:
# Print the dataset overview as a LaTeX table. The 'source' column gets a blank
# header, and escaping is disabled so LaTeX markup stored in the cells is
# emitted verbatim.
print(
  result[columns].rename(columns={'source': ' '}).to_latex(
    caption=(
      'Networks used in this work. '
      'The following abbreviations are used in the columns; '
      'cat: category, '
      'da: degree assortativity, acc: average clustering coefficient. '
      'In the column type, E marks a discrete interaction network, '
      # Caption fixes: add the missing space after the sentence, correct the
      # verb agreement, and drop the trailing space.
      'P indicates a network with persistent interactions. '
      'K and M indicate thousands and millions, respectively.'),
    label='table:datasets',
    escape=False,
    multicolumn=False))

\begin{table}
\centering
\caption{Networks used in this work. The following abbreviations are used in the columns; cat: category, da: degree assortativity, acc: average clustering coefficient. In the column type, E marks a discrete interaction network, P indicates a network with persistent interactions.The K and M indicates thousands and millions, respectively. }
\label{table:datasets}
\begin{tabular}{lllllllrrrl}
\toprule
{} &  label & cat & type & nodes & edges &          density &    da &   acc &  diameter &                           \\
\midrule
1  &   DBLP &   S &    E &  1.8M &    8M &  $5 \times 10^6$ &  0.11 &  0.63 &        23 &            \cite{Ley2002} \\
2  &  HepPh &   I &    E &   17K &  1.2M &  $8 \times 10^3$ &  0.06 &  0.61 &         8 &       \cite{Leskovec2007} \\
3  &  Enron &   S &    E &   87K &  299K &  $8 \times 10^5$ & -0.17 &  0.12 &        14 &          \cite{Klimt2004} \\
4  &   FB-w &   S &    P &   55K &  336K &  $2 \times 10^4$ &  0.22 &  0.12 &        16 