##### %cd /scratch/bruingjde/SNAM2021-code/

from constants import *

In [86]:
# Build the network overview table: one row per network, keyed by its index.
# `type` is 'E' when the network index is listed in `hypergraphs`, else 'P';
# the category is shortened to a single letter and exposed as `cat`.
df = (
  pd.DataFrame(networks)
  .assign(
    type=lambda frame: frame['index']
      .isin(hypergraphs)
      .replace({True: 'E', False: 'P'}))
  .set_index('index')
  .drop(columns=['old_category'])
  .replace(
    {'category': {'social': 'S', 'information': 'I', 'technological': 'T'}})
  .rename(columns={'category': 'cat'})
)

In [40]:
def get_size(network: int) -> pd.Series:
  """Count the nodes and edges of one network.

  The pickled edge list at ``data/<network, zero-padded to 2 digits>/edgelist.pkl``
  is loaded and turned into a graph with ``nx.from_pandas_edgelist`` using
  its default arguments.

  Args:
    network: Index of the network; used for the data path and as the name
      of the returned Series.

  Returns:
    A Series with the entries ``nodes`` and ``edges``, named ``network``.
  """
  graph = nx.from_pandas_edgelist(
    pd.read_pickle(f'data/{network:02}/edgelist.pkl'))
  counts = {
    'nodes': graph.number_of_nodes(),
    'edges': graph.number_of_edges(),
  }
  return pd.Series(counts, name=network)

# Run get_size for every network index in parallel; both the number of jobs
# and the progress-bar total are set to `network_count`.
size = tlp.ProgressParallel(n_jobs=network_count, total=network_count)(
  map(joblib.delayed(get_size), network_indices)
)

def convert_int_to_short(x: int) -> str:
  """Abbreviate a count with a K (thousands) or M (millions) suffix.

  Values above two thousand / two million are shown without decimals
  (``17000 -> '17K'``, ``8000000 -> '8M'``); values from one up to two
  thousand / million keep one decimal (``1200 -> '1.2K'``,
  ``1800000 -> '1.8M'``). Anything below 1000 is returned unchanged.

  The lower thresholds are inclusive so that exactly 1000 and exactly
  1000000 are rendered as ``'1.0K'`` and ``'1.0M'`` rather than ``'1000'``
  and ``'1000K'``.

  Args:
    x: The number to abbreviate.

  Returns:
    The abbreviated number as a string.
  """
  if x > 2000000:
    return f'{x/1000000:.0f}M'
  if x >= 1000000:
    return f'{x/1000000:.1f}M'
  if x > 2000:
    return f'{x/1000:.0f}K'
  if x >= 1000:
    return f'{x/1000:.1f}K'
  return str(x)
  
# Tabulate the per-network counts and abbreviate every cell (e.g. 17K, 1.2M).
size = (
  pd.DataFrame(size)
  .applymap(convert_int_to_short)
  # NOTE(review): this mapping is applied to the row index, not the columns,
  # so the `nodes`/`edges` column names are left as they are (the rendered
  # table still shows them). Confirm whether `columns=` was intended.
  .rename(index={'nodes': 'n', 'edges': 'm'})
)

  0%|          | 0/26 [00:00<?, ?it/s]

In [122]:
# Read the stats.json of every network and tabulate them, one row per network
# index, then shorten the names of the three columns used in the overview.
stats = {}
for network in tqdm(network_indices):
  with open(f'data/{network:02}/stats.json') as file:
    stats[network] = json.load(file)
stats = pd.DataFrame.from_dict(stats, orient='index').rename(
  columns={
    'density (nx.Graph)': 'density',
    'degree assortativity (nx.Graph)': 'da',
    'average clustering coefficient': 'acc',
  })

def scientific_notation(x):
  """Format a number as a one-digit mantissa and a compact exponent.

  The value is rendered with the ``.0e`` format and leading zeros are
  stripped from the exponent, keeping its sign: ``5e-06`` becomes
  ``'5e-6'`` and ``4e+05`` becomes ``'4e+5'``.

  The exponent is located by splitting on ``e`` rather than by fixed
  character positions, so negative values (``'-5e-6'``) and exponents of
  two or more digits (``'3e-12'``) are rendered correctly.

  Args:
    x: The number to format.

  Returns:
    The compact scientific notation as a string. Values whose ``.0e``
    rendering contains no exponent (nan, inf) are returned as rendered.
  """
  text = f'{x:.0e}'
  mantissa, marker, exponent = text.partition('e')
  if not marker:
    # nan and inf are rendered without an exponent part.
    return text
  sign = exponent[0]
  # Keep at least one digit when the exponent is zero ('+00' -> '+0').
  digits = exponent[1:].lstrip('0') or '0'
  return f'{mantissa}e{sign}{digits}'
  
# Display formatting: density in compact scientific notation, the other two
# measures rounded to two decimals.
stats = stats.assign(
  density=stats['density'].apply(scientific_notation),
  da=stats['da'].round(2),
  acc=stats['acc'].round(2),
)
# Column selection left disabled, so every statistic stays available.
# stats = stats[['density', 'da', 'acc']]

  0%|          | 0/26 [00:00<?, ?it/s]

In [116]:
# get_diameter is not defined in the visible cells (presumably provided by
# `from constants import *` - TODO confirm). Its result is concatenated
# column-wise into `result`, where a `diam` column is cast to int.
diameter = get_diameter()

  0%|          | 0/26 [00:00<?, ?it/s]

In [126]:
# Put the metadata, sizes, statistics and diameters side by side, keep only
# the networks for which every value is present, and number the rows from 1.
result = (
  pd.concat([df, size, stats, diameter], axis=1)
  .dropna()
  .astype({'diam': int})
  .reset_index(drop=True)
)
result.index = result.index + 1

In [132]:
# Labels with their 'avg events per pair', in ascending order of that value.
result.loc[:, ['label', 'avg events per pair']].sort_values(by='avg events per pair')

Unnamed: 0,label,avg events per pair
4,FB-w,1.0
8,FB-l,1.0
11,D-v,1.009546
9,D-rep,1.015235
22,loans,1.019506
19,chess,1.050273
10,D-f,1.118615
15,trust,1.209968
6,HepTh,1.353547
23,Wiki,1.438842


In [114]:
# Print the summary table as a LaTeX table with caption and label.
# Every caption fragment ends with a space so that the concatenated
# sentences do not run into each other in the rendered caption.
print(
  result.to_latex(
    caption=(
      'Networks used in this work. '
      'The following abbreviations are used in the columns; '
      'cat: category, '
#       'n: number of nodes, m: number of edges, '
#       'dens: density, '
      'da: degree assortativity, acc: average clustering coefficient, '
      'diam: diameter. '
      'In the column type, E marks a discrete interaction network, '
      'P indicates a network with persistent interactions. '
      'The K and M indicates thousands and millions, respectively. '
      'e marks the exponent of 10, i.e. 2e-5 = $2\\times 10^{-5}$'),
    label='table:datasets'))

\begin{table}
\centering
\caption{Networks used in this work.The following abbreviations are used in the columns; cat: category, da: degree assortativity, acc: average clustering coefficient, diam: diameter. In the column type, E marks a discrete event network, P indicates a network with persistent interactions.The K and M indicates thousands and millions, respectively. e marks the exponent of 10, i.e. 2e-5 = $2\times 10^{-5}$}
\label{table:datasets}
\begin{tabular}{lllllllrrr}
\toprule
{} &  label & cat & type & nodes & edges & density &    da &   acc &  diam \\
\midrule
1  &   DBLP &   S &    E &  1.8M &    8M &    5e-6 &  0.11 &  0.63 &    23 \\
2  &  HepPh &   I &    E &   17K &  1.2M &    8e-3 &  0.06 &  0.61 &     8 \\
3  &  Enron &   S &    E &   87K &  299K &    8e-5 & -0.17 &  0.12 &    14 \\
4  &   FB-w &   S &    P &   55K &  336K &    2e-4 &  0.22 &  0.12 &    16 \\
5  &  Condm &   S &    E &   17K &   55K &    4e-4 &  0.18 &  0.64 &    19 \\
6  &  HepTh &   I &    E &    7