In [1]:
%cd /scratch/bruingjde/SNAM2021-code/

from constants import *

/scratch/bruingjde/SNAM2021-code


In [2]:
df = networks.copy()
df['index'] = df.index
df['type'] = (
  df['index'].isin(hypergraph_indices).replace({True: 'E', False: 'P'}))
df['source'] = df['source'].apply(lambda x: f'\cite{{{x}}}')
df['cat'] = df['category'].replace(
  {'social': 'S', 'information': 'I', 'technological': 'T'})
df.drop(columns=['old_category', 'category'], inplace=True)

In [3]:
def get_size(network: int):
  edgelist = pd.read_pickle(f'data/{network:02}/edgelist.pkl')
  graph = nx.from_pandas_edgelist(edgelist, create_using=nx.MultiGraph)
    
  return pd.Series(
    dict(nodes=graph.number_of_nodes(), edges=graph.number_of_edges()),
    name=network
  )

size = tlp.ProgressParallel(n_jobs=network_count, total=network_count)(
  joblib.delayed(get_size)(network) for network in network_indices
)
size = pd.DataFrame(size) 
size.rename(dict(nodes='n', edges='m'), inplace=True)

  0%|          | 0/26 [00:00<?, ?it/s]

In [22]:
stats = dict()
for network in tqdm(network_indices):
  with open(f'data/{network:02}/stats.json') as file:
    stats[network] = json.load(file)
stats = pd.DataFrame.from_dict(stats, orient='index')
stats.rename(columns={'density (nx.Graph)': 'density', 'degree assortativity (nx.Graph)': 'degree assortativity'}, inplace=True)

columns = ['density', 'degree assortativity', 
           'average clustering coefficient']

stats = stats[columns].copy()

# stats.drop(columns=['nodes', 'edges', 'density (nx.MultiGraph)', 'fraction edges in GC'], inplace=True)

  0%|          | 0/26 [00:00<?, ?it/s]

In [6]:
diameter = pd.Series(get_diameter(), name='diameter')

  0%|          | 0/26 [00:00<?, ?it/s]

In [8]:
def convert_int_to_short(x: int):
  if x > 2000000: return f'{x/1000000:.0f}M'
  elif x > 1000000: return f'{x/1000000:.1f}M'
  elif x > 2000: return f'{x/1000:.0f}K'
  elif x > 1000: return f'{x/1000:.1f}K'
  else: return str(x)
  
def scientific_notation(x): 
  x = f'{x:.0e}'
  coefficient = x[0]
  exponent = x[-1]
  return f'${coefficient} \times 10^{exponent}$'

In [61]:
result = pd.concat([df, stats, size, diameter], axis=1)
result.dropna(inplace=True)
result = result.astype(dict(diameter=int))
result.reset_index(inplace=True, drop=True)
result.index = result.index + 1
result['nodes'] = result['nodes'].apply(convert_int_to_short)
result['edges'] = result['edges'].apply(convert_int_to_short)
result['density'] = result['density'].apply(scientific_notation)
result['degree assortativity'] = result['degree assortativity'].round(2)
result['average clustering coefficient'] = (
  result['average clustering coefficient'].round(2))

In [62]:
columns = [
  'label', 'cat', 'type', 'nodes', 'edges', 'density', 
  'degree assortativity', 'average clustering coefficient', 'diameter', 
  'source']

In [66]:
print(
  result[columns].rename(columns={'source': ' ', 'degree assortativity': 'da', 
                                  'average clustering coefficient': 'acc'}
                        ).to_latex(
    index=False,
    caption=(
      'Networks used in this work. '
      'The following abbreviations are used in the columns; '
      'cat: category, '
      'da: degree assortativity, acc: average clustering coefficient. '
      'In the column type, E marks a discrete interaction network, '
      'P indicates a network with persistent interactions.'
      'The K and M indicates thousands and millions, respectively. '),
    label='table:datasets',
    escape=False,
    multicolumn=False))

\begin{table}
\centering
\caption{Networks used in this work. The following abbreviations are used in the columns; cat: category, da: degree assortativity, acc: average clustering coefficient. In the column type, E marks a discrete interaction network, P indicates a network with persistent interactions.The K and M indicates thousands and millions, respectively. }
\label{table:datasets}
\begin{tabular}{llllllrrrl}
\toprule
label & cat & type & nodes & edges &         density &    da &  acc &  diameter &                          \\
\midrule
 DBLP &   S &    E &  1.8M &   29M & $5 \times 10^6$ &  0.11 & 0.63 &        23 &           \cite{Ley2002} \\
HepPh &   I &    E &   17K &    2M & $8 \times 10^3$ &  0.06 & 0.61 &         8 &      \cite{Leskovec2007} \\
Enron &   S &    E &   87K &  1.1M & $8 \times 10^5$ & -0.17 & 0.12 &        14 &         \cite{Klimt2004} \\
 FB-w &   S &    P &   55K &  336K & $2 \times 10^4$ &  0.22 & 0.12 &        16 &     \cite{Viswanath2009} \\
Condm &   S &  