In [1]:
%cd /scratch/bruingjde/SNAM2021-code/

from constants import *

/scratch/bruingjde/SNAM2021-code


# Static features

In [2]:
def logistic_regression_I(network: int, heuristic: str):
  X = np.load(f'data/{network:02}/features/time_agnostic/{heuristic}.npy')
  X = X.reshape(-1, 1)
    
  y = np.load(f'data/{network:02}/targets_sampled.npy')
  
  X_train, X_test, y_train, y_test = (
    sklearn.model_selection.train_test_split(X, y))
  pipe = sklearn.pipeline.make_pipeline(
    sklearn.preprocessing.StandardScaler(),
    sklearn.linear_model.LogisticRegression(max_iter=10000)) # type: ignore
  pipe.fit(X_train, y_train)
  
  auc = sklearn.metrics.roc_auc_score(
    y_true=y_test, y_score=pipe.predict_proba(X_test)[:,1]) # type: ignore
  
  return auc

exp1 = {
  heuristic: (
    [logistic_regression_I(network, heuristic) for network in network_indices])
  for heuristic in heuristics
}
exp1 = pd.DataFrame(exp1)

# Node temporal features

In [5]:
def logistic_regression_I_II_hypergraph(network: int, heuristic: str):
  X = {
    'time agnostic': (
      np.load(f'data/{network:02}/features/time_agnostic/{heuristic}.npy'))
  }      
   
  for time_strategy in time_strategies:
    for nodepair_strategy in nodepair_strategies:
      for aggregation_strategy in ['m0', 'm1', 'q0', 'q25', 'q50', 'q75', 'q100']:
        X[(time_strategy, nodepair_strategy, aggregation_strategy)] = (
          np.load(
            f'data/{network:02}/features/time_node/'
            f'{time_strategy}_{nodepair_strategy}_{aggregation_strategy}.npy'))
    
  X = pd.DataFrame(X)
    
  y = np.load(f'data/{network:02}/targets_sampled.npy')
  
  X_train, X_test, y_train, y_test = (
    sklearn.model_selection.train_test_split(X, y))
  pipe = sklearn.pipeline.make_pipeline(
    sklearn.preprocessing.StandardScaler(),
    sklearn.linear_model.LogisticRegression(max_iter=10000))
  pipe.fit(X_train, y_train)
  
  auc = sklearn.metrics.roc_auc_score(
    y_true=y_test, y_score=pipe.predict_proba(X_test)[:,1])
  
  return auc

def logistic_regression_I_II_simplegraph(network: int, heuristic: str):
  X = {
    'time agnostic': (
      np.load(f'data/{network:02}/features/time_agnostic/{heuristic}.npy'))
  }  
    
  for time_strategy in time_strategies:
    for nodepair_strategy in nodepair_strategies:
        X[(time_strategy, nodepair_strategy)] = (
          np.load(
            f'data/{network:02}/features/time_node/'
            f'{time_strategy}_{nodepair_strategy}.npy'))
    
  X = pd.DataFrame(X)
    
  y = np.load(f'data/{network:02}/targets_sampled.npy')
  
  X_train, X_test, y_train, y_test = (
    sklearn.model_selection.train_test_split(X, y))
  pipe = sklearn.pipeline.make_pipeline(
    sklearn.preprocessing.StandardScaler(),
    sklearn.linear_model.LogisticRegression(max_iter=10000))
  pipe.fit(X_train, y_train)
  
  auc = sklearn.metrics.roc_auc_score(
    y_true=y_test, y_score=pipe.predict_proba(X_test)[:,1])
  
  return auc

exp2 = {
  heuristic: [
      logistic_regression_I_II_hypergraph(network, heuristic) 
      for network in hypergraph_indices
    ] + [
      logistic_regression_I_II_simplegraph(network, heuristic)
      for network in simplegraph_indices
    ]
  for heuristic in heuristics
}
exp2 = pd.DataFrame(exp2)

In [6]:
def logistic_regression_III_hypergraph(network: int, heuristic: str):
  X = dict()  
  for time_strategy in time_strategies:
    for aggregation_strategy in aggregation_strategies:
      if aggregation_strategy not in ['m2', 'm3']:
        X[(heuristic, time_strategy, aggregation_strategy)] = (
          np.load(
            f'data/{network:02}/features/time_edge/'
            f'{heuristic}_{time_strategy}_{aggregation_strategy}.npy'))
    
  X = pd.DataFrame(X)
    
  y = np.load(f'data/{network:02}/targets_sampled.npy')
  
  X_train, X_test, y_train, y_test = (
    sklearn.model_selection.train_test_split(X, y))
  pipe = sklearn.pipeline.make_pipeline(
    sklearn.preprocessing.StandardScaler(),
    sklearn.linear_model.LogisticRegression(max_iter=10000))
  pipe.fit(X_train, y_train)
  
  auc = sklearn.metrics.roc_auc_score(
    y_true=y_test, y_score=pipe.predict_proba(X_test)[:,1])
  
  return auc

def logistic_regression_III_simplegraph(network: int, heuristic: str):
  X = dict()  
  for time_strategy in time_strategies:
    X[(heuristic, time_strategy)] = np.load(
      f'data/{network:02}/features/time_edge/{heuristic}_{time_strategy}.npy')
    
  X = pd.DataFrame(X)
    
  y = np.load(f'data/{network:02}/targets_sampled.npy')
  
  X_train, X_test, y_train, y_test = (
    sklearn.model_selection.train_test_split(X, y))
  pipe = sklearn.pipeline.make_pipeline(
    sklearn.preprocessing.StandardScaler(),
    sklearn.linear_model.LogisticRegression(max_iter=10000))
  pipe.fit(X_train, y_train)
  
  auc = sklearn.metrics.roc_auc_score(
    y_true=y_test, y_score=pipe.predict_proba(X_test)[:,1])
  
  return auc

exp3 = {
  heuristic: [
      logistic_regression_I_II_hypergraph(network, heuristic) 
      for network in hypergraph_indices
    ] + [
      logistic_regression_I_II_simplegraph(network, heuristic)
      for network in simplegraph_indices
    ]
  for heuristic in heuristics
}
exp3 = pd.DataFrame(exp3)

In [7]:
df = pd.concat({'I': exp1.T, 'I+II': exp2.T, 'III': exp3.T}, axis=1)

In [8]:
print(
  df.mean(axis=1, level=0).round(2).to_latex(
    caption=(
      'Performance, measured by the AUC, of the different experiments for the '
      'various heuristics.'),
    label='fig:heuristics')
)

\begin{table}
\centering
\caption{Performance, measured by the AUC, of the different experiments for the various heuristics.}
\label{fig:heuristics}
\begin{tabular}{lrrr}
\toprule
{} &     I &  I+II &   III \\
\midrule
aa &  0.78 &  0.87 &  0.87 \\
cn &  0.74 &  0.86 &  0.86 \\
jc &  0.64 &  0.86 &  0.85 \\
pa &  0.76 &  0.84 &  0.84 \\
\bottomrule
\end{tabular}
\end{table}

