In [1]:
# Switch the kernel's working directory so the relative 'data' paths used in
# the cells below resolve against this directory.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
%cd /scratch/bruingjde/SNAM2021-code/

import os
import typing

import joblib
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import sklearn.linear_model
import sklearn.model_selection
import sklearn.metrics
import sklearn.pipeline
import sklearn.preprocessing
from tqdm.auto import tqdm

import tlp

/scratch/bruingjde/SNAM2021-code


In [5]:
# Report every entry under 'data' that has no features/RA.pkl file.
for entry in os.scandir('data'):
  ra_file = os.path.join(entry, 'features', 'RA.pkl')
  if os.path.isfile(ra_file):
    continue
  print(entry)

In [2]:
def ra(path: str, verbose: bool = False, feature_name='RA'):
  """Compute the resource allocation index for sampled node pairs and cache it.

  Reads `edgelist_mature.pkl` and `instances_sampled.npy` from `path`, builds a
  graph from the edgelist, scores every sampled pair with
  `nx.resource_allocation_index` and stores the scores in
  `<path>/features/<feature_name>.pkl`.

  Args:
    path: Dataset directory containing the two input files.
    verbose: When true, print the name of each input file before reading it.
    feature_name: Used for the output file name and as the `feature` of the
      `tlp.Experiment` key.

  Returns:
    None. The result is written to disk as a one-entry dict mapping
    `tlp.Experiment(feature=feature_name, time_aware=False)` to an array of
    scores. If the output file already exists, nothing is read or recomputed.
  """
  # The output file doubles as the "already computed" marker.
  feature_path = os.path.join(path, 'features')
  file = os.path.join(feature_path, f'{feature_name}.pkl')
  if os.path.isfile(file): return

  os.makedirs(feature_path, exist_ok=True)

  # Read in
  edgelist_mature_file = os.path.join(path, 'edgelist_mature.pkl')
  if verbose: print(f'Read {edgelist_mature_file}')
  # NOTE(review): unpickling executes arbitrary code; only load trusted files.
  edgelist_mature = pd.read_pickle(edgelist_mature_file)

  instances_file = os.path.join(path, 'instances_sampled.npy')
  if verbose: print(f'Read {instances_file}')
  # Presumably an array of (u, v) node pairs — TODO confirm against the sampler.
  instances_sampled = np.load(instances_file)

  # Get edgelist (relies on networkx's default edge column names).
  G = nx.from_pandas_edgelist(edgelist_mature)

  # Calculate: keep only the score of each (u, v, score) triple.
  scores = [p for _, _, p in nx.resource_allocation_index(G, instances_sampled)]

  # Store
  result = {tlp.Experiment(feature=feature_name, time_aware=False): np.array(scores)}
  # Write to a temporary sibling and rename it into place. Because the final
  # file is the cache marker checked above, an interrupted dump must never
  # leave a truncated file at that path, or later runs would skip it forever.
  tmp_file = f'{file}.{os.getpid()}.tmp'
  try:
    joblib.dump(result, tmp_file)
    os.replace(tmp_file, file)
  finally:
    # After a successful replace the temporary file no longer exists.
    if os.path.exists(tmp_file): os.remove(tmp_file)

In [3]:
# Run `ra` for the single dataset directory 'data/13' (presumably a trial run
# before all datasets are processed in parallel — confirm).
ra('data/13')

In [4]:
# Run `ra` on every entry under 'data', ordered by name, using one job per
# entry; the resulting list of return values is the cell output.
entries = sorted(os.scandir('data'), key=lambda dir_entry: dir_entry.name)
run_all = tlp.ProgressParallel(n_jobs=len(entries), total=len(entries))
run_all(joblib.delayed(ra)(dataset.path) for dataset in entries)

  0%|          | 0/30 [00:00<?, ?it/s]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]