# Evaluating efficiency gains from caching

In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
import collections
import time

import hetmech.degree_weight
import hetmech.hetmat

In [2]:
# Open the on-disk Hetionet v1.0 HetMat that every later cell reads from.
hetmat_path = '../data/hetionet-v1.0.hetmat/'
hetmat = hetmech.hetmat.HetMat(hetmat_path)

In [3]:
# Collect Compound-to-Disease metapaths (up to length 4), dropping any that
# hetmech categorizes as 'long_repeat' or 'other'.
excluded_categories = {'long_repeat', 'other'}
metapaths = []
for metapath in hetmat.metagraph.extract_metapaths('Compound', 'Disease', max_length=4):
    category = hetmech.degree_weight.categorize(metapath)
    if category not in excluded_categories:
        metapaths.append(metapath)
len(metapaths)

1172

In [4]:
# Per-segment counts over the selected metapaths; used below both for
# inspection and as the cache priorities in compute_dwpcs.
segment_counts = hetmech.degree_weight.order_segments(
    hetmat.metagraph,
    metapaths,
    store_inverses=False,
)

In [5]:
# Ten highest-count segments. Counts presumably reflect how often a segment
# recurs across the metapaths -- TODO confirm against order_segments.
top_segments = segment_counts.most_common(10)
top_segments

[(CpD, 306),
 (CtD, 306),
 (CdG, 291),
 (CbG, 291),
 (CuG, 291),
 (GdD, 279),
 (GuD, 279),
 (GaD, 279),
 (G<rG, 144),
 (DrD, 91)]

In [6]:
# Open question: why are there so few distinct segments relative to the
# number of metapaths?
n_segments = len(segment_counts)
n_segments

103

In [7]:
def compute_dwpcs(allocate_GB):
    """
    Time DWPC computation over every metapath in `metapaths` for one cache size.

    A new PathCountPriorityCache is created with the requested allocation,
    given `segment_counts` as its priorities, and installed as
    `hetmat.path_counts_cache`, replacing whatever cache was there before.
    The cache is left installed when the function returns. The DWPC results
    themselves are discarded; only the timing and final cache occupancy are
    reported.

    Parameters
    ----------
    allocate_GB : float
        Cache allocation, in gigabytes, passed to PathCountPriorityCache.

    Returns
    -------
    float
        Seconds elapsed (per time.perf_counter) during the DWPC loop. Cache
        construction is not included in the measurement.
    """
    print(f'Beginning DWPCs for {len(metapaths):,} metapaths with a cache allocation of {allocate_GB:.2f} GB')
    cache = hetmech.hetmat.PathCountPriorityCache(hetmat, allocate_GB=allocate_GB)
    cache.priorities = segment_counts
    hetmat.path_counts_cache = cache
    start = time.perf_counter()
    for metapath in metapaths:
        # Only the runtime matters here, so the returned value is not kept.
        hetmech.degree_weight.dwpc(hetmat, metapath, dense_threshold=0.7)
    elapsed = time.perf_counter() - start
    # Format with divmod instead of strftime/gmtime: gmtime's hour field wraps
    # at 24, which would silently under-report runs of a day or longer.
    minutes, seconds = divmod(int(elapsed), 60)
    hours, minutes = divmod(minutes, 60)
    total = f'{hours:02d}:{minutes:02d}:{seconds:02d}'
    print(f'Computation complete in {total}')
    print(f'Cache contains {len(cache.cache):,} items, totaling {cache.current_B / 1_000_000_000:.2f} GB')
    return elapsed

In [8]:
# Benchmark increasing cache sizes, starting with 0 GB as the no-cache baseline.
# NOTE(review): all runs share one hetmat and the same OS file cache, so the
# first run may be penalized by cold disk reads -- consider a warm-up pass
# before drawing conclusions from the timings.
cache_allocations_GB = (0, 0.2, 1, 5, 20)
for allocate_GB in cache_allocations_GB:
    compute_dwpcs(allocate_GB)

Beginning DWPCs for 1,172 metapaths with a cache allocation of 0.00 GB
Computation complete in 00:28:40
Cache contains 0 items, totaling 0.00 GB
Beginning DWPCs for 1,172 metapaths with a cache allocation of 0.20 GB
Computation complete in 00:27:30
Cache contains 300 items, totaling 0.20 GB
Beginning DWPCs for 1,172 metapaths with a cache allocation of 1.00 GB
Computation complete in 00:27:25
Cache contains 1,256 items, totaling 0.69 GB
Beginning DWPCs for 1,172 metapaths with a cache allocation of 5.00 GB
Computation complete in 00:27:20
Cache contains 1,256 items, totaling 0.69 GB
Beginning DWPCs for 1,172 metapaths with a cache allocation of 20.00 GB
Computation complete in 00:27:23
Cache contains 1,256 items, totaling 0.69 GB
