# Store Minibatch
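Builds the Decagon edge minibatch iterator from a pickled data-structures file and saves it, together with memory-usage and timing statistics, in a `pickle` file.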

In [1]:
from __future__ import print_function
import os
import time
import datetime
import numpy as np
import pickle
import psutil
from decagon.deep.minibatch import EdgeMinibatchIterator

In [2]:
# Pickled DECAGON data structures to read.
# Becomes a command-line parameter in the script version of this notebook.
in_file = (
    './data/data_structures/DECAGON/'
    'DECAGON_toy_genes_500_drugs_400_se_4'
)

In [3]:
# Extra tag spliced into the output file name right after the dataset token;
# left empty in this notebook.
d_text = ''
# '_'-separated pieces of the whole input path (directories included).
# words[2] is used further down to name the output file.
words = in_file.split('_')

In [4]:
# BEGIN: start the wall-clock timer (seconds) and open a psutil handle on this
# process; both are read again later to report elapsed time and memory use.
start, pid = time.time(), os.getpid()
ps = psutil.Process(pid)

In [5]:
# Load the pickled dictionary and publish every entry as a notebook-level
# variable named after its key; later cells read e.g. `gene2idx`, `feat` and
# `adj_mats_orig` directly from the global namespace.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open files that come from a trusted source.
with open(in_file, 'rb') as f:
    DS = pickle.load(f)
    for key in DS:
        globals().update({key: DS[key]})
        print(key, "Imported successfully")

edge2name Imported successfully
se_mono_name2idx Imported successfully
gene2idx Imported successfully
nonzero_feat Imported successfully
edge_type2dim Imported successfully
adj_mats_orig Imported successfully
edge_type2decoder Imported successfully
se_combo_name2idx Imported successfully
drug2idx Imported successfully
degrees Imported successfully
edge_types Imported successfully
num_edge_types Imported successfully
num_feat Imported successfully
feat Imported successfully


In [6]:
# Entity counts, taken as the sizes of gene2idx, drug2idx and se_combo_name2idx
# (all three were injected into the namespace by the loading cell).
n_genes, n_drugs, n_se_combo = (
    len(index_map) for index_map in (gene2idx, drug2idx, se_combo_name2idx)
)
print(n_genes, n_drugs, n_se_combo)

500 400 4


In [7]:
# Settings forwarded to EdgeMinibatchIterator and recorded in the output file name.
# Number of edges per minibatch (passed as `batch_size`).
batch_size = 128
# Hold-out fraction (passed as `val_test_size`). In the recorded run an edge type
# with 359 edges was split 253 / 53 / 53, so the fraction appears to apply to the
# validation and test sets separately -- TODO confirm against the iterator code.
val_test_size = 0.15

In [8]:
print("Create minibatch iterator")
# Constructing the iterator is the expensive step: per its progress log it builds
# the train/val/test edge sets for every edge type.
# NOTE(review): no random seed is set in this notebook; if the split is random
# (presumably it is -- verify in EdgeMinibatchIterator), re-runs will differ.
iterator_args = dict(
    adj_mats=adj_mats_orig,
    feat=feat,
    edge_types=edge_types,
    batch_size=batch_size,
    val_test_size=val_test_size,
)
minibatch = EdgeMinibatchIterator(**iterator_args)

Create minibatch iterator
Minibatch edge type: (0, 1, 0)
Constructing test edges= 0000/0053
Constructing val edges= 0000/0053
Constructing train edges= 0000/0253
Train edges= 0253
Val edges= 0053
Test edges= 0053
Minibatch edge type: (1, 0, 0)
Constructing test edges= 0000/0053
Constructing val edges= 0000/0053
Constructing train edges= 0000/0253
Train edges= 0253
Val edges= 0053
Test edges= 0053
Minibatch edge type: (0, 0, 0)
Constructing test edges= 0000/0092
Constructing val edges= 0000/0092
Constructing train edges= 0000/0431


  rowdegree_mat_inv = sp.diags(np.nan_to_num(np.power(rowsum, -0.5)).flatten())
  coldegree_mat_inv = sp.diags(np.nan_to_num(np.power(colsum, -0.5)).flatten())


Train edges= 0431
Val edges= 0092
Test edges= 0092
Minibatch edge type: (1, 1, 0)
Constructing test edges= 0000/3112
Constructing test edges= 1000/3112
Constructing test edges= 2000/3112
Constructing test edges= 3000/3112
Constructing val edges= 0000/3112
Constructing val edges= 1000/3112
Constructing val edges= 2000/3112
Constructing val edges= 3000/3112
Constructing train edges= 0000/14529
Constructing train edges= 1000/14529
Constructing train edges= 2000/14529
Constructing train edges= 3000/14529
Constructing train edges= 4000/14529
Constructing train edges= 5000/14529
Constructing train edges= 6000/14529
Constructing train edges= 7000/14529
Constructing train edges= 8000/14529
Constructing train edges= 9000/14529
Constructing train edges= 10000/14529
Constructing train edges= 11000/14529
Constructing train edges= 12000/14529
Constructing train edges= 13000/14529
Constructing train edges= 14000/14529
Train edges= 14529
Val edges= 3112
Test edges= 3112
Minibatch edge type: (1, 1, 1)

In [9]:
# Output path for the pickled minibatch. The file name encodes the dataset tag,
# the entity counts and the minibatch settings.
#
# The dataset tag (e.g. 'toy') is the second '_'-separated token of the input
# file's base name. It is read from the base name rather than from the full
# path so that underscores in directory names (such as 'data_structures')
# cannot shift the token index.
dataset_tag = os.path.basename(in_file).split('_')[1]
out_file = ('data/data_structures/MINIBATCH/MINIBATCHwithTRAIN_' + dataset_tag + d_text +
            '_genes_' + str(n_genes) + '_drugs_' + str(n_drugs) +
            '_se_' + str(n_se_combo) + '_batchsize_' + str(batch_size) +
            '_valsize_' + str(val_test_size))
print(out_file)

data/data_structures/MINIBATCH/MINIBATCHwithTRAIN_toy_genes_500_drugs_400_se_4_batchsize_128_valsize_0.15


In [None]:
# Snapshot the process memory counters and bundle them, the elapsed time since
# `start`, and the minibatch iterator itself into the dictionary to be pickled.
memUse = ps.memory_info()
data = {
    'minibatch': minibatch,
    'mb_vms': memUse.vms,
    'mb_rss': memUse.rss,
    'mb_time': time.time() - start,
}

In [None]:
# Make sure the destination directory exists; otherwise open() fails when the
# MINIBATCH folder has not been created yet.
out_dir = os.path.dirname(out_file)
if out_dir and not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# Protocol 2 is the newest pickle protocol that Python 2 can also read.
with open(out_file, 'wb') as f:
    pickle.dump(data, f, protocol=2)

In [None]:
# Final report. memory_info() values are in bytes, so multiplying by 1e-09
# gives decimal gigabytes (GB); the figures come from the snapshot taken before
# the file was written. The elapsed time covers everything since `start`.
print('Virtual memory:', memUse.vms*1e-09, 'GB')
print('RSS Memory:', memUse.rss*1e-09, 'GB')
print('Total time:', datetime.timedelta(seconds=time.time()-start))