In [11]:
# Web
import requests as rq

# Data analysis
import numpy as np
import pandas as pd
import json
%matplotlib notebook
import seaborn as sns
import matplotlib.pyplot as plt
import dask.dataframe as dd
import dask

# Convenience
from tqdm import tqdm
from collections import Counter, defaultdict
from functools import partial
import gc
import pickle
sns.set(style='darkgrid', palette='muted')

## Convenience helpers

In [2]:
class SpecialCounter(Counter):
    """Counter that additionally supports division by a number.

    ``counter / x`` returns a new ``SpecialCounter`` in which every count has
    been divided by ``x``; the counter on the left-hand side is not modified.
    """

    def __truediv__(self, other):
        scaled = {key: count / other for key, count in self.items()}
        return SpecialCounter(scaled)

## API call variables

In [3]:
# Base URL of the web service; the endpoint paths below are appended to it.
# NOTE(review): base_uri ends with "/" and every path starts with "/", so the
# concatenated URLs contain "//" -- confirm the server tolerates this.
base_uri = "https://euretos-brain.com/spine-ws/"

# Endpoint paths. `auth` is used by the login request and `indirect` by
# get_indirect(); `search` and `direct_semantic` are not used in the cells
# visible here.
auth = "/login/authenticate"
search = "/external/concepts/search"
direct_semantic = "/external/concept-to-semantic/direct"
indirect = "/external/concept-to-concept/indirect"
# Query-string suffix appended to the request URL in get_indirect().
size = "?size=10000"

## Authenticate

In [4]:
# The login payload is read from a separate JSON file; the context manager
# makes sure the file handle is closed again.
with open('../leiden_login_info/auth_info_main.json') as login_file:
    login_info = json.load(login_file)

# Post the login payload to the authentication endpoint.
login_req = rq.post(base_uri + auth, json=login_info)
# Fail here with a clear HTTP error rather than with a KeyError on 'token'.
login_req.raise_for_status()

# Header passed along with the API requests made later in the notebook.
headers = {'x-token': login_req.json()['token']}

## Load table

In [5]:
# Load the previously saved table; the cells below read its `drug_ids` and
# `disease_ids` columns.
table = pd.read_hdf("./table_all_indirect.hd5")

In [12]:
?dask

In [10]:
# NOTE: this export attempt fails with the TypeError shown below:
# format="table" cannot serialize the object-dtype `drug_ids` column.
# The second positional argument ("w") is the HDF key, not the file mode.
table.to_hdf("./table_all_indirect_dask.hd5", "w", format="table")

TypeError: Cannot serialize the column [drug_ids] because
its data contents are [mixed] object dtype

In [8]:
?pd.DataFrame.to_hdf

## Get indirect relations

In [6]:
def get_indirect(row, filt=None):
    """Fetch the indirect relations between a row's drugs and diseases.

    Parameters
    ----------
    row : mapping
        Must provide ``'drug_ids'`` and ``'disease_ids'`` iterables; every id
        is converted to ``str`` before it is sent.
    filt : optional
        When truthy it is sent as the single entry of ``positiveFilters``;
        otherwise the filter list is empty.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status (4xx/5xx).
    """
    payload = {
        "additionalFields": ['directionalTriples', 'semanticCategory'],
        "positiveFilters": [filt] if filt else [],
        "leftInputs": [str(drug_id) for drug_id in row['drug_ids']],
        "relationshipWeightAlgorithm": "PWS",
        "rightInputs": [str(disease_id) for disease_id in row['disease_ids']],
        "sort": "ASC"
    }
    print('*', end="")  # one progress mark per request
    resp = rq.post(base_uri + indirect + size, json=payload, headers=headers)
    # Surface HTTP errors here instead of returning an error payload that
    # callers would later try to read as a regular result.
    resp.raise_for_status()
    return resp.json()

## Get semantic category counters

In [None]:
def sem_cat_gener(table):
    """Yield the semantic categories found for each drug-disease row.

    For every row of ``table`` one tuple ``(categories, n_drugs, n_diseases)``
    is produced, where ``categories`` lists the ``"semanticCategory"`` of the
    concept at index 1 of each entry in ``row["all_in_between"]["content"]``
    (presumably the intermediate concept of the indirect path -- confirm),
    and the two counts are the lengths of ``row["drug_ids"]`` and
    ``row["disease_ids"]``.
    """
    for _, record in table.iterrows():
        n_drugs = len(record["drug_ids"])
        n_diseases = len(record["disease_ids"])
        categories = [
            hit["concepts"][1]["semanticCategory"]
            for hit in record["all_in_between"]["content"]
        ]
        yield categories, n_drugs, n_diseases

def get_sem_cat_counter_list(table):
    """Build one normalized semantic-category counter per table row.

    Each row's category list (as produced by ``sem_cat_gener``) is counted
    with ``SpecialCounter`` and divided by ``drug_count * disease_count``,
    i.e. by the number of drug-disease combinations in that row.

    Returns a list of ``SpecialCounter`` objects, one per row, in row order.
    """
    # The generator is created exactly once, directly where it is consumed.
    return [
        SpecialCounter(sem_cat_list) / (drug_count * disease_count)
        for sem_cat_list, drug_count, disease_count in sem_cat_gener(table)
    ]

## Randomize and fetch indirect relations

In [None]:
def randomize_drugs_diseases(number, table):
    """Shuffle the drug/disease pairing and fetch indirect relations.

    The procedure is repeated ``number`` times. In iteration ``num`` the
    ``disease_ids`` column is shuffled with seed ``num`` and the ``drug_ids``
    column with seed ``num + 1`` (so the drug shuffle of one iteration and the
    disease shuffle of the next share a seed). Indirect relations are fetched
    for every shuffled row with ``get_indirect`` and the resulting table is
    written to ``./random/random_table_<num>.hd5``.

    Parameters
    ----------
    number : int
        How many randomized tables to produce.
    table : pandas.DataFrame
        Must contain the ``drug_ids`` and ``disease_ids`` columns; it is not
        modified.

    Returns
    -------
    list
        One entry per iteration, each being the list returned by
        ``get_sem_cat_counter_list`` for that randomized table.
    """
    import os  # local import: only needed to prepare the output directory

    # Create the output directory up front so a missing folder cannot make
    # the save fail after the (slow) fetch has already been done.
    os.makedirs('./random', exist_ok=True)

    random_table_counters = list()
    for num in range(number):
        # Select the two needed columns first, then copy only those.
        random_table = table[['drug_ids', 'disease_ids']].copy()

        # Assign the shuffled *values*: assigning a Series would align on
        # index labels and break for any table without a 0..n-1 index.
        disease_randomized = random_table['disease_ids'].sample(frac=1, random_state=num)
        random_table['disease_ids'] = disease_randomized.values

        drug_randomized = random_table['drug_ids'].sample(frac=1, random_state=num + 1)
        random_table['drug_ids'] = drug_randomized.values

        print("Fetching indirect...")
        random_table["all_in_between"] = random_table.apply(get_indirect, axis=1)

        # For safety: persist the fetched data before any further processing,
        # so a failure below does not throw the fetch away. 'w' is the HDF key.
        random_table.to_hdf('./random/random_table_%i.hd5' % num, key='w')

        counter = get_sem_cat_counter_list(random_table)
        random_table_counters.append(counter)

        # Release the table before the next iteration allocates a new one.
        del random_table
        gc.collect()

    return random_table_counters

In [None]:
# Number of randomized tables to generate and fetch.
number = 20
# The function is defined as `randomize_drugs_diseases`; the previous call
# used a misspelled name and raised NameError.
random_counters = randomize_drugs_diseases(number, table)