In [44]:
import pandas as pd
import numpy as np
from sklearn.linear_model import RidgeCV
from scipy import stats

In [17]:
# Tab-separated lookup tables. For genes and drugs only the name column
# ('G' / 'D') is kept; the cell table keeps both index ('I') and name ('C').
genes = pd.read_csv('../data/gene2ind.txt', sep='\t', header=None, names=['I', 'G'])['G']
drugs = pd.read_csv('../data/drug2ind.txt', sep='\t', header=None, names=['I', 'D'])['D']
cell_index = pd.read_csv('../data/cell2ind.txt', sep='\t', header=None, names=['I', 'C'])

# Test set rows (cell line, drug, AUC) and the ontology edge list.
test_df = pd.read_csv('../data/drugcell_test.txt', sep='\t', header=None, names=['C', 'D', 'AUC'])
ontology = pd.read_csv('../data/drugcell_ont.txt', sep='\t', header=None, names=['S', 'T', 'I'])

# Numeric arrays: the comma-delimited mutation matrix (indexed further down
# by cell-line index along rows and gene position along columns) and the
# model predictions.
cell_gene_matrix = np.loadtxt('../data/cell2mutation.txt', delimiter=',')
predicted_vals = np.loadtxt('../result/drugcell.predict')

# Cell-line name -> index from cell2ind.txt.
cell_id_map = {cell: idx for idx, cell in zip(cell_index['I'], cell_index['C'])}

# Cell-line index of every test row, in test-file order.
cell_line_ids = np.array([cell_id_map[cell] for cell in test_df['C']])

In [15]:
# Write one '<gene>.hidden' file per gene for the RLIPP step. Each file holds
# that gene's column of the mutation matrix, sampled at the cell line of every
# test row, one value per line with three decimals.

for gene_col, gene_name in enumerate(genes):
    gene_values = cell_gene_matrix[cell_line_ids, gene_col].ravel()
    np.savetxt('../rlipp/hidden/' + gene_name + '.hidden', gene_values, fmt='%.3f')

In [18]:
# Separating data for each drug: map every drug to the list of test_df index
# labels whose 'D' column equals that drug. Drugs that never occur in the test
# set keep an empty list.
#
# The frame is scanned once; the earlier version re-ran iterrows() over the
# whole test set for every drug, which is O(n_drugs * n_rows) and builds a
# Series per row.

drug_pos_map = {d: [] for d in drugs}
for i, d in zip(test_df.index, test_df['D']):
    # Rows whose drug is not listed in `drugs` are ignored, as before.
    if d in drug_pos_map:
        drug_pos_map[d].append(i)


In [22]:
# Distinct values of the ontology 'S' column, in order of first appearance.
terms = ontology['S'].drop_duplicates().tolist()

In [46]:
def get_features(term, index_list, n_hidden=6):
    """Load the '.hidden' file for `term` and return the requested rows.

    Parameters
    ----------
    term : str
        Name used to build the path '../rlipp/hidden/<term>.hidden'.
    index_list : sequence of int
        Row positions to select from the loaded data.
    n_hidden : int, optional
        Number of leading columns read when `term` is listed in the
        module-level `terms`. Defaults to 6, the width previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The selected rows (`np.take(..., axis=0)`).

    Notes
    -----
    Names that are not in `terms` are read as a single column (the first one).
    """
    file_name = '../rlipp/hidden/' + term + '.hidden'
    # Ontology terms carry `n_hidden` columns; anything else is read as one.
    n_cols = n_hidden if term in terms else 1
    features = np.loadtxt(file_name, usecols=range(n_cols))
    return np.take(features, index_list, axis=0)


def get_child_features(term, index_list):
    """Stack the features of every direct child of `term` column-wise.

    Children are the 'T' values of the `ontology` rows whose 'S' equals
    `term`, in file order. Each child's features come from `get_features`.

    Parameters
    ----------
    term : str
        Parent term to look up in `ontology['S']`.
    index_list : sequence of int
        Row positions forwarded to `get_features`.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per entry of `index_list` and the children's
        feature columns placed side by side.

    Raises
    ------
    ValueError
        If `term` has no children in `ontology`.
    """
    # Boolean mask instead of iterrows(): same rows, same order, no per-row Series.
    children = ontology.loc[ontology['S'] == term, 'T'].tolist()
    if not children:
        raise ValueError('term %r has no children in ontology' % (term,))
    child_features = [get_features(child, index_list) for child in children]
    # column_stack needs a real sequence; passing a generator was deprecated in
    # NumPy 1.16 and is rejected by newer releases.
    return np.column_stack(child_features)


def exec_lm(X, y):
    """Return the Spearman correlation between a ridge fit of X and y.

    An intercept-free `RidgeCV` with 5-fold cross-validation is fitted on
    (X, y); the model then predicts on the same X, and the first element of
    `scipy.stats.spearmanr(prediction, y)` is returned.
    """
    model = RidgeCV(fit_intercept=False, cv=5).fit(X, y)
    rho, _pvalue = stats.spearmanr(model.predict(X), y)
    return rho

In [48]:
# Score every (drug, term) pair. For each drug, y is the model prediction on
# that drug's test rows; the parent score fits y from the term's own features,
# the child score fits y from the stacked features of the term's children.
result = []
for d in drugs:
    rows = drug_pos_map[d]
    if len(rows) == 0:
        # A drug with no test rows has nothing to fit (an empty index list
        # cannot be used to select samples), so it is skipped.
        continue
    y = np.take(predicted_vals, rows)
    for t in terms:
        X_parent = get_features(t, rows)
        X_child = get_child_features(t, rows)
        p_rho = exec_lm(X_parent, y)
        c_rho = exec_lm(X_child, y)
        # Relative gain of the parent correlation over the child correlation.
        # NOTE(review): no guard for c_rho == 0 or NaN -- confirm whether such
        # pairs should be filtered before export.
        rlipp = (p_rho - c_rho)/c_rho
        result.append({'Drug':d, 'Term':t, 'P_rho':p_rho, 'C_rho':c_rho, 'RLIPP':rlipp})

  app.launch_new_instance()


{'Drug': 'C[C@H]1CN(C(=O)C2=C(C(=CC=C2)NS(=O)(=O)C3=CC=C(C=C3)F)O[C@H]1CN(C)C(=O)NC4CCCCC4)[C@H](C)CO', 'Term': 'cut0-24-Louv4', 'P_rho': 0.7396240261947075, 'C_rho': 0.9383298520953229, 'RLIPP': -0.21176543137458378}


  app.launch_new_instance()


{'Drug': 'C[C@H]1CN(C(=O)C2=C(C(=CC=C2)NS(=O)(=O)C3=CC=C(C=C3)F)O[C@H]1CN(C)C(=O)NC4CCCCC4)[C@H](C)CO', 'Term': '23039', 'P_rho': 0.034212816740333926, 'C_rho': 0.13845593705270792, 'RLIPP': -0.7528974382130709}


  app.launch_new_instance()


{'Drug': 'C[C@H]1CN(C(=O)C2=C(C(=CC=C2)NS(=O)(=O)C3=CC=C(C=C3)F)O[C@H]1CN(C)C(=O)NC4CCCCC4)[C@H](C)CO', 'Term': 'cut0-21-Louv16', 'P_rho': 0.17101586315925354, 'C_rho': 0.9225810093720376, 'RLIPP': -0.8146332284948539}


  app.launch_new_instance()


{'Drug': 'C[C@H]1CN(C(=O)C2=C(C(=CC=C2)NS(=O)(=O)C3=CC=C(C=C3)F)O[C@H]1CN(C)C(=O)NC4CCCCC4)[C@H](C)CO', 'Term': '22766', 'P_rho': 0.09758057019814771, 'C_rho': 0.15228489846972507, 'RLIPP': -0.3592235922359224}


  app.launch_new_instance()


{'Drug': 'C[C@H]1CN(C(=O)C2=C(C(=CC=C2)NS(=O)(=O)C3=CC=C(C=C3)F)O[C@H]1CN(C)C(=O)NC4CCCCC4)[C@H](C)CO', 'Term': '23030', 'P_rho': 0.12880240398114762, 'C_rho': 0.16272319612558706, 'RLIPP': -0.2084570175124875}


  app.launch_new_instance()


{'Drug': 'C[C@H]1CN(C(=O)C2=C(C(=CC=C2)NS(=O)(=O)C3=CC=C(C=C3)F)O[C@H]1CN(C)C(=O)NC4CCCCC4)[C@H](C)CO', 'Term': '23049', 'P_rho': 0.07580370272086231, 'C_rho': 0.038047908591107196, 'RLIPP': 0.9923224568138194}


  app.launch_new_instance()


{'Drug': 'C[C@H]1CN(C(=O)C2=C(C(=CC=C2)NS(=O)(=O)C3=CC=C(C=C3)F)O[C@H]1CN(C)C(=O)NC4CCCCC4)[C@H](C)CO', 'Term': '23311', 'P_rho': -0.00660063108643422, 'C_rho': 0.2253876152472393, 'RLIPP': -1.02928568670108}


  app.launch_new_instance()


{'Drug': 'C[C@H]1CN(C(=O)C2=C(C(=CC=C2)NS(=O)(=O)C3=CC=C(C=C3)F)O[C@H]1CN(C)C(=O)NC4CCCCC4)[C@H](C)CO', 'Term': 'ROOT', 'P_rho': 0.7322710850182296, 'C_rho': 0.9941077678682139, 'RLIPP': -0.263388629797625}


KeyboardInterrupt: 

In [None]:
# Collect the per-(drug, term) records into a table and save it as a
# tab-separated file without the index column.
rlipp_out_path = '../rlipp/rlipp.out'
result_df = pd.DataFrame(data=result)
result_df.to_csv(rlipp_out_path, sep='\t', index=False)