In [1]:
# Add path with autoencoding code
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../code_autoencoding')

import numpy as np
from numpy import dot
from numpy.linalg import norm

import pandas as pd
import data_loader as dl
import itertools

In [2]:
# Helper functions for getting cosine similarity from drug response vectors.
def cos_sim(a,b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    The value is the dot product of the two inputs divided by the product of
    their norms (``numpy.linalg.norm`` with its default arguments).
    """
    inner_product = dot(a, b)
    magnitude = norm(a) * norm(b)
    return inner_product / magnitude

def combs_cos_sim(arr):
    """Return the mean cosine similarity over all unordered pairs in ``arr``.

    Parameters
    ----------
    arr : sequence of vectors
        Each element is passed to ``cos_sim``; every pair of distinct
        positions is compared exactly once.

    Returns
    -------
    The mean of the pairwise similarities (via ``np.mean``), or the constant
    ``1`` when ``arr`` holds fewer than two vectors and there is no pair to
    compare.
    """
    if len(arr) < 2:
        return 1

    # Use the qualified name: the notebook imports the `itertools` module only,
    # so a bare `combinations` raises NameError on a fresh kernel.
    # Pairing the elements directly yields the same pairs, in the same order,
    # as pairing their indices and then indexing back into `arr`.
    sims = [cos_sim(u, v) for u, v in itertools.combinations(arr, 2)]

    return np.mean(sims)

#### Read in both the data and meta data for LINCS level 2

In [None]:
# Locations of the expression matrix and of the per-instance metadata table.
data_path = "../data/shared_landmark_counts_vecs.gctx_n100000x960.gct"
meta_data_path  = "../meta_data/GSE92742_Broad_LINCS_inst_info.txt"

# Expression data, loaded through the project's data_loader helpers.
# NOTE(review): lincs_vectors is later indexed by inst_id, so dl.vectorize
# presumably yields (inst_id, vector) pairs -- confirm against data_loader.
lincs_data = dl.load_CMap(data_path)
lincs_vectors = dict(dl.vectorize(lincs_data))

# The metadata file is tab-separated.
meta_data = pd.read_csv(meta_data_path, delimiter = '\t')

#### Remove metadata rows for which there is no response in the LINCS data

In [None]:
# Keep only the metadata rows whose inst_id is one of the columns of lincs_data.
has_response = meta_data['inst_id'].isin(lincs_data.columns)
meta_data = meta_data.loc[has_response]

#### Get the vector IDs (inst_id's) corresponding to each perturbation-cell combination

In [None]:
# (pert_iname, cell_id) pair for every metadata row, duplicates included.
# The earlier itertools.product(...) assignment was dead code: it was
# overwritten immediately and built its tuples in (cell, pert) order.
pert_cell_combs = list(zip(meta_data.pert_iname, meta_data.cell_id))

# Map each (pert_iname, cell_id) pair to the Series of inst_id values that
# carry it. A single groupby pass replaces the previous loop, which re-filtered
# the whole frame once per metadata row (quadratic in the number of rows and
# repeated for every duplicate pair).
# sort=False keeps keys in first-appearance order, matching the insertion order
# the loop produced. Rows with a missing pert_iname or cell_id are skipped by
# groupby; the loop stored an empty selection for those keys.
vector_ids = {
    key: inst_ids
    for key, inst_ids in meta_data.groupby(['pert_iname', 'cell_id'], sort=False)['inst_id']
}

#### Compute the mean vector for each perturbation-cell combination. This is the average response for a given cell type and drug in the native space.

In [64]:
# For every (perturbation, cell) key, look up the vectors of all of its
# inst_ids and average them element-wise (axis 0 runs over the instances).
pert_cell_mean_vectors = {
    key: np.mean([lincs_vectors[inst_id] for inst_id in inst_ids], axis = 0)
    for key, inst_ids in vector_ids.items()
}

#### For each perturbation, get the mean pairwise cosine similarity between the mean responses of its cell types

In [74]:
# pert_iname -> mean pairwise cosine similarity of that perturbation's
# per-cell mean vectors (combs_cos_sim returns 1 when there is only one cell).
pert_cos_sims = dict()

for pert in set(meta_data.pert_iname):
    # Cell ids that occur together with this perturbation in the metadata.
    cells_for_pert = set(meta_data.loc[meta_data['pert_iname'] == pert, 'cell_id'])

    mean_responses = [pert_cell_mean_vectors[(pert, cell)] for cell in cells_for_pert]

    pert_cos_sims[pert] = combs_cos_sim(mean_responses)
    

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


0.9630696