In [None]:
import gzip
import numpy as np
import pandas as pd
import itertools
import sys

# Make the l1ktools Python package importable.
# Source is hosted here: https://github.com/cmap/l1ktools
sys.path.append('/PATH/TO/l1ktools/python')

# Make the local LINCS_functions.py helper module importable.
# sys.path entries must be directories (or zip archives): point this at the
# folder that CONTAINS LINCS_functions.py, not at the .py file itself,
# otherwise the import below fails with ImportError.
# NOTE: Alter LINCS_functions.py to contain YOUR local path to l1ktools (as above)
# and to the LINCS modzs.gctx file.
sys.path.append('/PATH/TO/LINCS_FUNCTIONS_DIR')
import LINCS_functions as lf

In [None]:
# Load the gene info table (gene_df) from the dhimmel/lincs repository
# using the url_to_df helper, then preview it.
gene_info_url = 'https://github.com/dhimmel/lincs/raw/gh-pages/data/geneinfo/geneinfo.tsv.gz'
gene_df = lf.url_to_df(gene_info_url)
gene_df.head()

In [None]:
# Build probe id (pr_id) lists from gene_df:
#   bing_list - probes whose is_bing flag is True
#   epi_list  - probes whose pr_pool_id is 'epsilon' or 'epsilon|deltap'
bing_mask = gene_df['is_bing'] == True
is_bing = gene_df.loc[bing_mask]
bing_list = list(is_bing['pr_id'])

epi_mask = (gene_df['pr_pool_id'] == 'epsilon|deltap') | (gene_df['pr_pool_id'] == 'epsilon')
is_epi = gene_df.loc[epi_mask]
epi_list = list(is_epi['pr_id'])

In [None]:
# Construct pert_df: perturbagen id -> PubChem compound id.
path = 'https://github.com/dhimmel/lincs/raw/d42347fcb53c30afed705b973fb52a1ae45a26b1/data/pertinfo/pertinfo.tsv.gz'

# Keep only the two columns needed and rename pubchem_cid -> pubchem_id so the
# column name matches drugbank_df for the later merge. rename() returns a fresh
# frame, so the assignment below does not write into a slice of the loaded table
# (the original in-place rename / attribute assignment on a slice triggers
# SettingWithCopyWarning).
pert_df = lf.url_to_df(path)[['pert_id', 'pubchem_cid']].rename(columns={'pubchem_cid': 'pubchem_id'})

# When merging frames later, it is easier to merge on strings.
# NOTE(review): if pubchem_cid is loaded as float (e.g. because of missing values),
# astype(str) yields values such as '123.0' and 'nan' -- confirm the dtype returned
# by lf.url_to_df so the ids really match those in drugbank_df.
pert_df['pubchem_id'] = pert_df['pubchem_id'].astype(str)
pert_df.head()

In [None]:
# Load the signature info table (sig_df) from a pinned commit of the
# dhimmel/lincs repository using the url_to_df helper, then preview it.
sig_info_url = 'https://github.com/dhimmel/lincs/raw/d42347fcb53c30afed705b973fb52a1ae45a26b1/data/siginfo/siginfo.tsv.gz'
sig_df = lf.url_to_df(sig_info_url)
sig_df.head()

In [None]:
# Load the DrugBank -> PubChem mapping (drugbank_df) from a pinned commit of
# the dhimmel/drugbank repository.
drugbank_map_url = 'https://github.com/dhimmel/drugbank/raw/e8567eed2dd48ae0694a0960c518763a777845ff/data/mapping/pubchem.tsv'
drugbank_df = pd.read_table(drugbank_map_url)

# Store pubchem_id as strings, the same representation used for pert_df.pubchem_id.
drugbank_df['pubchem_id'] = drugbank_df['pubchem_id'].astype(str)
drugbank_df.head()

In [None]:
# Meta-data for perturbagens that map to DrugBank drugs (db_meta_df).
# Both merges are inner joins on whatever columns the two frames share
# (no explicit `on=` is given).

# Signatures flagged as gold.
gold_sig_df = sig_df[sig_df['is_gold'] == True]

# DrugBank mapping joined to perturbagens ...
drugbank_pert_df = drugbank_df.merge(pert_df, how='inner')

# ... then joined to the gold signatures.
db_meta_df = drugbank_pert_df.merge(gold_sig_df, how='inner')
db_meta_df.head()

In [None]:
# Create the signature (gold) expression dataframe.

# Select sig_ids that are gold AND whose pert_type is trt_cp or ctl_vehicle.
# The comparison must be parenthesised: `&` binds tighter than `==`, so the
# unparenthesised form evaluates `is_gold == (True & isin(...))` and also keeps
# rows where BOTH conditions are False.
is_gold_mask = (sig_df.is_gold == True)
pert_type_mask = sig_df.pert_type.isin({'trt_cp', 'ctl_vehicle'})
gold_sig = list(sig_df.sig_id[is_gold_mask & pert_type_mask])

# NOTE(review): `gctx_path` is not defined in any cell shown in this notebook;
# it must be set (presumably to the local LINCS modzs.gctx file) before this
# cell is run, otherwise a NameError is raised.
sig_expr_df = lf.extract_from_gctx(gctx_path, epi_list, gold_sig)

# Condense to perturbagens (pert_expr_df): pert_id -> list of its sig_ids.
# NOTE(review): this mapping is built from ALL of sig_df, not only gold_sig;
# confirm get_consensus_signatures tolerates sig_ids absent from sig_expr_df.
pert_to_sig_dic = {k: g['sig_id'].tolist() for k, g in sig_df.groupby('pert_id')}
pert_expr_df = lf.get_consensus_signatures(sig_expr_df, pert_to_sig_dic)

# Condense to drugbank drugs (db_expr_df): drugbank_id -> list of its sig_ids.
db_to_sig_dic = {k: g['sig_id'].tolist() for k, g in db_meta_df.groupby('drugbank_id')}
db_expr_df = lf.get_consensus_signatures(sig_expr_df, db_to_sig_dic)

# Condense to genes: map probe ids (pr_id) to gene ids (pr_gene_id).
probe_to_gene = dict(zip(gene_df.pr_id, gene_df.pr_gene_id))
pert_expr_df = lf.probes_to_genes(pert_expr_df, probe_to_gene)
db_expr_df = lf.probes_to_genes(db_expr_df, probe_to_gene)

In [None]:
# Preview the first rows of the DrugBank-level expression frame.
db_expr_df.head()

In [None]:
# Preview the first rows of the perturbagen-level expression frame.
pert_expr_df.head()

In [None]:
# SAVE pert_expr_df as a gzip-compressed, tab-separated file (values rounded
# to 3 decimals). Note the file is tab-delimited despite the .csv.gz name.
# pandas handles the compression itself: passing a binary gzip.open(path, "w")
# handle to to_csv fails on Python 3 with older pandas, because to_csv writes text.
path = "PATH/TO/SAVE/TO/pert_expr_df.csv.gz"
pert_expr_df.to_csv(path, sep='\t', float_format='%.3f', compression='gzip')

In [None]:
# SAVE db_expr_df as a gzip-compressed, tab-separated file (values rounded
# to 3 decimals). Note the file is tab-delimited despite the .csv.gz name.
# pandas handles the compression itself: passing a binary gzip.open(path, "w")
# handle to to_csv fails on Python 3 with older pandas, because to_csv writes text.
path = "PATH/TO/SAVE/TO/db_expr_df.csv.gz"
db_expr_df.to_csv(path, sep='\t', float_format='%.3f', compression='gzip')