In [57]:
import pandas as pd

In [8]:
# Load the antibody dictionary. The path is relative, so it is resolved against
# the notebook's current working directory. The cell below reads column 0 of
# each row as the probe name and every remaining column as one of its targets.
df = pd.read_csv('data/antibody_dictionary.csv')

### Create dictionary mapping probes to targets

In [49]:
# Build {probe name -> set of target names} from the rows of `df`.
# Column 0 holds the probe; every other non-null cell in the row is a target.
# All names are stripped and lower-cased so lookups can be case-insensitive.
# A later row with the same normalized probe name replaces the earlier entry.
probe_target_dict = {}
for record in df.itertuples(index=False, name=None):
    probe_key = record[0].strip().lower()
    targets = set()
    for cell in record[1:]:
        if pd.isnull(cell):
            continue  # blank CSV cell: this row has fewer targets than columns
        targets.add(cell.strip().lower())
    probe_target_dict[probe_key] = targets

### Create dictionary mapping targets to probes

In [50]:
# Invert probe_target_dict into {target name -> set of probe names}.
# Each target collects every probe whose target set contains it.
target_probe_dict = {}
for probe_name, target_names in probe_target_dict.items():
    for target_name in target_names:
        if target_name not in target_probe_dict:
            target_probe_dict[target_name] = set()
        target_probe_dict[target_name].add(probe_name)

### Save dictionaries to disk for reuse

In [58]:
import pickle

In [60]:
# Serialize the probe -> targets mapping to the current working directory.
with open('probe_target_dict.pic', mode='wb') as pickle_file:
    pickle.Pickler(pickle_file).dump(probe_target_dict)

In [61]:
# Serialize the target -> probes mapping to the current working directory.
with open('target_probe_dict.pic', mode='wb') as pickle_file:
    pickle.Pickler(pickle_file).dump(target_probe_dict)

### Check that we can load pickled files

In [67]:
# Round-trip check: read back the file written above and confirm it equals the
# in-memory mapping. (Only unpickle files you trust; this one was just written
# by this notebook.)
with open('probe_target_dict.pic', mode='rb') as pickle_file:
    ptd = pickle.Unpickler(pickle_file).load()
assert ptd == probe_target_dict

In [69]:
# Round-trip check: read back the file written above and confirm it equals the
# in-memory mapping. (Only unpickle files you trust; this one was just written
# by this notebook.)
with open('target_probe_dict.pic', mode='rb') as pickle_file:
    tpd = pickle.Unpickler(pickle_file).load()
assert tpd == target_probe_dict

### Utility function to find common target(s) of multiple probes

In [51]:
from functools import reduce

In [52]:
def find_targets(probes, probe_target_dict=probe_target_dict):
    """Return the set of targets shared by every probe in ``probes``.

    Parameters
    ----------
    probes : iterable of str
        Probe names. Each name is stripped and lower-cased before lookup, so
        callers may pass raw spellings such as ``'Arl13B'``.
    probe_target_dict : dict, optional
        Mapping of normalized probe name -> collection of target names. The
        default is bound when this function is defined, so re-run this cell
        after rebuilding the dictionary.

    Returns
    -------
    set
        A new set holding the targets common to all probes. It is empty when
        ``probes`` is empty or when the probes share no target.

    Raises
    ------
    KeyError
        If a normalized probe name is not a key of ``probe_target_dict``.
    """
    target_sets = [probe_target_dict[probe.strip().lower()] for probe in probes]
    if not target_sets:
        # No probes given: report "no common targets" instead of failing with
        # the TypeError that reduce() raises on an empty sequence.
        return set()
    first, *rest = target_sets
    # Copy the first set before intersecting so the result is always a fresh
    # object; a single-probe query must not hand back the dictionary's own
    # (mutable) set.
    return set(first).intersection(*rest)

### Simple examples to check

#### Find targets of a probe

In [54]:
# Dictionary keys were stripped and lower-cased when the mapping was built, so
# the query is normalized the same way; the raw spelling 'Arl13B' is not a key.
probe = 'Arl13B'.strip().lower()
probe_target_dict[probe]

{'acinar epithelial cell',
 'alveolar interstitial cell',
 'alveolar interstitial tissue',
 'alveolar parenchyma',
 'alveolar septum',
 'arl13b',
 'arterial smooth muscle',
 'bronchiolar epithelial cell',
 'bronchiolar epithelium',
 'bronchiolar smooth muscle',
 'bronchiole',
 'immature type i pneumocyte',
 'immature type ii pneumocyte',
 'interstitial cell',
 'interstitial tissue',
 'pre-alveolar acinar tubule',
 'pre-alveolar parenchyma',
 'pre-alveolar transitory duct',
 'proximal acinar tubule',
 'pulmonary artery',
 'pulmonary vein',
 'smooth muscle cell',
 'unclassified alveolar interstitial cell',
 'unclassified fibroblast',
 'venous smooth muscle'}

#### Find probes for a target

In [55]:
# Target names are stored stripped and lower-cased as well, so normalize the
# query before looking it up.
target = 'smooth muscle cell'.strip().lower()
target_probe_dict[target]

{'?-smooth muscle actin', 'arl13b'}

#### Find which targets (if any) are common to multiple probes

In [56]:
# find_targets strips and lower-cases each name itself, so raw spellings can be
# passed directly.
probes = ['Arl13B', 'Pro SP-C', 'Sox9']
find_targets(probes)

{'immature type ii pneumocyte'}