In [None]:
import itertools

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Sample metadata from the ReDU dump: only the three columns used in this
# notebook are read, and only rows whose NCBITaxonomy is exactly
# '9606|Homo sapiens' are kept.
redu_sample_info = pd.read_csv(
    'http://redu.ucsd.edu/dump', sep='\t',
    usecols=['filename', 'NCBITaxonomy', 'UBERONOntologyIndex'])
redu_sample_info = redu_sample_info[
    redu_sample_info['NCBITaxonomy'] == '9606|Homo sapiens']
# Library-search compound occurrence table, without its 'TotalFiles' column.
# NOTE(review): the remaining non-'LibraryID' columns are presumably one per
# file name -- confirm against the TSV.
lib_search = pd.read_csv(
    '../data/external/'
    'MOLECULAR-LIBRARYSEARCH-V2-53e265f8-view_compound_occurrence-main.tsv',
    sep='\t').drop(columns=['TotalFiles'])
# Unique lower-cased annotation names from the curated drug spreadsheet.
# squeeze('columns') always returns a Series (a bare squeeze() would collapse
# a one-row sheet to a scalar and break `.str`), and dropna() keeps blank
# cells out of the lookup array so that a later isin() cannot match missing
# LibraryID values against NaN.
curated_drugs = (
    pd.read_csv(
        'https://docs.google.com/spreadsheets/d/'
        '1bxmvCxA4fVovkgorolGJVgMTrHfZObsiSfbr1Dn7AsU/'
        'export?gid=791719573&format=csv',
        usecols=['ReDU_GNPS_Annotation'])
    .squeeze('columns').dropna().str.lower().unique())

In [None]:
# Keep only the library hits whose lower-cased LibraryID is one of the
# curated drug names.
library_ids_lower = lib_search['LibraryID'].str.lower()
identified_drugs = lib_search.loc[library_ids_lower.isin(curated_drugs)]

In [None]:
# Sum the drug columns per body part (UBERONOntologyIndex).
# The occurrence table is transposed so that its former column labels become
# the row index, which is then matched against the 'filename' column of the
# sample metadata.
# Only 'filename' and 'UBERONOntologyIndex' are taken from the metadata: the
# string column 'NCBITaxonomy' would otherwise be carried into the groupby
# sum, where pandas >= 2.0 concatenates the strings instead of dropping the
# column, which breaks any later numeric row-wise sum over this frame.
drugs_body_part = (pd.merge(identified_drugs.set_index('LibraryID').T,
                            redu_sample_info[['filename',
                                              'UBERONOntologyIndex']],
                            left_index=True, right_on='filename')
                   .drop(columns=['filename'])
                   .groupby('UBERONOntologyIndex').sum())

In [None]:
# Number of metadata rows per body part, as a one-column frame named 'total'
# and indexed by the UBERONOntologyIndex value.
# The Series is renamed *before* to_frame(): value_counts() names its result
# after the source column on pandas < 2.0 but 'count' on pandas >= 2.0, so
# renaming a column called 'UBERONOntologyIndex' afterwards silently does
# nothing on newer versions and leaves no 'total' column.
body_part_counts = (redu_sample_info['UBERONOntologyIndex']
                    .value_counts().rename('total').to_frame())
# Collapse all drug columns into a single row-wise total per body part and
# expose it as columns 'accession' (the former index) and 'count'.
drug_totals = drugs_body_part.sum(axis=1)
drugs_body_part_all = drug_totals.to_frame().reset_index()
drugs_body_part_all = drugs_body_part_all.rename(
    columns={'UBERONOntologyIndex': 'accession', 0: 'count'})
# Attach the per-body-part 'total' by matching 'accession' against the index
# of body_part_counts.
drugs_body_part_all = drugs_body_part_all.merge(
    body_part_counts, left_on='accession', right_index=True)
# Replace 'count' by count / total * 100, truncated to an integer.
count_percentage = (drugs_body_part_all['count']
                    / drugs_body_part_all['total'] * 100)
drugs_body_part_all['count'] = count_percentage.astype(int)
# Rebuild the table with one row per unit of 'count': every accession is
# repeated as many times as its 'count' value, so the number of rows per
# accession encodes that value.
repeated_accessions = [
    accession
    for accession, count in zip(drugs_body_part_all['accession'],
                                drugs_body_part_all['count'])
    for _ in range(count)]
drugs_body_part_all = pd.DataFrame(repeated_accessions,
                                   columns=['accession'])
# (x, y) anchor points per body-part accession; entries with two points
# carry one point per side.
# NOTE(review): presumably pixel positions on the body image used for the
# plot -- confirm against the image.
body_part_coords = {
    'UBERON:0001085': [(250, 300)],  # torso
    'UBERON:0001511': [(205, 500), (295, 500)],  # leg
    'UBERON:0001513': [(200, 625), (300, 625)],  # foot
    'UBERON:0001519': [(120, 340), (380, 340)],  # hand
    'UBERON:0002427': [(160, 275), (340, 275)],  # arm
    'UBERON:0012180': [(250, 100)],  # head
    'UBERON:0015474': [(190, 175), (310, 175)],  # shoulder
}
# Look up the anchor list for each row (accessions missing from the mapping
# get NaN), then give every anchor point its own row.
drugs_body_part_all = (
    drugs_body_part_all
    .assign(coord=drugs_body_part_all['accession'].map(body_part_coords))
    .explode('coord')
    .reset_index(drop=True))
# Split each 'coord' tuple into separate columns.
coord_columns = drugs_body_part_all['coord'].apply(pd.Series)
# ignore_index=True relabels the combined columns positionally:
# 0 = accession, 1 = coord, 2 and 3 = the two tuple components.
combined = pd.concat([drugs_body_part_all, coord_columns],
                     axis=1, ignore_index=True)
# Drop the original tuple column and give the remaining ones readable names.
drugs_body_part_all = combined.drop(columns=[1]).rename(
    columns={0: 'accession', 2: 'x', 3: 'y'})

In [None]:
# Figure whose width-to-height ratio is 1.618.
width = 7
height = width / 1.618
fig, ax = plt.subplots(figsize=(width, height))

# NOTE(review): the background colour presumably matches the dark end of the
# viridis colormap used below -- confirm.
ax.set_facecolor('#471164')

# Body image drawn at zorder 2.
# NOTE(review): presumably so that it sits on top of the density fill --
# confirm.
body_image = plt.imread('body.png')
ax.imshow(body_image, zorder=2)

# Filled 2-D kernel density estimate of the (x, y) points, with a tick-less
# colorbar.
colorbar_options = {'shrink': 0.75, 'pad': 0, 'ticks': [],
                    'label': 'Drugs skin frequency'}
sns.kdeplot(data=drugs_body_part_all, x='x', y='y', ax=ax,
            fill=True, levels=50, thresh=0, bw_adjust=0.4, cmap='viridis',
            cbar=True, cbar_kws=colorbar_options)

# Remove spines, ticks and axis labels so that only the map remains.
sns.despine(left=True, bottom=True)
ax.set(xticks=[], yticks=[], xlabel='', ylabel='')

fig.savefig('body_map.png', dpi=300, bbox_inches='tight', facecolor='white')
plt.show()
plt.close(fig)