In [None]:
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sb
import umap

sys.path.append("../profiling/")
import profiling

In [None]:
# --- Notebook configuration -------------------------------------------------
# Number of feature columns; later cells address them by the names "0", "1", ...
NUM_FEATURES = 384

# Input tables (paths are relative to the notebook's working directory).
INPUT_PROFILES = "data/well_level_profiles_vits_LINCS_1e-5_final.csv"
MOA_FILE = "moa_annotation.csv"

# Output artefacts: the embedding table and the rendered scatter plot.
OUTPUT_CSV = "output/UMAP_of_LINCS_ViT_Small.csv"
OUTPUT_UMAP = "output/UMAP_VITs.png"

In [None]:
# Read the profile table referenced by INPUT_PROFILES.
wells = pd.read_csv(INPUT_PROFILES)

# Feature columns are addressed by the stringified integers "0" .. str(NUM_FEATURES - 1).
columns2 = list(map(str, range(NUM_FEATURES)))

# Last expression of the cell: show (rows, columns) as a quick sanity check.
wells.shape

In [None]:
# Annotation table; the next cell reads its 'broad_id' and 'moa' columns.
df_cpds_moas = pd.read_csv(filepath_or_buffer=MOA_FILE)

In [None]:
# Lookup table: compound id -> MOA annotation (a repeated id keeps its last annotation).
cpds_moa = {
    broad_id: moa
    for broad_id, moa in zip(df_cpds_moas['broad_id'], df_cpds_moas['moa'])
}
# DMSO wells carry no compound annotation; label them explicitly as controls.
cpds_moa.update({'DMSO@NA': 'control'})

# Treatments without an entry in the lookup table end up as NaN in the new column.
wells["moa"] = wells["Treatment_Clean"].map(cpds_moa)

In [None]:
def helper_func(comp):
    """Pick the MOA label to highlight for one annotation value.

    The value is converted to ``str`` and split on ``"|"``. The first piece
    that appears in the module-level ``moa_list`` is returned; if no piece
    matches, the string ``"Other"`` is returned. Non-string inputs (e.g. NaN)
    are stringified first and therefore normally fall through to ``"Other"``.
    """
    pieces = str(comp).split("|")
    return next((piece for piece in pieces if piece in moa_list), "Other")
# MOA labels that keep their own name; helper_func maps everything else to "Other".
moa_list = [
    "tubulin polymerization inhibitor",
    "cdk inhibitor",
    "hsp inhibitor",
    "plk inhibitor",
    "proteasome inhibitor",
    "xiap inhibitor",
    "control",
]
# Alternative selection, kept for reference:
# moa_list = ['inositol monophosphatase inhibitor', 'MDM inhibitor', 'purine antagonist', 'PKC activator', 'MEK inhibitor', 'BCL inhibitor', 'Control']

# helper_func takes a single value, so it can be handed to apply directly.
wells["highlight_moa"] = wells["moa"].apply(helper_func)


# Well-level UMAP

In [None]:
# UMAP is stochastic: without a fixed seed every Restart & Run All produces a
# different layout, so the saved figure and CSV would not be reproducible.
# Note: umap-learn disables some parallelism when random_state is set, trading
# a little speed for determinism.
UMAP_RANDOM_STATE = 42

reducer = umap.UMAP(n_neighbors=15, n_components=2, random_state=UMAP_RANDOM_STATE)
embeddings = reducer.fit_transform(wells[columns2])

# Put the 2-D coordinates next to the original well rows. reset_index() gives
# `wells` a fresh 0..n-1 index so the column-wise concat lines up row by row
# with the embedding frame; the previous index is kept as an "index" column.
aux = pd.concat(
    (pd.DataFrame(embeddings, columns=["X", "Y"]), wells.reset_index()),
    axis=1,
)

In [None]:
# Negative-control wells are selected by their raw treatment label.
neg_control = aux.loc[aux["Treatment"].eq("DMSO@NA")]
# pos_control = aux.loc[aux["Treatment_Clean"].isin(['BRD-K50691590', 'BRD-K88510285'])]
# treatment = aux.loc[~aux["Treatment_Clean"].isin(['DMSO@NA','BRD-K50691590', 'BRD-K88510285'])]

# Background layer: wells whose MOA is not one of the highlighted classes.
others = aux.loc[aux["highlight_moa"].eq("Other")]
# Foreground layer: everything labelled neither "Other" nor "control".
highlight = aux.loc[~aux["highlight_moa"].isin(["Other", "control"])]

In [None]:
# Colour assigned to each highlighted MOA class (used as the seaborn `palette`).
palette = {
    "xiap inhibitor": "darkred",
    "hsp inhibitor": "orange",
    "cdk inhibitor": "lightblue",
    "plk inhibitor": "darkblue",
    "tubulin polymerization inhibitor": "pink",
    "proteasome inhibitor": "green",
}

In [None]:
# savefig does not create missing folders, so make sure the target exists first.
Path(OUTPUT_UMAP).parent.mkdir(parents=True, exist_ok=True)

# Draw every layer on one explicit Axes instead of relying on pyplot's
# implicit "current axes" state.
fig, ax = plt.subplots(figsize=(10, 10))

# Layer order matters: grey background first, then controls, then the
# highlighted MOA classes on top.
sb.scatterplot(data=others, x="X", y="Y", s=5, color='silver', ax=ax)
sb.scatterplot(data=neg_control, x="X", y="Y", s=5, color='red', marker='+', ax=ax)
# sb.scatterplot(data=pos_control, x="X", y="Y", s=5, color='green', marker='+', ax=ax)
sb.scatterplot(data=highlight, x="X", y="Y", s=5, hue="highlight_moa", palette=palette, ax=ax)

# Place the legend outside the plotting area, anchored at the upper-left corner.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# bbox_inches='tight' keeps the outside legend inside the saved image.
fig.savefig(OUTPUT_UMAP, bbox_inches='tight')

In [None]:
# to_csv does not create missing folders; create the destination directory first.
Path(OUTPUT_CSV).parent.mkdir(parents=True, exist_ok=True)
# Persist the embedding together with the well metadata (row index not written).
aux.to_csv(OUTPUT_CSV, index=False)