In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sb
import umap
from sklearn.decomposition import PCA

In [None]:
# Load the two pre-computed tables shipped with the repository:
#   aux  - read from the well-level CSV
#   main - read from the treatment-level CSV
# NOTE(review): `aux` is reassigned further down when the UMAP table is built,
# so only `well_data` keeps a view of the well-level values after that point.
aux = pd.read_csv('data/well_level_aux_combined.csv.gz')
main = pd.read_csv('data/treatment_level_aux_combined.csv.gz')

# The numeric columns are named "0" .. "671"; pull them out as a plain array.
# presumably these are the per-well feature dimensions - confirm against the CSV
columns = list(map(str, range(672)))
well_data = aux.loc[:, columns].to_numpy()

In [None]:
# Inputs for the chemical-space view:
#   fingerprints             - the 'features' array stored in the npz archive
#   bbbc022_smiles_scaffolds - per-compound table (SMILES / scaffold CSV)
#   Y                        - MoA match table, joined onto the compounds later
with np.load('data/fingerprints_bbbc022.npz') as fingerprint_archive:
    fingerprints = fingerprint_archive['features']

bbbc022_smiles_scaffolds = pd.read_csv('data/bbbc022_smiles_scaffolds.csv')
Y = pd.read_csv("data/BBBC022_MOA_MATCHES_official.csv")

In [None]:
# Compute a fresh 2-D UMAP embedding of the chemical fingerprints and attach the
# compound metadata and MoA annotations to it.
#
# UMAP is stochastic: without a fixed random_state every Restart & Run All yields
# a different layout, so the figure below could never be regenerated exactly.
# The seed only makes *this* notebook reproducible; it does not reproduce the
# embedding used in the publication (load the CSV below for that).
reducer = umap.UMAP(random_state=42)
embeddings = reducer.fit_transform(fingerprints)

# The embedding rows are paired with the compound table purely by position
# (both sides get a fresh 0..n-1 index), so a length mismatch would silently
# produce rows with NaN coordinates or NaN metadata. Fail loudly instead.
assert len(embeddings) == len(bbbc022_smiles_scaffolds), (
    "fingerprints and bbbc022_smiles_scaffolds must have the same number of rows"
)

# reset_index() (without drop) also keeps the previous index as an 'index' column.
aux = pd.concat((pd.DataFrame(embeddings, columns=["UMAP 1", "UMAP 2"]), bbbc022_smiles_scaffolds.reset_index()), axis=1)
# Left join: every compound is kept; compounds without a match in Y get NaN in
# the columns coming from Y.
aux = pd.merge(aux, Y, left_on = 'Metadata_BROAD_ID', right_on = 'Var1', how = 'left')

#to read aux used in publication uncomment next line
#aux = pd.read_csv('data/chemical_aux_umap.csv')

#UMAP embeddings that were used for the supplementary figure are already in this repository
#aux.to_csv('data/chemical_aux_umap.csv', index = False)

# Quick look at the raw embedding before colouring by MoA.
sb.scatterplot(data=aux, x="UMAP 1", y="UMAP 2", s=100, color="lightpink", linewidth=0.5, edgecolor="black", alpha=0.8)

In [None]:
# Flatten the MoA annotations of Y: each 'Metadata_moa.x' entry may list several
# mechanisms separated by '|'; emit one row per individual mechanism, in order.
individual_moas = [
    moa
    for annotation in Y["Metadata_moa.x"]
    for moa in annotation.split("|")
]

moas = pd.DataFrame({'MoA': individual_moas})

In [None]:
# Highlight one mechanism of action (MoA) on the chemical-space UMAP:
# compounds annotated with `a` in green, everything else in blue.
fig = plt.figure(figsize=(10,10))
a  = "potassium channel activator"

# na=False: `aux` comes from a left merge, so compounds without an MoA match can
# carry NaN in 'Metadata_moa.x'. Without na=False the mask would contain NaN,
# which can neither be inverted with `~` nor used for boolean indexing.
# Unannotated compounds are therefore drawn with the background group.
has_moa = aux['Metadata_moa.x'].str.contains(a, na=False)

point_style = dict(x="UMAP 1", y="UMAP 2", s=100, linewidth=0.5, edgecolor="black", alpha=0.8)

# Each layer gets a label so that plt.legend() below has entries to show
# (unlabelled artists produce an empty legend and a warning).
g = sb.scatterplot(data=aux[~has_moa], color="dodgerblue", label="other", **point_style)
h = sb.scatterplot(data=aux[has_moa], color="limegreen", label=a, **point_style)

# Legend is placed outside the axes, to the upper right.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()

In [None]:
# Mechanisms of action highlighted in the figures below. Order matters: the
# plotting cells pair each entry with a colour by position.
selected_moas = [
    "bacterial cell wall synthesis inhibitor",
    "bacterial dna gyrase inhibitor",
    "dopamine receptor agonist",
    "estrogen receptor agonist",
    "glucocorticoid receptor agonist",
    "progesterone receptor agonist",
    "sterol demethylase inhibitor",
    "atp channel blocker",
]

In [None]:
# Chemical space (UMAP of fingerprints) coloured by the selected MoAs.
# Compounds matching none of `selected_moas` form the blue background; each
# selected MoA is then drawn on top in its own colour, in list order.
fig = plt.figure(figsize=(10,10))

# One colour per entry of `selected_moas`, paired by position.
chemical_moa_colors = ["indigo", "teal", "limegreen", "gold",
                       "salmon", "rosybrown", "hotpink", "crimson"]
assert len(selected_moas) == len(chemical_moa_colors), "need exactly one colour per selected MoA"

chemical_point_style = dict(x="UMAP 1", y="UMAP 2", s=100, linewidth=0.5, edgecolor="black", alpha=0.8)
chemical_moa_annotations = aux['Metadata_moa.x']

# na=False: `aux` comes from a left merge, so 'Metadata_moa.x' can be NaN for
# compounds without an MoA match. A NaN in the mask breaks both `~mask` and
# boolean indexing; treat such compounds as "not selected" instead.
# The joined names are used as a regex alternation (str.contains default).
in_selection = chemical_moa_annotations.str.contains('|'.join(selected_moas), na=False)
h = sb.scatterplot(data=aux[~in_selection], color="dodgerblue", label="other", **chemical_point_style)

# Labels give plt.legend() something to show; without them the legend is empty.
for moa, color in zip(selected_moas, chemical_moa_colors):
    is_moa = chemical_moa_annotations.str.contains(moa, na=False)
    sb.scatterplot(data=aux[is_moa], color=color, label=moa, **chemical_point_style)

# Legend is placed outside the axes, to the upper right.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()

In [None]:
# Save the chemical-space figure as raster (PNG) and vector (SVG).
# The plotting cell anchors the legend outside the axes
# (bbox_to_anchor=(1.05, 1)); savefig crops to the figure canvas by default and
# would cut that legend off, so ask for a tight bounding box around all artists.
for out_path in ("chemical_space_moa.png", "chemical_space_moa.svg"):
    fig.savefig(out_path, bbox_inches="tight")

In [None]:
# Show the treatment-level table read from
# 'data/treatment_level_aux_combined.csv.gz'; the next plot uses its
# 'Metadata_moa.x', 'X' and 'Y' columns.
main

In [None]:
# Phenotypic space (the 'X' / 'Y' coordinates stored in `main`) coloured by the
# selected MoAs. Treatments matching none of `selected_moas` form the blue
# background; each selected MoA is then drawn on top in its own colour.
fig = plt.figure(figsize=(10,10))

# One colour per entry of `selected_moas`, paired by position.
phenotypic_moa_colors = ["indigo", "teal", "limegreen", "gold",
                         "salmon", "rosybrown", "hotpink", "crimson"]
assert len(selected_moas) == len(phenotypic_moa_colors), "need exactly one colour per selected MoA"

phenotypic_point_style = dict(x="X", y="Y", s=100, linewidth=0.5, edgecolor="black", alpha=0.8)
phenotypic_moa_annotations = main['Metadata_moa.x']

# na=False: a missing MoA annotation would put NaN into the mask, which breaks
# both `~mask` and boolean indexing; treat such rows as "not selected".
# NOTE(review): whether `main` actually contains missing annotations is not
# visible here - the guard is harmless if it does not.
# The joined names are used as a regex alternation (str.contains default).
in_selection = phenotypic_moa_annotations.str.contains('|'.join(selected_moas), na=False)
h = sb.scatterplot(data=main[~in_selection], color="dodgerblue", label="other", **phenotypic_point_style)

# Labels give plt.legend() something to show; without them the legend is empty.
for moa, color in zip(selected_moas, phenotypic_moa_colors):
    is_moa = phenotypic_moa_annotations.str.contains(moa, na=False)
    sb.scatterplot(data=main[is_moa], color=color, label=moa, **phenotypic_point_style)

# Legend is placed outside the axes, to the upper right.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()

In [None]:
# Save the phenotypic-space figure as raster (PNG) and vector (SVG).
# The plotting cell anchors the legend outside the axes
# (bbox_to_anchor=(1.05, 1)); savefig crops to the figure canvas by default and
# would cut that legend off, so ask for a tight bounding box around all artists.
for out_path in ("phenotypic_space_moa.png", "phenotypic_space_moa.svg"):
    fig.savefig(out_path, bbox_inches="tight")