Skip to content

Commit

Permalink
added figure scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
Vincentx15 committed Sep 1, 2020
1 parent cd8d2ff commit 08dea2f
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 53 deletions.
156 changes: 103 additions & 53 deletions eval/eval_utils.py
Expand Up @@ -22,12 +22,62 @@
# Matplotlib defaults applied globally to every figure created by this module.
# Each key is listed exactly once: in a dict literal a repeated key is silently
# overridden by its last occurrence, so 2.5 is the effective line width.
rc = {
    'figure.figsize': (10, 5),
    'axes.facecolor': 'white',
    'axes.grid': True,
    'lines.linewidth': 2.5,
    'grid.color': '.8',
    'font.size': 12,
}
plt.rcParams.update(rc)


def plot_one(dir_path, use_norm_score=False, obj='logp', successive=False):
    """
    Summarise the per-iteration CSV dumps stored in one run directory.

    Every file name in ``dir_path`` must carry its iteration number between the
    last ``_`` and the following ``.`` (e.g. ``samples_12.csv``). Each CSV must
    provide the columns ``smile`` and ``score`` (and ``norm_score`` when
    ``use_norm_score`` is set). Rows whose ``score`` equals 0 are discarded.
    Only the 21 lowest-numbered files are read.

    :param dir_path: directory containing one CSV per iteration
    :param use_norm_score: use the ``norm_score`` column instead of ``score``
    :param obj: objective name; for ``'docking'`` the lowest value is the best one,
        for any other value the highest is
    :param successive: if True, novelty is measured against the previous iteration only;
        otherwise against every sample seen in earlier iterations
    :return: tuple ``(iterations, mus, stds, batch_size, newslist, title, best, best_smiles)``:
        sorted iteration numbers, per-iteration mean and standard deviation of the values,
        the batch size (fixed to 1000), per-iteration count of novel smiles (capped at the
        batch size), the last component of ``dir_path``, per-iteration best value and
        per-iteration ``(smile, value)`` pair of the best sample
    """
    max_files = 21
    batch_size = 1000  # default; also the upper bound for the novelty counts

    # Order the files by numeric suffix: 2, 3, 23 and not 2, 23, 3
    names = os.listdir(dir_path)
    numbers = [int(name.split('_')[-1].split('.')[0]) for name in names]
    asort = np.argsort(np.array(numbers))
    iterations = np.array(numbers)[asort][:max_files]
    sorted_names = np.array(names)[asort][:max_files]

    score_column = 'norm_score' if use_norm_score else 'score'
    mus, stds, best, best_smiles = [], [], [], []
    olds = set()
    newslist = []
    for name in sorted_names:
        # Check scores
        df = pd.read_csv(os.path.join(dir_path, name))
        df = df[df['score'] != 0]
        values = df[score_column]
        smiles = df['smile']
        mus.append(np.mean(values))
        stds.append(np.std(values))

        # The zero-score filter leaves gaps in the index, so a positional argmin/argmax
        # must not be used as a label. idxmin/idxmax return the label of the best row,
        # which is then looked up with .loc in both columns.
        if obj == 'docking':  # lowest docking score is better
            best_label = values.idxmin()
        else:
            best_label = values.idxmax()
        best_value = values.loc[best_label]
        best.append(best_value)
        best_smiles.append((smiles.loc[best_label], best_value))

        # Check novelty: distinct smiles of this batch that are absent from the reference set
        batch = set(smiles)
        newslist.append(min(batch_size, len(batch - olds)))
        olds = batch if successive else olds | batch

    # normpath drops a trailing separator so 'plot/run/' is titled 'run' rather than ''
    title = os.path.basename(os.path.normpath(dir_path))

    return iterations, mus, stds, batch_size, newslist, title, best, best_smiles


def plot_csvs(dir_paths, ylim=(-12, -6), plot_best=False, return_best=False, use_norm_score=False, obj='logp',
successive=True):
"""
Expand All @@ -41,55 +91,6 @@ def plot_csvs(dir_paths, ylim=(-12, -6), plot_best=False, return_best=False, use
:return:
"""

def plot_one(dir_path, use_norm_score=False, obj='logp', successive=successive):
    """
    Summarise the per-iteration CSV dumps stored in one run directory.

    Every file name in ``dir_path`` must carry its iteration number between the
    last ``_`` and the following ``.``. Each CSV must provide the columns ``smile``
    and ``score`` (and ``norm_score`` when ``use_norm_score`` is set). Rows whose
    ``score`` equals 0 are discarded. Only the 21 lowest-numbered files are read.

    :param dir_path: directory containing one CSV per iteration
    :param use_norm_score: use the ``norm_score`` column instead of ``score``
    :param obj: objective name; for ``'docking'`` the lowest value is the best one,
        for any other value the highest is
    :param successive: if True, novelty is measured against the previous iteration only;
        the default is taken from the enclosing function's ``successive`` argument
    :return: tuple ``(iterations, mus, stds, batch_size, newslist, title, best, best_smiles)``
    """
    max_files = 21
    batch_size = 1000  # default; also the upper bound for the novelty counts

    # Order the files by numeric suffix: 2, 3, 23 and not 2, 23, 3
    names = os.listdir(dir_path)
    numbers = [int(name.split('_')[-1].split('.')[0]) for name in names]
    asort = np.argsort(np.array(numbers))
    iterations = np.array(numbers)[asort][:max_files]
    sorted_names = np.array(names)[asort][:max_files]

    score_column = 'norm_score' if use_norm_score else 'score'
    mus, stds, best, best_smiles = [], [], [], []
    olds = set()
    newslist = []
    for name in sorted_names:
        # Check scores
        df = pd.read_csv(os.path.join(dir_path, name))
        df = df[df['score'] != 0]
        values = df[score_column]
        smiles = df['smile']
        mus.append(np.mean(values))
        stds.append(np.std(values))

        # The zero-score filter leaves gaps in the index, so a positional argmin/argmax
        # must not be used as a label. idxmin/idxmax return the label of the best row,
        # which is then looked up with .loc in both columns.
        if obj == 'docking':  # lowest docking score is better
            best_label = values.idxmin()
        else:
            best_label = values.idxmax()
        best_value = values.loc[best_label]
        best.append(best_value)
        best_smiles.append((smiles.loc[best_label], best_value))

        # Check novelty: distinct smiles of this batch that are absent from the reference set
        batch = set(smiles)
        newslist.append(min(batch_size, len(batch - olds)))
        olds = batch if successive else olds | batch

    # normpath drops a trailing separator so 'plot/run/' is titled 'run' rather than ''
    title = os.path.basename(os.path.normpath(dir_path))

    return iterations, mus, stds, batch_size, newslist, title, best, best_smiles

if not isinstance(dir_paths, list): # plot only one cbas
print(dir_paths)
fig, ax = plt.subplots(1, 2)
Expand Down Expand Up @@ -159,7 +160,7 @@ def plot_one(dir_path, use_norm_score=False, obj='logp', successive=successive):

# fig.tight_layout(pad=2.0)
fig.align_labels()
else :
else:
for i, dir_path in enumerate(dir_paths):
iterations, mus, stds, batch_size, newslist, title, best_scores, best_smiles = plot_one(dir_path,
use_norm_score,
Expand Down Expand Up @@ -190,6 +191,54 @@ def plot_one(dir_path, use_norm_score=False, obj='logp', successive=successive):
return best_smiles


def figure_cbas(dir_path=('plot/wee1_noseed', 'plot/big_newlr'),
                ylim=(-12, -6), plot_best=False,
                return_best=False,
                use_norm_score=False, obj='logp',
                successive=False):
    """
    Draw the two-panel CbAS figure comparing a WEE1 run and a DRD3 run.

    The left panel shows the mean value per iteration with a +/- one standard
    deviation band; the right panel shows the number of novel samples per
    iteration. The figure is saved as ``cbas_fig2.pdf`` and then displayed.

    :param dir_path: pair ``(wee1_dir, drd3_dir)`` of run directories, each readable by ``plot_one``
    :param ylim: ``(low, high)`` y-limits of the left panel
    :param plot_best: not used by this function
    :param return_best: not used by this function
    :param use_norm_score: forwarded to ``plot_one``
    :param obj: forwarded to ``plot_one``
    :param successive: forwarded to ``plot_one``
    :return: None
    """
    fig, ax = plt.subplots(1, 2)

    wee1, drd3 = dir_path

    # DRD3 is drawn first and WEE1 second; the x-limit below uses the last run drawn.
    for label, run_dir in (('DRD3', drd3), ('WEE1', wee1)):
        iterations, mus, stds, batch_size, newslist, _, _, _ = plot_one(run_dir,
                                                                        use_norm_score,
                                                                        obj,
                                                                        successive=successive)
        print(newslist)
        mus = np.array(mus)
        stds = np.array(stds)
        ax[0].fill_between(iterations, mus + stds, mus - stds, alpha=.25)
        # x and y are passed by keyword: recent seaborn releases no longer accept them positionally
        sns.lineplot(x=iterations, y=mus, ax=ax[0], label=label)
        ax[1].plot(iterations, newslist, label=label)

    ax[0].set_ylim(ylim[0], ylim[1])
    ax[0].set_xlim(1, iterations[-1] + 0.2)
    ax[1].set_ylim(0, batch_size + 100)

    sns.despine()
    ax[0].set_xlabel('Iterations')
    ax[0].set_ylabel('Docking Score (kcal/mol)')
    ax[1].set_xlabel('Iterations')
    ax[1].set_ylabel('Novel samples')
    ax[1].legend()
    fig.tight_layout(pad=2.0)
    fig.align_labels()
    plt.savefig("cbas_fig2.pdf", format="pdf")
    plt.show()


def plot_kde(z):
"""
Input:
Expand Down Expand Up @@ -277,7 +326,7 @@ def pca_plot_hue(z, pca, variable, label):
# plot_csvs('plot/clogp_adam_small')
# plot_csvs('plot/qed_ln_nosched_big_lesslr', ylim=(0.5, 1))
# plot_csvs(['plot/robust_run2','plot/qed_ln_nosched_big'], ylim=(0.5, 1))
plot_csvs('plot/wee1')
# plot_csvs('plot/wee1_noseed')
# plot_csvs('plot/big_newlr')
# plot_csvs(['plot/big_newlr2','plot/big_newlr'])
# plot_csvs('plot/big_lnnosched')
Expand All @@ -290,5 +339,6 @@ def pca_plot_hue(z, pca, variable, label):
# plot_csvs('plot/big_newlr2')
# plot_csvs('plot/multi',successive=False)


# plot_csvs(['plot/bo_clogp', 'plot/cbas_clogp'], ylim=(-6, 12.5), plot_best=True)

figure_cbas()
76 changes: 76 additions & 0 deletions eval/replicates.py
@@ -0,0 +1,76 @@
import os
import sys

script_dir = os.path.dirname(os.path.realpath(__file__))
if __name__ == '__main__':
sys.path.append(os.path.join(script_dir, '..'))

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from eval.eval_utils import plot_one

# Matplotlib rcParams overrides for the figures drawn by this script.
# They are applied through plt.rcParams, i.e. globally for the running process.
rc = {'figure.figsize': (10, 5),
'axes.facecolor': 'white',
'axes.grid': True,
'lines.linewidth': 2.5,
'grid.color': '.8',
'font.size': 12}
plt.rcParams.update(rc)


def figure_replicates(dir_path=('plot/big_newlr', 'plot/big_newlr2', 'plot/big_newlr3'),
                      ylim=(-12, -6), plot_best=False,
                      return_best=False,
                      use_norm_score=False, obj='logp',
                      successive=False):
    """
    Draw one curve per replicate run on a shared two-panel figure.

    The left panel shows the mean value per iteration of every replicate; the
    right panel shows the number of novel samples per iteration. The figure is
    saved as ``cbas_replicated.pdf`` and then displayed.

    :param dir_path: iterable of run directories, each readable by ``plot_one``
    :param ylim: ``(low, high)`` y-limits of the left panel
    :param plot_best: not used by this function
    :param return_best: not used by this function
    :param use_norm_score: forwarded to ``plot_one``
    :param obj: forwarded to ``plot_one``
    :param successive: forwarded to ``plot_one``
    :return: None
    """
    fig, ax = plt.subplots(1, 2)

    for i, exp in enumerate(dir_path):
        iterations, mus, _, batch_size, newslist, _, _, _ = plot_one(exp,
                                                                     use_norm_score,
                                                                     obj,
                                                                     successive=successive)
        # x and y are passed by keyword: recent seaborn releases no longer accept them positionally
        sns.lineplot(x=iterations, y=mus, ax=ax[0], label=f'Replicate {i + 1}')
        ax[1].plot(iterations, newslist)

    # TODO: optionally aggregate the replicates instead of drawing one curve each:
    # crop every run to the shortest one, then plot the mean +/- std across
    # replicates for both the scores and the novelty counts.

    # Axis limits use the values returned for the last replicate processed.
    ax[0].set_ylim(ylim[0], ylim[1])
    ax[0].set_xlim(1, iterations[-1] + 0.2)
    ax[1].set_ylim(0, batch_size + 100)
    sns.despine()
    ax[0].set_xlabel('Iterations')
    ax[0].set_ylabel('Docking Score (kcal/mol)')
    ax[1].set_xlabel('Iterations')
    ax[1].set_ylabel('Novel samples')
    # ax[1].legend()
    fig.tight_layout(pad=2.0)
    fig.align_labels()
    plt.savefig("cbas_replicated.pdf", format="pdf")
    plt.show()


# Build, save and show the replicate figure with the default run directories.
# NOTE(review): this call appears to sit at module level, so it would also run
# when the module is imported — confirm that is intended.
figure_replicates()

0 comments on commit 08dea2f

Please sign in to comment.