In [None]:
import pandas as pd
from pathlib import Path
import numpy as np

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

from tqdm import tqdm

from scipy.stats import pearsonr, spearmanr
from collections import defaultdict

import matplotlib.colors as clr

In [None]:
# Directory with the input CSV tables read below (they provide the
# 'Measured Expression' and 'Predicted Expression' columns).
p = Path('../../data/input')
# Directory with the tab-separated prediction/result files read below.
pred_p = Path('../../data/output')

In [None]:
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

# Global look shared by every figure in this notebook; font_scale is also read
# by later plotting cells to size their text.
font_scale = 1.3
sns.set(font_scale=font_scale, style="ticks", font="Lato")

# Applied after sns.set(...): medium weight for all text elements and a base
# font size that follows font_scale.
matplotlib.rcParams.update({
    'font.weight': "medium",
    'font.size': 10 * font_scale,
    'axes.labelweight': 'medium',
    'figure.titleweight': 'medium',
    'axes.titleweight': 'medium',
})


## Figure 2

In [None]:
# Figure 2 data: bootstrap distributions of the correlation between measured
# expression and each model's predictions on native sequences, per medium.
# 'Vaishnav et al. 2022' = the 'Predicted Expression' column of the input table,
# 'LegNet' = column 1 of the prediction file under pred_p.
N_BOOTSTRAP = 10000
# Fixed seed so the bootstrap distributions (and the figure) are reproducible.
rng = np.random.default_rng(0)

# Not read anywhere in the visible notebook; kept so the name stays defined.
metrics = defaultdict(dict)

metrics_value = []
metrics_value_spearman = []
typs = []
authors_we = []

# Other cells read `exp` too, so it is a notebook-level name rather than a loop local.
exp = 'Native'

for typ in ['complex', 'defined']:
    auth_df = pd.read_csv(p / f'{typ}_medium_{exp}.csv')[['sequence', 'Measured Expression', 'Predicted Expression']]
    our_df = pd.read_csv(pred_p / f'{typ}_{exp}.txt', header=None, sep='\t')
    # The two tables are paired purely by row position, so they must be the same length.
    # NOTE(review): this assumes row i of both files is the same sequence - confirm.
    assert len(auth_df) == len(our_df), (
        f'{typ}: {len(auth_df)} measured rows vs {len(our_df)} predicted rows'
    )

    # Pull the columns out once; the loop below only needs positional indexing.
    measured = auth_df.iloc[:, 1].to_numpy()
    auth_pred = auth_df.iloc[:, 2].to_numpy()
    our_pred = our_df.iloc[:, 1].to_numpy()
    n_rows = len(measured)

    authords_corr, authords_corr_sp = [], []
    our_corr, our_corr_sp = [], []

    for _ in tqdm(range(N_BOOTSTRAP)):
        # One resample (with replacement) shared by both models -> paired comparison.
        ind = rng.integers(0, n_rows, size=n_rows)
        measured_bs = measured[ind]
        authords_corr.append(pearsonr(measured_bs, auth_pred[ind])[0])
        our_corr.append(pearsonr(measured_bs, our_pred[ind])[0])
        authords_corr_sp.append(spearmanr(measured_bs, auth_pred[ind])[0])
        our_corr_sp.append(spearmanr(measured_bs, our_pred[ind])[0])

    # Keep the value lists and the label lists in the same order.
    metrics_value += authords_corr + our_corr
    metrics_value_spearman += authords_corr_sp + our_corr_sp
    authors_we += ['Vaishnav et al. 2022'] * N_BOOTSTRAP + ['LegNet'] * N_BOOTSTRAP
    typs += [f'{typ}'] * (2 * N_BOOTSTRAP)

In [None]:
def _correlation_frame(values):
    """Long-format table: one row per bootstrap value with its model and capitalised medium."""
    frame = pd.DataFrame({'metrics': values, 'Model': authors_we, 'typ': typs})
    frame['typ'] = frame['typ'].str.capitalize()
    return frame


# Pearson and Spearman tables share the same model / medium labels.
df = _correlation_frame(metrics_value)
df_sp = _correlation_frame(metrics_value_spearman)

In [None]:
# Figure 2: violin plots of the bootstrap correlation distributions
# (left: Pearson, right: Spearman), one pair of violins per medium.
palette = ["#E69F00", "#542788"]
shade = "#AAAAAA"
# Explicit x order so the hard-coded annotation positions below always match.
media = ['Complex', 'Defined']

fig, ax = plt.subplots(1, 2, figsize=(10, 5), dpi=300, sharey=True)

# (axes, data, y label, y-label padding) for each panel.
panels = [
    (ax[0], df, 'Pearson correlation', None),
    (ax[1], df_sp, 'Spearman correlation', 10),
]

for axis, frame, ylabel, labelpad in panels:
    # Light band over the right half of each medium slot.
    for pos in range(len(media)):
        axis.axvspan(pos, pos + .5, facecolor=shade, alpha=0.1)
    sns.violinplot(data=frame, y='metrics', x='typ', hue='Model', order=media,
                   ax=axis, palette=palette)
    axis.set_xlabel('Medium')
    axis.set_ylabel(ylabel, labelpad=labelpad)

# Single legend on the left panel, with the two entries swapped ([1, 0]).
handles, labels = ax[0].get_legend_handles_labels()
legend_order = [1, 0]
ax[0].legend([handles[idx] for idx in legend_order], [labels[idx] for idx in legend_order],
             prop=dict(size=10 * 1.1),
             loc='upper right'
            )
ax[1].get_legend().remove()

for axis, frame, _, _ in panels:
    plt.setp(axis.collections, alpha=.8)
    axis.tick_params(axis="y", direction='in')
    axis.tick_params(axis="x", direction='in')

    # Bracket with an asterisk above each pair of violins.
    # NOTE(review): the asterisk is drawn unconditionally; no significance test
    # is computed in this notebook.
    for pos, medium in enumerate(media):
        x1, x2 = -0.20 + pos, .20 + pos
        y, h = frame[frame.typ == medium].metrics.max() + 0.002, 0.002
        axis.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=shade)
        axis.text((x1+x2)*.5, y+h-0.001, "*", ha='center', va='bottom', color=shade)

# fig.savefig('violin.png', dpi=300)  # uncomment to export the figure
plt.show()

## Figure 3

In [None]:
def bootstrap_drift_correlations(medium, n_boot=10000, seed=0):
    """Bootstrap the correlation between measured and predicted expression deltas.

    Reads ``pred_p / f'{medium}_medium_drift_res.tsv'`` and, separately for
    n_mut = 1, 2, 3, draws ``n_boot`` resamples with replacement. For every
    resample it correlates ``delta_measured`` with ``delta_predicted`` (labelled
    'LegNet') and with ``delta_predicted_auth`` (labelled 'Vaishnav et al. 2022').

    Parameters
    ----------
    medium : str
        Prefix of the result file, e.g. 'defined' or 'complex'.
    n_boot : int
        Number of bootstrap resamples per n_mut value.
    seed : int
        Seed for the resampling generator, so results are reproducible.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Pearson and Spearman tables with columns 'metrics', 'Model', 'n_mut'.
    """
    drift_df = pd.read_csv(pred_p / f'{medium}_medium_drift_res.tsv', sep='\t')
    rng = np.random.default_rng(seed)

    muts, auth = [], []
    corrs_p, corrs_sp = [], []
    for n_mut in range(1, 4):
        df_mut = drift_df[drift_df.n_mut == n_mut]
        # Extract the columns once; the loop only needs positional indexing.
        measured = df_mut.delta_measured.to_numpy()
        pred_we = df_mut.delta_predicted.to_numpy()
        pred_auth = df_mut.delta_predicted_auth.to_numpy()
        n_rows = len(df_mut)

        corr_p_we, corr_sp_we = [], []
        corr_p_auth, corr_sp_auth = [], []
        for _ in tqdm(range(n_boot)):
            # Same resample for both models -> paired comparison.
            ind = rng.integers(0, n_rows, size=n_rows)
            measured_bs = measured[ind]
            corr_p_we.append(pearsonr(measured_bs, pred_we[ind])[0])
            corr_p_auth.append(pearsonr(measured_bs, pred_auth[ind])[0])
            corr_sp_we.append(spearmanr(measured_bs, pred_we[ind])[0])
            corr_sp_auth.append(spearmanr(measured_bs, pred_auth[ind])[0])

        # Values and labels are appended in the same (authors, ours) order.
        muts += [n_mut] * (2 * n_boot)
        corrs_p += corr_p_auth + corr_p_we
        corrs_sp += corr_sp_auth + corr_sp_we
        auth += ['Vaishnav et al. 2022'] * n_boot + ['LegNet'] * n_boot

    pearson_frame = pd.DataFrame({'metrics': corrs_p, 'Model': auth, 'n_mut': muts})
    spearman_frame = pd.DataFrame({'metrics': corrs_sp, 'Model': auth, 'n_mut': muts})
    return pearson_frame, spearman_frame


# Figure 3 data. The Figure 2 frame `df` is intentionally left untouched here.
df_p_defined, df_sp_defined = bootstrap_drift_correlations('defined')
df_p_complex, df_sp_complex = bootstrap_drift_correlations('complex')

In [None]:
def draw_mutation_panel(fig, position, frame, ylabel, labelpad=None, show_legend=False):
    """Draw one violin panel of Figure 3 in slot ``position`` of a 2x2 grid.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure that receives the new subplot.
    position : int
        1-based index of the subplot in the 2x2 grid.
    frame : pandas.DataFrame
        Bootstrap table with columns 'metrics', 'Model' and 'n_mut' (values 1..3).
    ylabel : str
        Label of the y axis.
    labelpad : float or None
        Padding of the y label; None keeps matplotlib's default.
    show_legend : bool
        Keep a legend (lower right, entries swapped) instead of removing it.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that was drawn on.
    """
    palette = ["#E69F00", "#542788"]
    shade = "#AAAAAA"

    ax = fig.add_subplot(2, 2, position)
    sns.violinplot(data=frame, y='metrics', x='n_mut', hue='Model', ax=ax, palette=palette)
    ax.set_xlabel('Number of mutations')
    ax.set_ylabel(ylabel, labelpad=labelpad)

    if show_legend:
        handles, labels = ax.get_legend_handles_labels()
        legend_order = [1, 0]
        ax.legend([handles[idx] for idx in legend_order], [labels[idx] for idx in legend_order],
                  prop=dict(size=10 * 1.1),
                  loc='lower right'
                 )
    else:
        ax.get_legend().remove()

    ax.set_ylim(.58, 1)
    ax.tick_params(axis="y", direction='in')
    ax.tick_params(axis="x", direction='in')
    plt.setp(ax.collections, alpha=.8)

    # Bracket with an asterisk above each pair of violins.
    # NOTE(review): the asterisk is drawn unconditionally; no significance test
    # is computed in this notebook.
    for pos, n_mut in enumerate((1, 2, 3)):
        x1, x2 = -0.20 + pos, .20 + pos
        # The first bracket is taller than the others, as in the original figure.
        h = 0.005 if n_mut == 1 else 0.002
        y = frame[frame.n_mut == n_mut].metrics.max() + 0.015
        ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=shade)
        ax.text((x1+x2)*.5, y+h-0.001, "*", ha='center', va='bottom', color=shade)

    # Light band over the right half of each n_mut slot.
    for pos in range(3):
        ax.axvspan(pos, pos + .5, facecolor=shade, alpha=0.1)
    return ax


fig, big_axes = plt.subplots(2, 1, figsize=(10, 8), dpi=300, sharey=True)

# The two full-width axes only carry the row titles; their own frame, ticks and
# labels are switched off and the real panels are added on top of them.
for big_ax, row_title in zip(big_axes, ("Defined medium", "Complex medium")):
    big_ax.set_title(row_title, y=1.1, fontsize=18)
    big_ax.axis('off')
    big_ax.set_frame_on(False)

draw_mutation_panel(fig, 1, df_p_defined, 'Pearson correlation', show_legend=True)
draw_mutation_panel(fig, 2, df_sp_defined, 'Spearman correlation', labelpad=10)
draw_mutation_panel(fig, 3, df_p_complex, 'Pearson correlation')
draw_mutation_panel(fig, 4, df_sp_complex, 'Spearman correlation', labelpad=10)

fig.tight_layout()
plt.show()

## Supplementary figure 1

In [None]:
# Supplementary figure 1, part 1: bootstrap correlations on native sequences in
# complex medium for 'Vaishnav et al. 2022' (table's 'Predicted Expression')
# and 'LegNet' (column 1 of the prediction file). This cell resets the
# accumulator lists; the next cell appends further models to them.
N_BOOTSTRAP = 10000
# Fixed seed so the bootstrap distributions (and the figure) are reproducible.
rng = np.random.default_rng(0)

# Not read anywhere in the visible notebook; kept so the name stays defined.
metrics = defaultdict(dict)

metrics_value = []
metrics_value_spearman = []
typs = []
authors_we = []

# Defined here so the cell does not depend on another cell having set `exp`.
exp = 'Native'

for typ in ['complex']:
    auth_df = pd.read_csv(p / f'{typ}_medium_{exp}.csv')[['sequence', 'Measured Expression', 'Predicted Expression']]
    our_df = pd.read_csv(pred_p / f'{typ}_{exp}.txt', header=None, sep='\t')
    # The two tables are paired purely by row position, so they must be the same length.
    # NOTE(review): this assumes row i of both files is the same sequence - confirm.
    assert len(auth_df) == len(our_df), (
        f'{typ}: {len(auth_df)} measured rows vs {len(our_df)} predicted rows'
    )

    # Pull the columns out once; the loop below only needs positional indexing.
    measured = auth_df.iloc[:, 1].to_numpy()
    auth_pred = auth_df.iloc[:, 2].to_numpy()
    our_pred = our_df.iloc[:, 1].to_numpy()
    n_rows = len(measured)

    authords_corr, authords_corr_sp = [], []
    our_corr, our_corr_sp = [], []

    for _ in tqdm(range(N_BOOTSTRAP)):
        # One resample (with replacement) shared by both models -> paired comparison.
        ind = rng.integers(0, n_rows, size=n_rows)
        measured_bs = measured[ind]
        authords_corr.append(pearsonr(measured_bs, auth_pred[ind])[0])
        our_corr.append(pearsonr(measured_bs, our_pred[ind])[0])
        authords_corr_sp.append(spearmanr(measured_bs, auth_pred[ind])[0])
        our_corr_sp.append(spearmanr(measured_bs, our_pred[ind])[0])

    # Keep the value lists and the label lists in the same order.
    metrics_value += authords_corr + our_corr
    metrics_value_spearman += authords_corr_sp + our_corr_sp
    authors_we += ['Vaishnav et al. 2022'] * N_BOOTSTRAP + ['LegNet'] * N_BOOTSTRAP
    typs += [f'{typ}'] * (2 * N_BOOTSTRAP)

In [None]:
# Supplementary figure 1, part 2: bootstrap correlations for the remaining
# models, appended to the lists created by the previous cell.
# NOTE: this cell appends; re-running it without re-running the previous cell
# duplicates these models' rows.
N_BOOTSTRAP = 10000
# Fixed seed so the bootstrap distributions (and the figure) are reproducible.
rng = np.random.default_rng(0)

# `exp` is not used in this cell; it is kept because other cells read it.
exp = 'Native'
# The input files below do not depend on the medium; the rows are labelled 'complex'.
typ = 'complex'

for m in ['DanQ', 'DeepSEA', 'DeepAtt']:
    auth_df = pd.read_csv(p / f'Native_test_{m}_model.csv')[['sequence', 'Measured Expression', 'Predicted Expression']]
    # Pull the columns out once; the loop below only needs positional indexing.
    measured = auth_df.iloc[:, 1].to_numpy()
    predicted = auth_df.iloc[:, 2].to_numpy()
    n_rows = len(measured)

    authords_corr, authords_corr_sp = [], []
    for _ in tqdm(range(N_BOOTSTRAP)):
        ind = rng.integers(0, n_rows, size=n_rows)
        authords_corr.append(pearsonr(measured[ind], predicted[ind])[0])
        authords_corr_sp.append(spearmanr(measured[ind], predicted[ind])[0])

    metrics_value += authords_corr
    metrics_value_spearman += authords_corr_sp
    authors_we += [m] * N_BOOTSTRAP
    typs += [f'{typ}'] * N_BOOTSTRAP

In [None]:
# Long-format tables for the supplementary figure; the Pearson and Spearman
# tables share the same model and medium label columns.
label_columns = {'Model': authors_we, 'typ': typs}
df = pd.DataFrame({'metrics': metrics_value, **label_columns})
df_sp = pd.DataFrame({'metrics': metrics_value_spearman, **label_columns})

In [None]:
# Left-to-right order of the models on the x axis of the supplementary figure.
# Any model not listed here gets rank 0.
model_rank = {
    'DeepSEA': 0,
    'DeepAtt': 1,
    'DanQ': 2,
    'Vaishnav et al. 2022': 3,
    'LegNet': 4,
}


def _sorted_by_model(frame):
    """Return ``frame`` with an integer 'to_sort' rank column, ordered by that rank.

    The rank is derived from the frame's own 'Model' column, so each table is
    ordered independently of any other table. A stable sort keeps the original
    row order within each model.
    """
    rank = frame['Model'].map(model_rank).fillna(0).astype('int64')
    return frame.assign(to_sort=rank).sort_values('to_sort', kind='stable')


df = _sorted_by_model(df)
df_sp = _sorted_by_model(df_sp)

In [None]:
# Supplementary figure 1: bootstrap correlation distributions for all five
# models (left: Pearson, right: Spearman). The x order follows the row order of
# df / df_sp.
fig, ax = plt.subplots(1, 2, figsize=(12, 5), dpi=300, sharey=True)

# First three violins grey, the last two in the colours used by the other figures.
model_colours = ["#AAAAAA"] * 3 + ["#E69F00", "#542788"]
# Light background bands behind the 1st, 3rd and 5th x positions.
band_edges = [(0-0.5, 0+.5), (1.5, 2.5), (3.5, 4.5)]

# (axes, data, y label, y-label padding) for each panel.
panels = [
    (ax[0], df, 'Pearson correlation', None),
    (ax[1], df_sp, 'Spearman correlation', 10),
]

for axis, frame, ylabel, labelpad in panels:
    for left, right in band_edges:
        axis.axvspan(left, right, facecolor="#AAAAAA", alpha=0.1)
    sns.violinplot(data=frame, y='metrics', x='Model', ax=axis, palette=model_colours)
    axis.set_xlabel('')
    axis.set_ylabel(ylabel, labelpad=labelpad)

    plt.setp(axis.collections, alpha=.8)
    axis.tick_params(axis="y", direction='in')
    axis.tick_params(axis="x", direction='in')

    # Tilt the model names so they do not overlap.
    for tick in axis.get_xticklabels():
        tick.set_rotation(35)

fig.tight_layout()
fig.savefig('supp_violin.png', dpi=300)
plt.show()

## Figure 1

In [None]:
# Figure 1: hexbin joint plots of predicted vs. measured expression on native
# sequences, one figure per medium, with marginal distributions.
import matplotlib.ticker as plticker
from matplotlib.colorbar import Colorbar  # also used by the colour-bar cell below

# Defined here so the cell does not depend on another cell having set `exp`.
exp = 'Native'
# White-to-purple colormap for the hexbin counts.
cmap = clr.LinearSegmentedColormap.from_list('custom blue', ['#FFFFFF', '#542788'], N=256)

g_list = []
for typ in ['defined', 'complex']:
    auth_df = pd.read_csv(p / f'{typ}_medium_{exp}.csv')[['sequence', 'Measured Expression', 'Predicted Expression']]
    our_df = pd.read_csv(pred_p / f'{typ}_{exp}.txt', header=None, sep='\t')

    measured = auth_df.iloc[:, 1]
    predicted = our_df.iloc[:, 1]

    # Least-squares line predicted = a * measured + b; it is drawn below with
    # the prediction on the x axis and the measurement on the y axis.
    poly1d_fn = np.poly1d(np.polyfit(measured, predicted, 1))

    our_corr = pearsonr(measured, predicted)[0]
    our_corr_sp = spearmanr(measured, predicted)[0]

    g = sns.JointGrid(x=predicted, y=measured, ratio=5)
    g.ax_joint.set_xlim(2, 17.5)
    g.ax_joint.set_ylim(2, 17.5)
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases; it is
    # kept as-is so the marginals look exactly as before.
    g.plot_marginals(sns.distplot, color="#AAAAAA")
    g.plot_joint(plt.hexbin, gridsize=50, cmap=cmap, linewidths=(0,))
    g.ax_joint.plot(poly1d_fn(measured), measured, '-', linewidth=2, color="#AAAAAA")

    g.ax_joint.set_xlabel('Predicted expression', size=13*font_scale)
    g.ax_joint.set_ylabel(f'Measured expression\n({typ} media)', size=13*font_scale)
    g.ax_joint.text(3, 15, f"Pearson's r = {round(our_corr,3)}", size=13*font_scale)
    g.ax_joint.text(3, 14, f"Spearman's r = {round(our_corr_sp,3)}", size=13*font_scale)
    g.ax_joint.text(3, 13, f"N = {len(our_df)}", size=13*font_scale)
    g.ax_joint.tick_params(axis='both', which='major', pad=3)

    # Major x ticks every 2 units.
    g.ax_joint.xaxis.set_major_locator(plticker.MultipleLocator(base=2.0))

    # g.savefig(f'{typ}_jointplot.png', dpi=300)  # uncomment to export each plot
    g_list.append(g)

# After the loop `g` is the last grid (complex medium); the colour-bar cell reads it.
plt.show()


        

In [None]:
from matplotlib import gridspec
from matplotlib.colorbar import Colorbar  # imported here so the cell is self-contained

# Stand-alone colour bar for the hexbin layer (first collection on the joint
# axes) of the last joint plot built in the previous cell.
fig = plt.figure(figsize=(0.5, 5), dpi=300)
gs = gridspec.GridSpec(1, 4, figure=fig, width_ratios=[1, 1, 1, 1], height_ratios=[1])
# Only the last of the four equal-width slots is used for the bar.
cbax = fig.add_subplot(gs[3])
cb = Colorbar(ax=cbax, mappable=g_list[-1].ax_joint.collections[0],
              orientation='vertical', ticklocation='left')
# plt.savefig('jointplot_colorbar.png')  # uncomment to export the colour bar

plt.show()