In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile

In [None]:
# Notebook-wide settings: the protein under study and the folders involved.
# Result files are read from f'{results_dir}/{protein}/'.
# NOTE(review): plots_dir is not used by the visible cells; presumably it is
# the output folder for saved figures - confirm.
protein = '1PGB'
results_dir, plots_dir = 'results', 'plots'

# Default font size applied to every matplotlib figure created below.
plt.rcParams['font.size'] = 13

In [None]:
def get_params(files):
    """Decode the temperature and threshold shared by a set of result files.

    Each file name is split on '_'; the third field encodes the temperature
    and the fourth field encodes the threshold.  Both are decoded as
    '<letter><d><rest>' -> float('<d>.<rest>'); the fourth field also carries
    a 4-character extension (e.g. '.dat') that is stripped before decoding.

    Parameters
    ----------
    files : list of str
        Result file names (not full paths).

    Returns
    -------
    tuple
        (T, threshold): the single temperature and threshold found.

    Raises
    ------
    AssertionError
        If `files` is empty or the files do not all share one temperature
        and one threshold.
    """
    Ts, thresholds = [], []
    for file in files:
        raw_params = file.split('_')[2:]
        T_token, threshold_token = raw_params[0], raw_params[1]
        T = float(T_token[1] + '.' + T_token[2:])
        # The threshold token must be indexed before slicing: slicing the
        # raw_params list itself yields a list and 'str + list' raises.
        threshold = float(threshold_token[1] + '.' + threshold_token[2:-4])
        Ts.append(T)
        thresholds.append(threshold)
    # Fail with an accurate message instead of "Too many ..." on empty input.
    assert Ts, 'No result files to parse'
    T = np.unique(Ts)
    threshold = np.unique(thresholds)
    assert len(T) == 1, 'Too many temperatures'
    assert len(threshold) == 1, 'Too many thresholds'
    return T[0], threshold[0]

def load_data(files):
    """Load the S and dS columns of every result file into two DataFrames.

    Files are read from f'{results_dir}/{protein}/' (module-level settings).
    Every non-blank line is split on tabs and its first three fields are
    parsed as floats: gamma, S and dS.  The column name used for a file is
    the second '_'-separated field of its file name.

    Parameters
    ----------
    files : list of str
        Result file names (not full paths).

    Returns
    -------
    tuple of pandas.DataFrame
        (S_df, dS_df).  Both start with a 'gamma' column taken from the first
        file, followed by one column per file.
    """
    S_df, dS_df = pd.DataFrame(), pd.DataFrame()
    for ifile, filename in enumerate(files):
        protein_type = filename.split('_')[1]
        # Use a distinct name for the handle so the loop variable is not shadowed.
        with open(f'{results_dir}/{protein}/{filename}', 'r') as handle:
            # Split each row once; skip blank lines (e.g. a trailing empty
            # line), which would otherwise crash float('').
            rows = [line.split('\t') for line in handle if line.strip()]

        if ifile == 0:
            # gamma is only read from the first file.
            # NOTE(review): assumes every file shares the same gamma grid - confirm.
            gamma = [float(row[0]) for row in rows]
            S_df['gamma'] = gamma
            dS_df['gamma'] = gamma

        S_df[protein_type] = [float(row[1]) for row in rows]
        dS_df[protein_type] = [float(row[2]) for row in rows]
    return S_df, dS_df

In [None]:
# List the protein's result folder once, in sorted order.  os.listdir returns
# entries in arbitrary order; sorting makes the column order of the Simpson
# and MidPoint frames reproducible and consistent with each other, since
# both are built from these lists.
protein_dir = f'{results_dir}/{protein}'
result_files = sorted(
    filename for filename in listdir(protein_dir)
    if isfile(f'{protein_dir}/{filename}')
)
Simpson_files = [filename for filename in result_files if 'Simpson' in filename]
MidPoint_files = [filename for filename in result_files if 'MidPoint' in filename]

Simpson_S_df, Simpson_dS_df = load_data(Simpson_files)
MidPoint_S_df, MidPoint_dS_df = load_data(MidPoint_files)
# All files of both methods must share a single temperature and threshold.
T, threshold = get_params(Simpson_files + MidPoint_files)

In [None]:
# Parallel lists describing the protein groups drawn in the plots below.
# Each key is matched as a substring of the data-frame column names, so it
# has to be a separate list element (a single 'wt, mtsim, mtexp' string
# matches no column and collapses the zip over the three lists to one item).
keys = ['wt', 'mtsim', 'mtexp']
labels = ['wild-type', 'simulated mutants', 'experimental mutants']
colors = ['red', 'blue', 'green']

---
---
### Local Entropy curve

In [None]:
# Local entropy curves: one panel per integration method, one dashed line per
# data column, coloured by protein group.
fig, axes = plt.subplots(2, 1, figsize = (12, 8))
fig.suptitle(r'Local Entropy $S_{\beta, \gamma}$ curve estimate')

method_frames = [('Simpson', Simpson_S_df), ('MidPoint', MidPoint_S_df)]
for ax, (method, S_df) in zip(axes, method_frames):
    # Axes.title is a Text attribute, not a method; titles are set with set_title.
    ax.set_title(f'{method} integration method\n(threshold = {threshold}, T = {T})')
    for key, label, color in zip(keys, labels, colors):
        # Select the group's columns by name.  gamma is read from the full
        # frame because the key filter never selects the 'gamma' column.
        columns = [column for column in S_df.columns if key in column]
        for icol, column in enumerate(columns):
            # Only the first line of each group gets a legend entry.
            ax.plot(S_df['gamma'], S_df[column], linestyle = '--', color = color,
                    label = label if icol == 0 else None)
    ax.set_xlim([0., np.max(S_df['gamma'])])
    ax.set_xlabel(r'$\gamma$')
    ax.set_ylabel(r'$S_{\beta, \gamma}$')
    ax.legend()
    ax.grid(True)

# Keep the two-line panel titles and the axis labels from overlapping.
fig.tight_layout();

In [None]:
import matplotlib.pyplot as plt
import matplotlib.text as mtext

class LegendTitle:
    """Legend handler that renders a legend handle as a piece of text.

    Intended to be registered in a legend's ``handler_map`` (e.g. for ``str``
    handles) so that plain strings passed as handles show up as headings.
    """

    def __init__(self, text_props=None):
        """Store the keyword arguments forwarded to the Text artist.

        text_props: optional dict of Text properties; a falsy value is
        replaced by an empty dict.
        """
        super().__init__()
        self.text_props = text_props if text_props else {}

    def legend_artist(self, legend, orig_handle, fontsize, handlebox):
        """Create the Text artist for `orig_handle` inside `handlebox`.

        The text is anchored at the handle box's (xdescent, ydescent) offset,
        added to the box and returned.
        """
        heading = mtext.Text(
            handlebox.xdescent,
            handlebox.ydescent,
            orig_handle,
            **self.text_props,
        )
        handlebox.add_artist(heading)
        return heading

In [None]:
# Both integration methods on one axes: Simpson as triangles, MidPoint as
# squares, coloured by protein group.
fig, ax = plt.subplots(figsize = (12, 8))
fig.suptitle(r'Local Entropy $S_{\beta, \gamma}$ curve estimate', y = 0.96)

# Legend handles and their labels are collected together so they can never
# get out of step (e.g. when a group has no column for one of the methods).
Simpson_scatters, MidPoint_scatters = [], []
Simpson_labels, MidPoint_labels = [], []
method_specs = [
    (Simpson_S_df, '^', Simpson_scatters, Simpson_labels),
    (MidPoint_S_df, 's', MidPoint_scatters, MidPoint_labels),
]
for S_df, marker, scatters, scatter_labels in method_specs:
    for key, label, color in zip(keys, labels, colors):
        # Columns are selected by name on each frame separately; gamma comes
        # from the full frame because the key filter never selects it.
        columns = [column for column in S_df.columns if key in column]
        for icol, column in enumerate(columns):
            # The marker must be passed by keyword (the third positional
            # argument of scatter is the marker size).  scatter returns a
            # single PathCollection, so it is appended rather than added with +=.
            points = ax.scatter(S_df['gamma'], S_df[column], marker = marker, color = color)
            if icol == 0:
                scatters.append(points)
                scatter_labels.append(label)

ax.set_xlim([0., max(np.max(Simpson_S_df['gamma']), np.max(MidPoint_S_df['gamma']))])
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel(r'$S_{\beta, \gamma}$')
ax.grid(True)
# String handles are drawn as headings by LegendTitle; '' entries act as spacers.
ax.legend(
    ['Simpson'] + Simpson_scatters + [''] + ['MidPoint'] + MidPoint_scatters,
    [''] + Simpson_labels + [''] + [''] + MidPoint_labels,
    handler_map = {str: LegendTitle({'fontsize': 13})},
    bbox_to_anchor = (1, 1),
    handlelength = 5
);

---
---
### Local Entropy peak distribution

In [None]:
# Long-format table with the last-row S value of every column for both
# integration methods, ready for seaborn.
# NOTE(review): the last row is taken as the value of interest - confirm it
# corresponds to the peak named in the section heading.
protein_types = []
Simpson = []
MidPoint = []
for key in keys:
    # Select by column name so that Simpson and MidPoint values are paired by
    # protein type even if the two frames order their columns differently
    # (a missing column raises KeyError instead of silently misaligning).
    columns = [column for column in Simpson_S_df.columns if key in column]
    protein_types += [key] * len(columns)
    Simpson += list(Simpson_S_df.loc[:, columns].iloc[-1])
    MidPoint += list(MidPoint_S_df.loc[:, columns].iloc[-1])

sns_df = pd.DataFrame({
    'protein_types': protein_types + protein_types,
    'S': Simpson + MidPoint,
    'method': ['Simpson'] * len(protein_types) + ['MidPoint'] * len(protein_types)
})

In [None]:
# Swarm plot of the S values in sns_df, grouped by integration method on the
# x axis and coloured by protein type.
sns.catplot(
    data = sns_df,
    x = "method",
    y = "S",
    hue = "protein_types",
    kind = "swarm",
)