# Zero-shot mutational effect prediction

In [1]:
import os
from subprocess import DEVNULL, STDOUT, check_call
import imageio.v2 as imageio

def _pymol_script(struc, png, prev_struc=None):
    """Build the PyMOL command lines that render ``struc`` into ``png``.

    When ``prev_struc`` is given, the structure is first aligned onto it and
    the reference object is deleted again before rendering.
    """
    lines = [f'load {struc}, struc']
    if prev_struc is not None:
        lines += [
            f'load {prev_struc}, prev_struc',
            'align struc, prev_struc',
            'delete prev_struc',
        ]
    lines += [
        'spectrum b, red_white_blue',
        'bg_color white',
        'set ray_opaque_background, off',
        'set ray_trace_fog, 0.5',
        'set ray_shadows, off',
        'set ambient, 0.2',
        'ray 1200, 1200',
        f'png {png}',
        'quit',
    ]
    return lines


def make(pdb_path, png_path, video_path):
    """Render every structure in ``pdb_path`` to a PNG and stitch them into a GIF.

    Parameters
    ----------
    pdb_path : str
        Directory whose files are loaded into PyMOL as structures.
    png_path : str
        Directory that receives one PNG per structure (created if missing).
    video_path : str
        Directory in which ``design.gif`` is written.

    Notes
    -----
    * Files are processed in sorted name order so that the frame order (and
      the "align to previous structure" chain) is reproducible;
      ``os.listdir`` alone returns entries in arbitrary order.
    * Requires the ``pymol`` executable on ``PATH``. A temporary script
      ``make_png.pml`` is (re)written in the current working directory for
      every structure.
    * ``check_call`` raises ``subprocess.CalledProcessError`` if PyMOL exits
      with a non-zero status.
    """
    # Sub-directories (e.g. notebook checkpoint folders) are not structures.
    pdbs = [
        os.path.join(pdb_path, name)
        for name in sorted(os.listdir(pdb_path))
        if os.path.isfile(os.path.join(pdb_path, name))
    ]

    os.makedirs(png_path, exist_ok=True)

    images = []
    for i, pdb in enumerate(pdbs):
        stem = os.path.splitext(os.path.basename(pdb))[0]
        png = os.path.join(png_path, stem + '.png')

        # The first structure is rendered as-is; every later one is aligned
        # onto the previous file first.
        # NOTE(review): the reference is the previous file as stored on disk,
        # not the previously aligned coordinates.
        prev_struc = pdbs[i - 1] if i > 0 else None

        with open('make_png.pml', 'w') as f:
            f.write('\n'.join(_pymol_script(pdb, png, prev_struc)) + '\n')

        images.append(png)
        check_call(['pymol', '-cpihq', 'make_png.pml'], stdout=DEVNULL, stderr=STDOUT)

    # Read each PNG file back in as one frame of the animation
    frames = [imageio.imread(file) for file in images]

    # Save the frames as a GIF
    output_file = os.path.join(video_path, "design.gif")
    imageio.mimsave(output_file, frames, duration=0.1)

In [40]:
import sys
sys.path.append('../')
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
import numpy as np
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from analysis import video

#make('../example_data/zero_shot/pdbs/', '../example_data/zero_shot/pngs/', '../example_data/zero_shot/')

def simulation_hub(path: str, t: int, data: pd.DataFrame):
    """Draw a 2x2 dashboard for the variant stored in row ``t`` of ``data``.

    Panels: rendered structure image (top-left), per-term energies of the
    selected row (top-right), total energy over all rows with the selected
    row marked (bottom-left) and a placeholder sine curve (bottom-right).

    Parameters
    ----------
    path : str
        Simulation directory; the image is read from
        ``<path>/pngs/<description>.png``.
    t : int
        Positional row index into ``data``.
    data : pd.DataFrame
        Must provide the columns ``description``, ``mut`` and
        ``total_energy``. All columns except the last four are plotted as
        individual energy terms.
    """
    # Use the ``path`` argument rather than a notebook-level global so the
    # function does not depend on hidden kernel state.
    png_path = os.path.join(path, 'pngs')

    energy_terms = data.iloc[:, :-4]
    x_lim = energy_terms.max().max() * 1.1

    design = str(data['description'].iloc[t])
    mut = str(data['mut'].iloc[t])
    img_path = os.path.join(png_path, design + '.png')

    sns.set_style('white')
    nums = data.index.values
    total_energies = data.total_energy.to_list()

    # Create the GridSpec with a 2x2 layout and different ratios
    fig = plt.figure(figsize=(12, 8))
    gs = GridSpec(2, 2, width_ratios=[2, 1], height_ratios=[2, 1])

    # Load the image and display it in the first subplot
    img = plt.imread(img_path)
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.imshow(img)
    ax1.set_axis_off()
    ax1.set_title(design)

    # Individual energy terms of the selected row in the second subplot
    ax2 = fig.add_subplot(gs[0, 1])
    selected = energy_terms.iloc[t]
    sns.barplot(x=selected.values, y=selected.index, ax=ax2)
    ax2.set_title('Individual weighted energies')
    ax2.set(xlim=(0, x_lim))

    # Total energy of every row in the third subplot, selected row starred
    ax3 = fig.add_subplot(gs[1, 0])
    sns.lineplot(x=nums, y=total_energies, ax=ax3)
    ax3.scatter(nums[t], total_energies[t], marker="*", s=80)
    props = dict(boxstyle='round', facecolor='lightblue', alpha=0.3)
    ax3.text(0.95, 0.95, mut, transform=ax3.transAxes, fontsize=12,
             verticalalignment='top', horizontalalignment='right', bbox=props)
    ax3.set_title('Total energy as proxy for structure disruption')

    # Placeholder data for the fourth subplot (sine of the row index)
    x = nums
    y1 = np.sin(x)

    ax4 = fig.add_subplot(gs[1, 1])
    ax4.scatter(x[t], y1[t], marker="*", s=80)
    sns.lineplot(x=x, y=y1, ax=ax4)
    ax4.set_title('clustered enzyme variants based on encoding and colored by activity')

    # Adjust the spacing between subplots
    fig.tight_layout()
    
path_to_simulation = '../example_data/zero_shot/'

# Load the energy log, drop its first four columns, add the row-wise sum of
# every column except the last three as `total_energy`, and order the rows
# from highest to lowest total energy. Built as one chain so re-running the
# cell always starts from the file on disk.
data = (
    pd.read_csv(os.path.join(path_to_simulation, 'data/energy_log.pdb'))
    .iloc[:, 4:]
    .assign(total_energy=lambda frame: frame.iloc[:, :-3].sum(axis=1))
    .sort_values(by='total_energy', ascending=False)
    .reset_index(drop=True)
)

n_files = len(os.listdir(os.path.join(path_to_simulation, 'pdbs')))

# `t` is used as a positional row index into `data`, so the slider must never
# exceed the last row even if the number of files in `pdbs/` differs.
t_max = min(n_files - 2, len(data) - 1)

interact(simulation_hub, path=fixed(path_to_simulation), t=(1, t_max), data=fixed(data))

interactive(children=(IntSlider(value=864, description='t', max=1728, min=1), Output()), _dom_classes=('widget…

<function __main__.simulation_hub(path: str, t: int, data: pandas.core.frame.DataFrame)>

In [38]:
# Display `data` without its first four columns (all rows are selected).
data.iloc[:, 4:]

Unnamed: 0,e_bb_coord x 0.02,e_all_atm x 0.15,position,mut,description,total_energy
0,0.028277,0.056299,0,M0W,M0W_ASMT,0.620620
1,0.027246,0.056781,0,M0D,M0D_ASMT,0.618955
2,0.028762,0.056248,0,M0N,M0N_ASMT,0.617758
3,0.027231,0.057242,0,M0F,M0F_ASMT,0.617325
4,0.028676,0.058533,0,M0H,M0H_ASMT,0.617103
...,...,...,...,...,...,...
1725,0.000604,0.001856,40,P40C,P40C_ASMT,0.506412
1726,0.002998,0.002667,80,K80C,K80C_ASMT,0.505862
1727,0.003770,0.003159,4,E4C,E4C_ASMT,0.502674
1728,0.003780,0.004393,46,A46C,A46C_ASMT,0.502137
