[![Open in Google Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eagalpern/folding-ising-globular/blob/master/notebooks/custom_potts_simulation_colab.ipynb)


# Folding Ising simulation using a custom, user-defined Potts model (for any protein family)



In [1]:
# @title Install dependencies 

import pandas as pd
from matplotlib import pyplot as plt, colors
import numpy as np
import os
import zipfile
import requests
import subprocess
import pickle
import sys
import seaborn as sns
import json
from multiprocessing import cpu_count
import importlib.util
from ipywidgets import interact, widgets, Checkbox, Text, VBox, Output
from IPython.display import display, clear_output
from google.colab import files

# Install py3Dmol (for visualizing structures) and Biopython (imported as `Bio`, for loading FASTA sequences) if they are missing
def install_if_missing(package, import_name=None):
    """Pip-install *package* unless its module can already be found.

    Args:
        package: Distribution name passed to ``pip install``.
        import_name: Module name used for the availability check; falls back
            to *package* when omitted.
    """
    module_name = package if import_name is None else import_name
    if importlib.util.find_spec(module_name) is not None:
        return  # already available, nothing to install
    pip_command = [sys.executable, "-m", "pip", "install", "--quiet", package]
    subprocess.check_call(pip_command)
# The second argument is the importable module name, given when it differs
# from the pip package name.
install_if_missing("py3Dmol")
install_if_missing("biopython", "Bio")

# Detect if running in Google Colab
IN_COLAB = 'COLAB_GPU' in os.environ

# Repository URL, checkout directory, and data directory
REPO_URL = "https://github.com/eagalpern/folding-ising-globular.git"

if IN_COLAB:
    SOURCE_DIR = "/content/repo"
    # Clone the GitHub repository once per Colab session
    if not os.path.exists(SOURCE_DIR):
        # An argument list (no shell) avoids quoting problems, and a failed
        # clone is reported here rather than as an ImportError further down.
        _clone = subprocess.run(
            ["git", "clone", REPO_URL, SOURCE_DIR],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
        if _clone.returncode != 0:
            raise RuntimeError(f"git clone of {REPO_URL} failed: {_clone.stderr.strip()}")
else:
    # Local usage: the notebook is expected to live one level below the
    # repository root. Set FOLDING_ISING_DIR to point at a checkout elsewhere
    # instead of editing a machine-specific path into the notebook.
    SOURCE_DIR = os.environ.get("FOLDING_ISING_DIR", "../")

DATA_DIR = os.path.join(SOURCE_DIR, "data")

# Make the project modules under <SOURCE_DIR>/source importable
sys.path.append(os.path.join(SOURCE_DIR, "source"))

from fasta_utils import *
from visualization import *
from plot_results import *
from ising_simulation import *
from utils import *
from DMS_prediction import *



In [13]:
# @title Upload Potts Model
"""
Upload a Potts model in .npz format or load an example file interactively.

The Potts model file must be a .npz file containing two NumPy arrays:
- 'h': A 2D array of shape (L, len(alphabet)), where L is the length of the protein sequence.
- 'J': A 4D array of shape (L, L, len(alphabet), len(alphabet)).

The example file provided is for the ubiquitin family.
"""


# Message area that the load handler prints into
output = widgets.Output()

# Switch between uploading a model and using the bundled example
load_example_toggle = widgets.ToggleButtons(
    description='Choose Option:',
    options=['Upload File', 'Load Example'],
    tooltips=['Upload your own .npz file', 'Load an example .npz file'],
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

# Clicking this button starts the load
load_button = widgets.Button(
    description='Load Potts Model',
    tooltip='Click to load the Potts model',
    button_style='success',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

# Module-level state, filled in by the load handler
potts = seq_len = None
# Function to handle the loading process
def load_potts_model(_):
    """Load a Potts model (bundled example or user upload) and validate it.

    Click handler for `load_button`. The candidate file is validated first;
    the globals `potts` and `seq_len` are only updated once every check has
    passed, so a rejected file never replaces a previously loaded model.

    Args:
        _: The clicked button (unused).
    """
    global potts, seq_len  # Published together, only after validation
    with output:
        output.clear_output()  # Clear previous output
        if load_example_toggle.value == 'Load Example':
            example_file = DATA_DIR+'/potts_model/potts_ubiquitin.npz'
            candidate = np.load(example_file, allow_pickle=True)
            print("Example Potts model for ubiquitin family loaded successfully.")
        else:
            print("Please upload a .npz file.")
            uploaded = files.upload()
            if not uploaded:
                # Nothing to validate: stop here instead of reporting a
                # misleading "not an .npz file" error below.
                print("No file was uploaded.")
                return
            file_name = next(iter(uploaded))
            # SECURITY NOTE(review): allow_pickle=True lets a crafted upload
            # run arbitrary code when an object array is read. Kept for
            # compatibility with existing model files; switch to
            # allow_pickle=False if 'h' and 'J' are always plain numeric arrays.
            candidate = np.load(file_name, allow_pickle=True)
            print(f"File '{file_name}' uploaded and loaded successfully.")

        # np.load returns an NpzFile only for .npz archives
        if not isinstance(candidate, np.lib.npyio.NpzFile):
            print("Error: The loaded file is not an .npz file.")
            return

        if 'h' not in candidate or 'J' not in candidate:
            print("Error: The loaded file does not contain 'h' and 'J' keys.")
            return

        h = candidate['h']
        J = candidate['J']

        if not isinstance(h, np.ndarray) or not isinstance(J, np.ndarray):
            print("Error: 'h' and 'J' must be NumPy arrays.")
            return

        alphabet_len = len(alphabet)

        # Check the rank before indexing the shape so a 0-d or 1-d 'h' is
        # reported as a shape error rather than raising.
        if h.ndim != 2 or h.shape[1] != alphabet_len:
            print(f"Error: 'h' must have shape (L, len(alphabet)), but got {h.shape}.")
            return

        L = h.shape[0]

        if J.shape != (L, L, alphabet_len, alphabet_len):
            print(f"Error: 'J' must have shape (L, L, len(alphabet), len(alphabet)), but got {J.shape}.")
            return

        # All checks passed: publish the model and its sequence length
        potts = candidate
        seq_len = L
        print(f"Potts model loaded successfully. Sequence length (L) is {seq_len}.")

# Alphabet whose length fixes the expected Potts array shapes, and the
# AAdict that create_aa_dict builds from it
alphabet = '-ACDEFGHIKLMNPQRSTVWY' #@param {type:"string"}
AAdict = create_aa_dict(alphabet)

# Register the click handler, then render the controls
load_button.on_click(load_potts_model)
display(
    load_example_toggle,
    load_button,
    output,
)

ToggleButtons(description='Choose Option:', options=('Upload File', 'Load Example'), tooltips=('Upload your ow…

Button(button_style='success', description='Load Potts Model', style=ButtonStyle(), tooltip='Click to load the…

Output()

In [14]:
# @title Parameter Setup
""" Set parameters interactively, with an option to use example values and confirm manually entered values. """


# Define widgets for the parameters with larger layouts
prot_name_input = widgets.Text(
    value='',  # Initially empty
    description='Protein Name:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='500px')  # Set width to 500px
)

sequence_input = widgets.Textarea(  # Use Textarea for multi-line input
    value='',  # Initially empty
    description='Sequence:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='500px', height='100px')  # Set width and height
)

tsel_input = widgets.FloatText(
    value=0.0,  # Placeholder; a positive value must be entered before confirming
    description='Tsel:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='500px')  # Set width to 500px
)

foldon_boundaries_input = widgets.Text(
    value='',  # Initially empty
    description='Foldon Boundaries:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='500px')  # Set width to 500px
)

# Buttons
use_example_button = widgets.Button(
    description='Use Example',
    disabled=False,
    button_style='info',  # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click to populate example values'
)

confirm_button = widgets.Button(
    description='Confirm',
    disabled=False,
    button_style='success',  # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click to confirm manually entered values'
)

# Output widget for this cell's messages. It has its own name so that running
# this cell does not rebind the `output` global that the Potts-model cell's
# click handler looks up when it is clicked.
output_params = widgets.Output()


def use_example(_):
    """Fill the input widgets with the ubiquitin example values.

    Args:
        _: The clicked button (unused).
    """
    with output_params:
        output_params.clear_output()  # Clear previous output
        prot_name_input.value = 'ubiquitin_example'
        sequence_input.value = 'LFVMTLTGRTITINLNRKATVEDLKDRIQDREGLPADQQRILFAGKQLADGLYLSDYNVQRESTLHLVLRMR'
        tsel_input.value = 228.943009
        foldon_boundaries_input.value = '0, 19, 31, 43, 56'
        print("Example values populated.")


def confirm_values(_):
    """Validate the entered parameters and publish them as globals.

    On success sets the globals `prot_name`, `Sequence`, `Tsel` and `breaks`
    (a NumPy array of the foldon boundaries). Nothing is published when any
    check fails, so the four globals always describe one consistent input.

    Args:
        _: The clicked button (unused).
    """
    global prot_name, Sequence, Tsel, breaks
    with output_params:
        output_params.clear_output()  # Clear previous output

        name = prot_name_input.value.strip()
        # Remove every whitespace character (a Textarea may contain line
        # breaks) so they do not count toward the length, and upper-case.
        sequence = ''.join(sequence_input.value.split()).upper()
        tsel = tsel_input.value
        boundaries_text = foldon_boundaries_input.value

        if not name:
            print("Error: Protein Name is required.")
            return
        if not sequence:
            print("Error: Sequence is required.")
            return

        try:
            # Empty tokens (e.g. from a trailing comma) are ignored
            foldon_boundaries = [int(tok) for tok in boundaries_text.split(',') if tok.strip()]
        except ValueError:
            print("Error: Foldon Boundaries must be comma-separated integers, e.g. '0, 19, 31'.")
            return
        if not foldon_boundaries:
            print("Error: Foldon Boundaries are required.")
            return

        # Tsel is later used as a divisor (-1 / (k * Tsel)), so the 0.0
        # placeholder must not be accepted.
        if tsel <= 0:
            print("Error: Tsel must be a positive temperature.")
            return

        # The expected length comes from the loaded Potts model
        expected_len = globals().get('seq_len')
        if expected_len is None:
            print("Error: Load a Potts model before confirming the parameters.")
            return
        if len(sequence) != expected_len:
            print(f"Error: Sequence length must be {expected_len}, but got {len(sequence)}.")
            return

        # Everything is valid: publish the confirmed values together
        prot_name = name
        Sequence = sequence
        Tsel = tsel
        breaks = np.array(foldon_boundaries)

        print("Values confirmed successfully.")
        print("Protein Name:", prot_name)
        print("Sequence:", Sequence)
        print("Tsel:", Tsel)
        print("Foldon Boundaries:", breaks)


# Attach the functions to the buttons
use_example_button.on_click(use_example)
confirm_button.on_click(confirm_values)

# Display the widgets
display(prot_name_input, sequence_input, tsel_input, foldon_boundaries_input, use_example_button, confirm_button, output_params)

Text(value='', description='Protein Name:', layout=Layout(width='500px'), style=DescriptionStyle(description_w…

Textarea(value='', description='Sequence:', layout=Layout(height='100px', width='500px'), style=DescriptionSty…

FloatText(value=0.0, description='Tsel:', layout=Layout(width='500px'), style=DescriptionStyle(description_wid…

Text(value='', description='Foldon Boundaries:', layout=Layout(width='500px'), style=DescriptionStyle(descript…

Button(button_style='info', description='Use Example', style=ButtonStyle(), tooltip='Click to populate example…

Button(button_style='success', description='Confirm', style=ButtonStyle(), tooltip='Click to confirm manually …

Output()

In [38]:
# @title Upload PDB File
"""
Upload a PDB file or load an example file interactively.

The PDB file can be uploaded by the user or an example file can be loaded.
"""

# Message area for the PDB loading step
output_pdb = widgets.Output()

# Switch between uploading a PDB file and using the bundled example
load_example_toggle_pdb = widgets.ToggleButtons(
    description='Choose Option:',
    options=['Upload File', 'Load Example'],
    tooltips=['Upload your own PDB file', 'Load an example PDB file'],
    style={'description_width': '250px'},
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

# Number entry for pdb_seq_beg
pdb_seq_beg_input = widgets.IntText(
    description='Starting position of the sequence in the PDB file:',
    value=1,  # default
    style={'description_width': '300px'},  # wide label area
    layout=widgets.Layout(width='400px'),  # wide input box
    disabled=False,
)

# Clicking this button starts the load
load_button_pdb = widgets.Button(
    description='Load PDB File',
    tooltip='Click to load the PDB file',
    button_style='success',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

# Module-level state, updated by the load handler (start position defaults to 1)
pdb_file, pdb_seq_beg = None, 1

# Function to handle the loading process
def load_pdb_file(_):
    """Select the PDB file (bundled example or user upload) and its start position.

    Click handler for `load_button_pdb`. On success sets the globals
    `pdb_file` (path of the file) and `pdb_seq_beg`. When nothing could be
    loaded the globals are left untouched and no start position is reported.

    Args:
        _: The clicked button (unused).
    """
    global pdb_file, pdb_seq_beg
    with output_pdb:
        output_pdb.clear_output()  # Clear previous output
        if load_example_toggle_pdb.value == 'Load Example':
            example_path = DATA_DIR + '/pdb_files/1ubq_cleaned.pdb'
            # Only report success if the example is really there
            if not os.path.isfile(example_path):
                print(f"Error: Example PDB file not found at '{example_path}'.")
                return
            pdb_file = example_path
            pdb_seq_beg = 3  # Start position used for the example file
            pdb_seq_beg_input.value = 3  # Keep the widget in sync
            print("Example PDB file '1ubq_cleaned.pdb' loaded successfully.")
        else:
            print("Please upload a PDB file.")
            uploaded = files.upload()
            if not uploaded:
                # Do not fall through to the "Starting position" message,
                # which would read as if a file had been loaded.
                print("No file was uploaded.")
                return
            file_name = next(iter(uploaded))
            pdb_file = file_name
            pdb_seq_beg = pdb_seq_beg_input.value  # Use the value from the widget
            print(f"File '{file_name}' uploaded and loaded successfully.")
        print(f"Starting position is set to: {pdb_seq_beg}")

# Render the PDB controls and register the click handler
display(
    load_example_toggle_pdb,
    pdb_seq_beg_input,
    load_button_pdb,
    output_pdb,
)
load_button_pdb.on_click(load_pdb_file)

ToggleButtons(description='Choose Option:', options=('Upload File', 'Load Example'), style=ToggleButtonsStyle(…

IntText(value=1, description='Starting position of the sequence in the PDB file:', layout=Layout(width='400px'…

Button(button_style='success', description='Load PDB File', style=ButtonStyle(), tooltip='Click to load the PD…

Output()

In [21]:
# @title Folding Simulation

def final_folding_simulation():
    """Build and display the folding-simulation controls.

    Shows sliders for the temperature range and the critical-points factor
    plus a "Run Simulation" button. Clicking the button calls
    `ising_simulation` with the globals set by the earlier cells (`potts`,
    `AAdict`, `Sequence`, `prot_name`, `Tsel`, `breaks`) and 'results/' as the
    output folder, then plots the results and, if a PDB file has been loaded,
    shows the 3D view.
    """
    # Output widget for displaying messages
    output = Output()

    # Separate output widget for 3D visualization
    pdb_output = Output()

    # Widgets for vmin, vmax, and cp_factor
    vmin_widget = widgets.IntSlider(
        value=100,
        min=10,
        max=500,
        step=10,
        description='Min temperature:',
        continuous_update=False,
        style={'description_width': 'initial'}
    )

    vmax_widget = widgets.IntSlider(
        value=500,
        min=50,
        max=1000,
        step=10,
        description='Max temperature:',
        continuous_update=False,
        style={'description_width': 'initial'}
    )

    cp_factor_widget = widgets.IntSlider(
        value=20,
        min=1,
        max=10000,
        step=1,
        description='Critical Points factor:',
        continuous_update=False,
        style={'description_width': 'initial'}
    )

    # Action button to run the simulation
    simulation_button = widgets.Button(description="Run Simulation")

    def run_simulation(_):
        """Validate the inputs, run the simulation, then trigger the plots."""
        with output:
            output.clear_output(wait=True)

            # The inputs are globals set by earlier cells; name the ones that
            # are missing instead of failing with a NameError traceback.
            required = ('potts', 'AAdict', 'Sequence', 'prot_name', 'Tsel', 'breaks')
            missing = [name for name in required if globals().get(name) is None]
            if missing:
                print(f"Error: {', '.join(missing)} not defined. "
                      "Run the previous cells and confirm their values first.")
                return

            # Ensure the sequence is available
            if not Sequence:
                print("Error: Sequence is empty.")
                return

            vmin = vmin_widget.value  # Get the vmin value from the widget
            vmax = vmax_widget.value  # Get the vmax value from the widget
            # The two sliders overlap, so an inverted or empty range is possible
            if vmin >= vmax:
                print("Error: Min temperature must be lower than Max temperature.")
                return

            print('The simulation may take a few minutes, please wait.')

            # Run the simulation
            try:
                output_dir = 'results/'
                os.makedirs(output_dir, exist_ok=True)

                features = ising_simulation(
                    potts=potts,  # Use the user-provided potts
                    breaks=breaks,  # Use the user-provided foldon_boundaries
                    folder=output_dir,
                    AAdict = AAdict,
                    seq=Sequence,  # Use the confirmed sequence
                    prot_name=prot_name,
                    Tsel=Tsel,  # Use the user-provided Tsel
                    si0=0.005,  # Same as in ank paper
                    k=0.001985875,  # [kcal /(mol K)]
                    tini_=vmin,
                    tfin_=vmax,
                    DT=10,
                    cp_factor=cp_factor_widget.value,
                    interactions_off=False
                )
                print("Simulation completed successfully.")

                # Call visualization functions only after simulation is successful.
                # The PDB file is optional: None means the PDB cell was not used.
                plot_results_and_visualizations(
                    output_dir, prot_name, features, vmin, vmax,
                    globals().get('pdb_file'), breaks)

            except Exception as e:
                print(f"Error during simulation: {e}")

    def plot_results_and_visualizations(output_dir, prot_name, features, vmin, vmax, pdb_file, breaks):
        """Plot the simulation results and, if `pdb_file` is set, the 3D view."""
        n_residues = len(Sequence)

        try:
            # Plot results
            fig, ax = build_axes_2(1, 20, 5.5)
            ax_ff = ax[0]
            ax_domains_and_fe = [ax[1], ax[2], ax[3]]
            plot_ising(
                output_dir, ax_ff, ax_domains_and_fe, prot_name=prot_name, num_cores=cpu_count(),
                vmin=vmin, vmax=vmax, lw=0.6, lw_fq=1.5, alpha_fq=1, inter_t=1,
                fontsize=10, noninf=False, t0=50)
            plt.show()

            # 3D Visualization
            with pdb_output:
                pdb_output.clear_output(wait=True)
                if pdb_file is None:
                    print("No PDB file loaded; skipping the 3D structure view.")
                    return

                t_ = load_features(output_dir + prot_name + '/')['t_']

                # Residue numbers in the PDB file; the start position falls
                # back to 1 when the PDB cell has not defined it.
                first_pdb_pos = globals().get('pdb_seq_beg', 1)
                ali_seq_num_pdb = np.arange(first_pdb_pos, first_pdb_pos + n_residues)

                temps_seq_ref, colors_seq_ref = map_t_seq_3d(
                    t_, breaks, n_residues, rgb=True, vmin=vmin, vmax=vmax)

                view = view_3d_exon_hist(
                    ali_seq_num_pdb, [rgb2hex(c) for c in colors_seq_ref / 255], pdb_file)

                view.show()

        except Exception as e:
            print(f"Error during visualization: {e}")

    # Attach the simulation action to the simulation button
    simulation_button.on_click(run_simulation)

    # Display all widgets together
    display(VBox([
        vmin_widget,
        vmax_widget,
        cp_factor_widget,
        simulation_button,
        output,
        pdb_output
    ]))


# Call the final simplified widget
final_folding_simulation()

VBox(children=(IntSlider(value=100, continuous_update=False, description='Min temperature:', max=500, min=10, …

In [52]:
# @title Cooperativity predictions for single-site mutants


# Text box holding the sequence to analyse
sequence_input_new = widgets.Textarea(
    description='Sequence:',
    value='',  # starts blank
    layout=widgets.Layout(width='500px', height='100px'),
    style={'description_width': 'initial'},
)

# Button that copies the simulation sequence into the text box
use_previous_sequence_button = widgets.Button(
    description='Use Simulation Sequence',
    tooltip='Click to use the sequence from the previous simulation',
    button_style='info',  # one of 'success', 'info', 'warning', 'danger' or ''
    layout=widgets.Layout(width='200px'),
    disabled=False,
)

# Button that validates the text-box content
confirm_sequence_button = widgets.Button(
    description='Confirm Sequence',
    tooltip='Click to confirm the sequence and check its length',
    button_style='success',  # one of 'success', 'info', 'warning', 'danger' or ''
    layout=widgets.Layout(width='200px'),
    disabled=False,
)

# Button that starts the prediction
run_cooperativity_button = widgets.Button(
    description='Run Cooperativity Prediction',
    tooltip='Click to run cooperativity change prediction',
    button_style='warning',  # one of 'success', 'info', 'warning', 'danger' or ''
    layout=widgets.Layout(width='200px'),
    disabled=True,  # enabled once a sequence has been confirmed
)

# Message area for this cell
output_new = widgets.Output()

# Function to use the previous sequence
def use_previous_sequence(_):
    """Copy the global `Sequence` into the sequence text box, if it is set.

    Args:
        _: The clicked button (unused).
    """
    with output_new:
        output_new.clear_output()  # start from a clean message area
        previous = globals().get('Sequence')
        if not previous:
            print("Error: No previous sequence found.")
            return
        sequence_input_new.value = previous
        print("Previous sequence loaded.")

# Function to confirm the sequence and check its length
def confirm_sequence(_):
    """Validate the entered sequence and store it in the global `confirmed_sequence`.

    The sequence must be non-empty and as long as the loaded Potts model
    (`seq_len`). On success the prediction button is enabled.

    Args:
        _: The clicked button (unused).
    """
    global confirmed_sequence
    with output_new:
        output_new.clear_output()  # Clear previous output
        # Drop every whitespace character, not just leading/trailing ones:
        # line breaks inside the Textarea must not count toward the length.
        sequence = ''.join(sequence_input_new.value.split())
        if not sequence:
            print("Error: Sequence is required.")
            return

        # The expected length comes from the loaded Potts model
        expected_len = globals().get('seq_len')
        if expected_len is None:
            print("Error: Load a Potts model first so the expected sequence length is known.")
            return
        if len(sequence) != expected_len:
            print(f"Error: Sequence length must be {expected_len}, but got {len(sequence)}.")
            return

        # Store the confirmed sequence
        confirmed_sequence = sequence
        print("Sequence confirmed successfully.")
        print("Sequence:", confirmed_sequence)

        # Enable the cooperativity prediction button
        run_cooperativity_button.disabled = False

# Function to run cooperativity prediction
def run_cooperativity_prediction(_):
    """Predict cooperativity for all single-site mutants of the confirmed sequence.

    Reads the globals `confirmed_sequence`, `potts`, `breaks`, `Tsel`,
    `AAdict` and `pdb_seq_beg`, draws the prediction plot, and stores the
    mutational table in the global `coop_predictions` with
    'cooperativity_mutant' clipped to the range [0, 1].

    Args:
        _: The clicked button (unused).
    """
    global coop_predictions
    with output_new:
        output_new.clear_output()
        sequence = globals().get('confirmed_sequence')
        if not sequence:
            print("Error: No confirmed sequence available.")
            return

        # These come from earlier cells; name what is missing instead of
        # failing with a NameError traceback.
        missing = [name for name in ('potts', 'breaks', 'Tsel', 'AAdict') if globals().get(name) is None]
        if missing:
            print(f"Error: {', '.join(missing)} not defined. "
                  "Run the previous cells and confirm their values first.")
            return
        if Tsel == 0:
            # Tsel is a divisor below
            print("Error: Tsel must be non-zero.")
            return
        print('Cooperativity predictions may take a few seconds, please wait.')

        # One upper-case character per residue
        seq = np.array(list(sequence.upper()))

        # Start position in the PDB numbering; falls back to 1 when the PDB
        # cell has not defined it.
        seq_start = globals().get('pdb_seq_beg', 1)

        # Foldons
        seq_ix = breaks + seq_start

        # Load cooperativity coefficients
        coef = pd.read_csv(os.path.join(DATA_DIR, 'cooperativity_fit.csv'), index_col=0)

        # Compute energy averages
        k = 0.001985875
        m = -1 / (k * Tsel)

        decoy_es_mean, decoy_dif_ei_mean = compute_decoy_energy_averages(seq, potts, breaks, m, AAdict)
        es_mean_wt, dif_ei_mean_wt = compute_energy_averages(seq, potts, breaks, m, AAdict)

        # Plot results; keep the width at least 1 so short sequences do not
        # produce a zero-width figure.
        fig, ax = plt.subplots(1, figsize=(max(1, int(len(seq) / 5)), 3.8))
        fontsize = 12
        tick_size = 10
        num_size = 8
        xtick_sample = 10

        prediction_mut, _prediction_wt, _error = make_prediction_plots(
            ax, coef, dif_ei_mean_wt, es_mean_wt, decoy_dif_ei_mean, decoy_es_mean, seq_ix, xtick_sample, fontsize, tick_size, num_size
        )

        # Create mutational table
        coop_predictions = create_mutational_table(prediction_mut, list(seq), 'cooperativity', seq_start)
        # Clip the mutant predictions to [0, 1]
        coop_predictions['cooperativity_mutant'] = coop_predictions['cooperativity_mutant'].clip(lower=0, upper=1)
        output_new.clear_output()

        # Display the plot
        plt.show()

        print("Analysis completed. The cooperativity predictions are stored in the variable `coop_predictions`.")

# Render the controls, then register one handler per button
display(
    sequence_input_new,
    use_previous_sequence_button,
    confirm_sequence_button,
    run_cooperativity_button,
    output_new,
)
run_cooperativity_button.on_click(run_cooperativity_prediction)
confirm_sequence_button.on_click(confirm_sequence)
use_previous_sequence_button.on_click(use_previous_sequence)

Textarea(value='', description='Sequence:', layout=Layout(height='100px', width='500px'), style=DescriptionSty…

Button(button_style='info', description='Use Simulation Sequence', layout=Layout(width='200px'), style=ButtonS…

Button(button_style='success', description='Confirm Sequence', layout=Layout(width='200px'), style=ButtonStyle…



Output()

In [51]:
# Show the ten rows with the lowest `cooperativity_difference` (ascending sort).
coop_predictions.sort_values('cooperativity_difference').head(10)

Unnamed: 0,Position,wt_aa,mut_aa,cooperativity_mutant,cooperativity_difference,mutant
990,50,L,D,0.626475,-0.340266,L50D
849,43,I,K,0.678002,-0.28874,I43K
852,43,I,N,0.678002,-0.28874,I43N
844,43,I,E,0.678002,-0.28874,I43E
582,30,I,R,0.709353,-0.257389,I30R
490,26,L,H,0.718312,-0.24843,L26H
492,26,L,K,0.718312,-0.24843,L26K
435,23,V,R,0.735729,-0.231013,V23R
495,26,L,N,0.785566,-0.181176,L26N
798,41,Q,-,0.851325,-0.115417,Q41-
