<a href="https://colab.research.google.com/github/agdiaz/Bloom/blob/master/Bio2ByteTools_v3_multipleseq_pypi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title 1. Install the Bio2Byte tools package and its dependencies

#@markdown Once this cell has been executed, the Bio2Byte tools package
#@markdown and its dependencies will be installed in this runtime.

#@markdown **Important**: Because this cell installs new versions of some dependencies,
#@markdown please click the "Restart runtime" button once pip has finished
#@markdown installing the libraries, and then run this cell again.

!pip install b2bTools==3.0.4 biopython matplotlib==3.4.0 ipympl

In [None]:
#@title 2.a Upload your MSA file

#@markdown Once this cell has been executed, your target MSA file
#@markdown will be ready to run the predictions on it
import os
import shutil
from google.colab import files

# Start from a clean results folder so files from a previous alignment are not
# mixed with the ones produced for this upload.
if os.path.exists("/content/results"):
    shutil.rmtree("/content/results")

msa_filename = None

# files.upload() opens the browser file picker; the loop below treats its
# return value as a mapping of file name -> uploaded content.
uploaded = files.upload()

for fn in uploaded.keys():
  print('MSA file "{name}" with length {length} bytes uploaded with success'.format(
      name=fn, length=len(uploaded[fn])))
  msa_filename = fn

# Fail here, with a clear message, instead of letting the next cells try to
# read a file literally called "None".
if msa_filename is None:
    raise ValueError("No MSA file was uploaded; run this cell again and select a file")

# Only one alignment is processed by the notebook: make the choice explicit.
if len(uploaded) > 1:
    print('Several files were uploaded; only "{name}" will be used as the MSA'.format(
        name=msa_filename))

In [None]:
#@title 2.b MSA file format
#@markdown This cell lists the file formats this notebook can read

# Alignment format name; step 4 passes it to Bio.AlignIO.read as the format argument.
msa_type = 'fasta' #@param ["clustal", "fasta", "phylip", "stockholm", "emboss"]

In [None]:
#@title 3. Get the predictions for each sequence of the alignment

#@markdown Once this cell has been executed, the Bio2Byte tools will have predicted
#@markdown the values for each sequence of the alignment, one sequence at a time
%%capture

from b2bTools.multipleSeq.Predictor import MineSuiteMSA

# Predictor object reused by every later cell (plots and JSON export).
msaSuite = MineSuiteMSA()
# Request the eFoldMine, disoMine and dynamine prediction types for the uploaded
# alignment. The path assumes the upload was stored under /content — TODO confirm
# if the working directory is ever changed.
msaSuite.predictAndMapSeqsFromMSA(f"/content/{msa_filename}", predTypes = ('eFoldMine', 'disoMine', 'dynamine'))
# NOTE(review): presumably this fills msaSuite.alignedPredictionDistribs, which the
# plotting cells read afterwards — confirm against the b2bTools documentation.
msaSuite.getDistributions()

In [None]:
#@title 4. Sequence conservation (Shannon's entropy)
#@markdown These functions declared here will be used in the following cells to
#@markdown plot different predictions

import math
import Bio
from Bio import AlignIO

#shannon_entropy scores the variability of the amino acids at one position of the MSA (conservation is derived from it).
#It also reports the fraction of gaps in that column. See https://doi.org/10.1002/prot.10146 for the formula
def shannon_entropy(list_input):
    """Score the variability of a single alignment column.

    Args:
        list_input: the symbols found in one column of the MSA (one entry per
            sequence). The gap character "-" is counted like any other symbol
            when the entropy is computed.

    Returns:
        A ``(entropy, gap_fraction)`` tuple. ``entropy`` is the base-2 Shannon
        entropy of the column divided by ``log2`` of the number of distinct
        symbols in it (0 when the column holds a single distinct symbol);
        ``gap_fraction`` is the share of "-" entries in the column.

    Raises:
        ZeroDivisionError: if ``list_input`` is empty.
    """
    column_size = len(list_input)
    gap_fraction = list_input.count("-") / column_size
    symbols = set(list_input)

    # One p * log2(p) term per distinct symbol of the column.
    terms = []
    for symbol in symbols:
        frequency = list_input.count(symbol) / column_size
        terms.append(frequency * math.log(frequency, 2))
    raw_sum = math.fsum(terms)

    # A single distinct symbol would give log2(1) == 0 as divisor, so the
    # (zero) sum is returned without normalisation in that case.
    scale_base = min(column_size, len(symbols))
    if scale_base == 1:
        return raw_sum, gap_fraction

    # High values mean many different symbols are observed (high variability).
    return (-1 / math.log(scale_base, 2)) * raw_sum, gap_fraction


def conservation(alignment_file):
    """Compute one conservation score per column of an alignment.

    Args:
        alignment_file: the parsed alignment. It must support ``alignment[0]``
            (first sequence, used to obtain the number of columns) and
            ``alignment[:, col]`` (all symbols of one column).

    Returns:
        A list with one value per column: ``(1 - entropy) * (1 - gap_fraction)``
        using the two values returned by ``shannon_entropy``. 0 means no
        conservation, 1 means highly conserved.
    """
    column_count = len(list(alignment_file[0]))

    def column_score(col_no):
        # Entropy is turned into conservation and down-weighted by the gaps.
        entropy_AA, gap_fraction = shannon_entropy(list(alignment_file[:, col_no]))
        return (1 - entropy_AA) * (1 - gap_fraction)

    return [column_score(col_no) for col_no in range(column_count)]

# Parse the uploaded alignment using the format selected in step 2.b. The bare
# file name is resolved against the current working directory.
alignment_file = AlignIO.read(msa_filename, msa_type)
# One conservation score per alignment column; reused by the plots of 5.b and 5.c.
conservation_AA_list = conservation(alignment_file)

In [None]:
#@title 5. Prepare to plot results
#@markdown Run this cell in order to prepare the notebook context to render different plots

from google.colab import output
output.enable_custom_widget_manager()
%matplotlib widget

import matplotlib.pyplot as plt
import os

# Create the output folder when it is missing; exist_ok makes the call a no-op
# when it already exists, without a separate check-then-create step.
os.makedirs("/content/results", exist_ok=True)

# Subplot title shown for each predicted property (keyed by the property name
# used in the prediction results).
PREDICTION_TITLES = {
    'backbone': "DynaMine backbone dynamics",
    'sidechain': "DynaMine sidechain dynamics",
    'sheet': "DynaMine conformational propensities: Sheet",
    'coil': "DynaMine conformational propensities: Coil",
    'helix': "DynaMine conformational propensities: Helix",
    'ppII': "DynaMine conformational propensities: ppII (polyproline II)",
    'disoMine': "Disorder (disoMine)",
    'earlyFolding': "Early folding (EFoldMine)" 
}

# (row, column) of each property inside the 2x4 subplot grid of the global plot (5.a).
PREDICTION_POSITION = {
    'backbone':     (0, 0),
    'sidechain':    (0, 1),
    'ppII':         (0, 2),
    'coil':         (0, 3),
    'sheet':        (1, 0),
    'helix':        (1, 1),
    'disoMine':     (1, 2),
    'earlyFolding': (1, 3) 
}

# Axis labels shared by every subplot.
AXIS_TITLES = {
    "x": "Residue position in the MSA",
    "y": "Prediction values"
}

In [None]:
#@title 5.a Plot global results
#@markdown Once this cell has been executed, a series of plots
#@markdown with the distributions will be rendered.

import numpy as np
import matplotlib.pyplot as plt

def _non_null_segments(none_idx, total):
    """Return the half-open ``(start, stop)`` index ranges free of missing values.

    Args:
        none_idx: ascending indices at which a value is missing.
        total: length of the series.

    Returns:
        The list of non-empty ranges, in ascending order, covering every index
        of ``range(total)`` that is not listed in ``none_idx``.
    """
    segments = []
    start = 0
    for idx in none_idx:
        if idx > start:
            segments.append((start, idx))
        start = idx + 1
    if start < total:
        segments.append((start, total))
    return segments


def plot_msa_distrib_no_entropy(jsondata_list, sequences, mutation=False):
    """Plot the distribution of every predicted property in a 2x4 grid.

    Args:
        jsondata_list: list of dicts keyed by property name. Each value is
            indexed with 'median', 'firstQuartile', 'thirdQuartile',
            'bottomOutlier' and 'topOutlier' (per-residue series that may hold
            None). Entries are paired with the colors blue and orange, so at
            most two entries are drawn.
        sequences: kept for interface compatibility; not used by the plot.
        mutation: falsy to disable, otherwise a mapping whose
            ``['results'][property]`` series is drawn in red on each panel.

    Returns:
        ``(fig, axs)`` as created by ``plt.subplots(2, 4)``.

    Properties of the first entry that have no slot in PREDICTION_POSITION
    (for example 'agmata') are skipped.
    """
    colors = ['blue', 'orange']
    residues_count = len(jsondata_list[0]['backbone']['median'])

    #Plot representation
    fig, axs = plt.subplots(2, 4)
    fig.set_figwidth(30)
    fig.set_figheight(10)
    plt.subplots_adjust(hspace=0.4)

    for biophys_data in jsondata_list[0].keys():
        if biophys_data not in PREDICTION_POSITION:
            continue

        subplot_index_row, subplot_index_col = PREDICTION_POSITION[biophys_data]
        ax = axs[subplot_index_row, subplot_index_col]

        for data, col in zip(jsondata_list, colors):
            distribution = data[biophys_data]
            median = distribution['median']
            first_quartile = distribution['firstQuartile']
            third_quartile = distribution['thirdQuartile']

            # Positions where the band cannot be drawn because a value is missing.
            none_idx = [
                n for n in range(len(median))
                if median[n] is None
                or first_quartile[n] is None
                or third_quartile[n] is None
            ]

            # The bands are drawn segment by segment, leaving out the positions
            # with missing values. Without missing values there is exactly one
            # segment spanning the whole series.
            segments = _non_null_segments(none_idx, len(median))
            for segment_no, (start, stop) in enumerate(segments):
                x = np.arange(start, stop, 1)
                # Only the first segment carries the legend labels; labels that
                # start with an underscore are ignored by the legend, so each
                # band is listed once instead of once per segment.
                if segment_no == 0:
                    inner_label, outer_label = "1st", "3rd"
                else:
                    inner_label, outer_label = "_nolegend_", "_nolegend_"

                ax.fill_between(
                    x, first_quartile[start:stop], third_quartile[start:stop],
                    alpha=0.5, color=col, label=inner_label)
                ax.fill_between(
                    x, distribution['bottomOutlier'][start:stop],
                    distribution['topOutlier'][start:stop],
                    alpha=0.25, color=col, label=outer_label)

            ax.plot(median, linewidth=1, color=col)

            if mutation:
                print(mutation)
                ax.plot(mutation['results'][biophys_data],
                         linewidth=0.5, color='red')

        ax.set_title(PREDICTION_TITLES[biophys_data])
        ax.axis([0, residues_count, -1.0, 1.5])
        ax.set_ylabel(AXIS_TITLES['y'])
        ax.set_xlabel(AXIS_TITLES['x'])
    
    return fig, axs
  
# The distributions are wrapped in a list because the plotting function pairs
# each list entry with one color.
jsondata_list = [msaSuite.alignedPredictionDistribs]
# Number of alignment columns, read from the backbone median series.
residues_count = len(jsondata_list[0]['backbone']['median'])
sequences =  msaSuite.seqs
fig, axs = plot_msa_distrib_no_entropy(jsondata_list, sequences, mutation=False)

plt.suptitle(f"MSA predicted biophysical properties: {residues_count} aligned residues from {len(sequences)} sequences", fontsize=14)
# plt.legend acts on pyplot's current axes, not on the figure as a whole.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), title = "Quartiles:")
# The layout is tightened before saving so the PNG matches the displayed figure.
plt.tight_layout()
plt.savefig('/content/results/msa.png' )
plt.show()

In [None]:
# Define the ranges of residues to plot:

# Each entry is (label, (first_residue, end_residue)). The plotting function of
# 5.b draws one figure per entry, slices the data with [first_residue:end_residue]
# and uses the label in the figure title and in the name of the saved PNG.
# Edit this list to plot other regions.
all_regions = [
    ("First quarter", (0, math.floor(residues_count * 0.25))),
    ("Second quarter", (math.floor(residues_count * 0.25), math.floor(residues_count * 0.50))),
    ("Third quarter", (math.floor(residues_count * 0.5), math.floor(residues_count * 0.75))),
    ("Fourth quarter", (math.floor(residues_count * 0.75), residues_count)),
    # ("All", (0, residues_count)),
]

In [None]:
#@title 5.b. Plot the sequence conservation (Shannon's entropy) results
#@markdown Once this cell has been executed, a plot
#@markdown with the distributions will be rendered.

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

def plot_msa_distrib_all(jsondata_list, sequences, all_regions, conservation_AA_list,mutation=False):
    """Draw, save and show one 2x4 figure of prediction distributions per region.

    Args:
        jsondata_list: list of dicts keyed by property name. Each value is
            indexed with 'median', 'firstQuartile', 'thirdQuartile',
            'bottomOutlier' and 'topOutlier' (per-residue series).
        sequences: only its length is used (figure title).
        all_regions: iterable of ``(label, (lower_lim, upper_lim))`` entries;
            the data is sliced with ``[lower_lim:upper_lim]``.
        conservation_AA_list: one conservation score per residue position,
            used to color the band between the first and third quartiles.
        mutation: accepted for interface compatibility; not used.

    Every figure is written to ``/content/results/<label>_entropy.png``.
    Properties that have no panel in the grid (for example 'agmata') are
    skipped.
    """
    sequences_count = len(sequences)

    #Color map for conservation sequence
    cmap = mpl.cm.Blues
    normalize = mpl.colors.Normalize(vmin=min(conservation_AA_list), vmax=max(conservation_AA_list))

    # Property shown in each of the eight subplot slots (1..8, row by row).
    # Panels are looked up by property name so that the data, the title and the
    # reference lines of a property always end up on the same axes, whatever
    # the order of the keys in the prediction results.
    panel_order = ("backbone", "sidechain", "ppII", "coil",
                   "sheet", "helix", "earlyFolding", "disoMine")

    # Horizontal reference lines per property: (y, color, legend label).
    reference_lines = {
        "backbone": (
            (1.0, 'orange', "Membrane spaning"),   # membrane spanning
            (0.8, 'blue', "Context dependent"),    # either rigid or flexible
            (0.69, 'red', "Flexible"),             # flexible
        ),
        # above: likely to start the protein folding process
        "earlyFolding": ((0.169, 'red', "above: Likely"),),
        # above: likely disordered
        "disoMine": ((0.5, 'red', "above: Likely"),),
    }

    for region in all_regions:

        #To improve: use probabilistic description TM regions/ ICLs/ ECLs
        name_region = region[0]
        lower_lim = region[1][0]
        upper_lim = region[1][1]

        #Plot representation
        fig = plt.figure()
        axis_by_property = {
            biophys_data: fig.add_subplot(2, 4, slot)
            for slot, biophys_data in enumerate(panel_order, start=1)
        }

        fig.set_figwidth(30)
        fig.set_figheight(10)
        fig.suptitle(f'MSA predicted biophysical properties: "{name_region}" (residue {lower_lim} to residue {upper_lim}) from {sequences_count} sequences', fontsize=14)

        plt.subplots_adjust(hspace=0.4)

        x = np.arange(lower_lim, upper_lim, 1)
        # Conservation scores of the region (they select the fill color).
        entropy_values = conservation_AA_list[lower_lim:upper_lim]

        #For every predicted biophysical property
        for biophys_data in jsondata_list[0].keys():
            ax = axis_by_property.get(biophys_data)
            if ax is None:
                continue

            for data in jsondata_list:
                distribution = data[biophys_data]
                firstq = distribution['firstQuartile'][lower_lim:upper_lim]
                thirdq = distribution['thirdQuartile'][lower_lim:upper_lim]
                bottom = distribution['bottomOutlier'][lower_lim:upper_lim]
                top = distribution['topOutlier'][lower_lim:upper_lim]

                # The inter-quartile band is filled one residue step at a time
                # so each step can take the color of its conservation score.
                for i in range(len(x)-1):
                    ax.fill_between([x[i], x[i+1]], [firstq[i], firstq[i+1]], [thirdq[i], thirdq[i+1]],
                        color=cmap(normalize(entropy_values[i])))

                ax.plot(x, firstq, linewidth=0.25, color="black")
                ax.plot(x, thirdq, linewidth=0.25, color="black")
                ax.plot(x, bottom, alpha=0.25, color="black")
                ax.plot(x, top, alpha=0.25, color="black")

                # The median is drawn over the whole alignment; the x limits
                # set below restrict the view to the region.
                ax.plot(distribution['median'], linewidth=1, color="black")

            ax.set_title(PREDICTION_TITLES[biophys_data])
            ax.set_xlim([lower_lim, upper_lim-1])
            ax.set_ylim([-0.2, 1.2])
            ax.set_xlabel(AXIS_TITLES['x'])
            ax.set_ylabel(AXIS_TITLES['y'])

            thresholds = reference_lines.get(biophys_data, ())
            for y_value, line_color, line_label in thresholds:
                ax.axhline(y=y_value, color=line_color, linestyle='-', label=line_label)
            if thresholds:
                ax.legend(title = "Regions:")

        #To add legend color map (where to put it???)
        # scalarmappaple = mpl.cm.ScalarMappable(norm=normalize, cmap=cmap)
        # scalarmappaple.set_array(entropy_values)
        # plt.colorbar(scalarmappaple)

        # Tighten the layout before saving so the PNG matches the shown figure.
        plt.tight_layout()
        plt.savefig('/content/results/'+ name_region + '_entropy.png')
        plt.show()
  
# Inputs are re-read from the predictor so this cell does not depend on 5.a
# having been run.
jsondata_list = [msaSuite.alignedPredictionDistribs]
sequences = msaSuite.seqs
residues_count = len(jsondata_list[0]['backbone']['median'])

# One figure per entry of all_regions (defined in the previous cell).
plot_msa_distrib_all(jsondata_list, sequences, all_regions, conservation_AA_list, mutation=False)

In [None]:
#@title 5.c. Plot the sequence conservation (Shannon's entropy) results (SPECIFIC REGION)
#@markdown Once this cell has been executed, a plot
#@markdown with the distributions will be rendered.

#@markdown **Notes**: Use the blue bar on the bottom of the plots to adjust the region to render 


import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.widgets import Button, RangeSlider
# from mpl_interactions.widgets import RangeSlider

def plot_msa_distrib(jsondata_list, sequences, lower_lim, upper_lim, conservation_AA_list,mutation=False):
    """Draw a 2x4 figure of prediction distributions for one residue range.

    Args:
        jsondata_list: list of dicts keyed by property name. Each value is
            indexed with 'median', 'firstQuartile', 'thirdQuartile',
            'bottomOutlier' and 'topOutlier' (per-residue series).
        sequences: only its length is used (figure title).
        lower_lim: first residue index of the range (used as slice start).
        upper_lim: end residue index of the range (used as slice stop).
        conservation_AA_list: one conservation score per residue position,
            used to color the band between the first and third quartiles.
        mutation: accepted for interface compatibility; not used.

    Returns:
        ``(fig, ax_list)`` where ``ax_list`` holds the eight axes in subplot
        order (row by row).

    Properties that have no panel in the grid (for example 'agmata') are
    skipped.
    """
    sequences_count = len(sequences)

    #Color map for conservation sequence
    cmap = mpl.cm.Blues
    normalize = mpl.colors.Normalize(vmin=min(conservation_AA_list), vmax=max(conservation_AA_list))

    # Property shown in each of the eight subplot slots (1..8, row by row).
    # Panels are looked up by property name so that the data, the title and the
    # reference lines of a property always end up on the same axes, whatever
    # the order of the keys in the prediction results.
    panel_order = ("backbone", "sidechain", "ppII", "coil",
                   "sheet", "helix", "earlyFolding", "disoMine")

    # Horizontal reference lines per property: (y, color, legend label).
    reference_lines = {
        "backbone": (
            (1.0, 'orange', "Membrane spaning"),   # membrane spanning
            (0.8, 'blue', "Context dependent"),    # either rigid or flexible
            (0.69, 'red', "Flexible"),             # flexible
        ),
        # above: likely to start the protein folding process
        "earlyFolding": ((0.169, 'red', "above: Likely"),),
        # above: likely disordered
        "disoMine": ((0.5, 'red', "above: Likely"),),
    }

    #Plot representation
    fig = plt.figure()
    ax_list = [fig.add_subplot(2, 4, slot) for slot in range(1, 9)]
    axis_by_property = dict(zip(panel_order, ax_list))

    fig.set_figwidth(30)
    fig.set_figheight(10)
    plt.subplots_adjust(hspace=0.4)

    fig.suptitle(f'MSA predicted biophysical properties: residue {int(lower_lim)} to residue {int(upper_lim)} from {sequences_count} sequences', fontsize=14)

    x = np.arange(lower_lim, upper_lim, 1)
    # Conservation scores of the range (they select the fill color).
    entropy_values = conservation_AA_list[lower_lim:upper_lim]

    #For every predicted biophysical property
    for biophys_data in jsondata_list[0].keys():
        ax = axis_by_property.get(biophys_data)
        if ax is None:
            continue

        for data in jsondata_list:
            distribution = data[biophys_data]
            firstq = distribution['firstQuartile'][lower_lim:upper_lim]
            thirdq = distribution['thirdQuartile'][lower_lim:upper_lim]
            bottom = distribution['bottomOutlier'][lower_lim:upper_lim]
            top = distribution['topOutlier'][lower_lim:upper_lim]

            # The inter-quartile band is filled one residue step at a time so
            # each step can take the color of its conservation score.
            for i in range(len(x)-1):
                ax.fill_between([x[i], x[i+1]], [firstq[i], firstq[i+1]], [thirdq[i], thirdq[i+1]],
                    color=cmap(normalize(entropy_values[i])))

            ax.plot(x, firstq, linewidth=0.25, color="black")
            ax.plot(x, thirdq, linewidth=0.25, color="black")
            ax.plot(x, bottom, alpha=0.25, color="black")
            ax.plot(x, top, alpha=0.25, color="black")

            # The median is drawn over the whole alignment; the x limits set
            # below restrict the view to the requested range.
            ax.plot(distribution['median'], linewidth=1, color="black")

        ax.set_title(PREDICTION_TITLES[biophys_data])
        ax.set_xlim([lower_lim, upper_lim-1])
        ax.set_ylim([-0.2, 1.2])
        ax.set_xlabel(AXIS_TITLES['x'])
        ax.set_ylabel(AXIS_TITLES['y'])

        thresholds = reference_lines.get(biophys_data, ())
        for y_value, line_color, line_label in thresholds:
            ax.axhline(y=y_value, color=line_color, linestyle='-', label=line_label)
        if thresholds:
            ax.legend(title = "Regions:")

    return fig, ax_list


jsondata_list = [msaSuite.alignedPredictionDistribs]
sequences = msaSuite.seqs
residues_count = len(jsondata_list[0]['backbone']['median'])

# Create the figure for the whole alignment; the slider below only changes
# the visible x range of its axes.
fig, ax_list = plot_msa_distrib(jsondata_list, sequences, 0, residues_count, conservation_AA_list)

# Make a horizontal range slider to select the first and last residue shown.
ax_residues_slider = plt.axes([0.25, 0.1, 0.65, 0.03])
# NOTE(review): `init` is never passed to RangeSlider, so the slider starts at
# the widget's own default position rather than at the full range — confirm
# whether `valinit=init` was intended.
init = (0, residues_count)
residues_slider = RangeSlider(ax_residues_slider, 'Residues', 0, residues_count, valstep=1)

# The function to be called anytime a slider's value changes
def update(val):
    """Slider callback: show only the residue range selected on the slider.

    Args:
        val: new slider value supplied by the widget; the range is read from
            ``residues_slider.val`` instead.
    """
    new_lower_lim, new_upper_lim = residues_slider.val
    for ax in ax_list:
        ax.set_xlim(new_lower_lim, new_upper_lim)

    # Same formatting as the initial title: integer residue numbers and an
    # explicit font size, so the title does not change size on the first move.
    fig.suptitle(f'MSA predicted biophysical properties: residue {int(new_lower_lim)} to residue {int(new_upper_lim)} from {len(sequences)} sequences', fontsize=14)
    # Request the redraw once every change has been applied.
    fig.canvas.draw_idle()

# Register the callback that runs every time the slider range changes.
residues_slider.on_changed(update)

# Adjust the main plot to make room for the slider: the margins are set after
# tight_layout so they are the ones in effect when the figure is shown.
plt.tight_layout()
plt.subplots_adjust(left=0.05, bottom=0.25)
plt.show()

In [None]:
#@title 6. Save the predictions to json files

#@markdown Once this cell has been executed, a json file with
#@markdown the obtained predictions will be automatically created for each
#@markdown sequence of the alignment inside the /content/results folder
%%capture

import os
from os import path

# Create the output folder when it is missing; exist_ok makes the call a no-op
# when it already exists, without a separate check-then-create step.
os.makedirs("/content/results", exist_ok=True)

# Each entry of msaSuite.seqs is unpacked as (identifier, sequence). The loop
# variable is not called `id`, so the builtin id() stays usable in the rest of
# the notebook session.
for seq_id, _sequence in msaSuite.seqs:
    print("PROCESSING SEQUENCE: id={id}".format(id=seq_id))

    json_results = msaSuite.getAllPredictionsJson(identifier=seq_id)
    # "/" is replaced in both name parts so the result is a single file name
    # inside the results folder.
    output_path = "/content/results/{msa}_{id}.json".format(
        msa=msa_filename.replace("/", "_"), id=seq_id.replace("/", "_"))
    with open(output_path, "w") as file_output:
        file_output.write(json_results)


In [None]:
#@title 7. Download the predictions

#@markdown Once this cell has been executed, a zip-archive with
#@markdown the obtained predictions will be automatically downloaded
#@markdown to your computer.

!zip --quiet -r /content/b2b-tools-msa.zip /content/results

files.download(f"/content/b2b-tools-msa.zip")