<img src="https://raw.githubusercontent.com/engelberger/FrustraEvo/master/dalle3.png" height="200" align="right" style="height:240px">

# FrustraEvo Google Colab Notebook

This notebook is designed to help you understand and use FrustraEvo, a tool that allows you to study energetic patterns within and between protein families. FrustraEvo calculates the frustration logo and the frustration contact maps, using a set of aligned sequences (MSA) and their corresponding structures as input files.

## Usage

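To use FrustraEvo, run the cells below in order: first set up udocker and pull the FrustraEvo image, then fill in the parameters and run FrustraEvo, and finally display the resulting tables and figures.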

## Feedback/Issues

Please report any issues to mariaines.freiberger@gmail.com

In [None]:
#@title Setup Docker and Pull Image

import os
import hashlib

def add_hash(x,y):
  """Return ``x`` followed by ``_`` and a short SHA-1 tag derived from ``y``.

  Args:
    x: Base string that the tag is appended to.
    y: String whose UTF-8 encoding is hashed with SHA-1.

  Returns:
    ``x``, an underscore, and the first five hexadecimal characters of the
    SHA-1 digest of ``y``.
  """
  full_digest = hashlib.sha1(y.encode()).hexdigest()
  short_tag = full_digest[:5]
  # str.join keeps the "strings only" contract of the original concatenation
  return "_".join((x, short_tag))

def check_dir_exists(dir_path):
  """Tell whether ``dir_path`` already exists on the filesystem.

  The check is done with ``os.path.exists``, so any existing path (not only a
  directory) yields ``True``.

  Args:
    dir_path: Path to test.

  Returns:
    ``True`` if the path exists, ``False`` otherwise.
  """
  path_present = os.path.exists(dir_path)
  return path_present

# Install udocker
# (`-q` keeps pip's output quiet; the `udocker` command is used by the steps below)
print("Installing udocker...")
!pip install -q udocker

# Allow root access for udocker
# (every udocker call in this notebook is made with `--allow-root`)
print("Allowing root access for udocker...")
!udocker --allow-root install

# Pull the FrustraEvo image
# (`> /dev/null` discards the command's standard output only)
print("Pulling FrustraEvo image...")
!udocker --allow-root pull proteinphysiologylab/frustraevo > /dev/null


In [None]:
#@title Run FrustraEvo
from google.colab import files
import os
#@markdown Create a folder where you will put the PDB files and the Multiple Sequence Alignment file (MSA) in .fasta format.

# Top-level working folder; run folders are created inside it, relative to the current working directory
folder_name = 'FrustraEvo'

#@markdown Enter the parameters for the bash file:

# User-chosen label; a short hash is appended to it below to name this run's folder
job_id = 'example' #@param {type:"string"}
# Name of the per-run subfolder that receives the MSA and PDB files
input_files = 'input_files'

# When True, the example inputs are downloaded instead of asking for uploads
run_example = True #@param {type:"boolean"}
#@markdown If `run_example` is True, `fasta_file`, `pdb_source` and `protein_ref` below are ignored and the example inputs are used instead.
fasta_file = 'upload_fasta'  #@param ["upload_fasta"]
pdb_source = 'upload_zip' #@param ["upload_zip", "predict_with_esm"]
# Forwarded to the generated run script as the reference-protein argument
protein_ref = '3a0g-A' #@param {type:"string"}
# Forwarded to the generated run script as its last argument (used in example mode too)
contact_maps = 'yes' #@param ["yes", "no"]

# Add hash to job_id
job_id_hashed = add_hash(job_id, fasta_file)

# Pick a run-folder name that is not taken yet.
# Run folders are created inside `folder_name`, so the collision check has to
# look there; checking the bare name in the working directory never matches and
# a re-run would silently reuse (and collide with) the previous run's folder.
if check_dir_exists(os.path.join(folder_name, job_id_hashed)):
  n = 0
  # Append the first free numeric suffix: <id>_0, <id>_1, ...
  while check_dir_exists(os.path.join(folder_name, f"{job_id_hashed}_{n}")):
    n += 1
  job_id_hashed = f"{job_id_hashed}_{n}"

# Run FrustraEvo:

# Make sure this run's input and output folders exist
print("Creating folders...")
for subfolder in (input_files, "output_files"):
  run_subdir = os.path.join(os.getcwd(), f"{folder_name}/{job_id_hashed}/{subfolder}")
  os.makedirs(run_subdir, exist_ok=True)

if run_example == False:
  if fasta_file == 'upload_fasta':
    # Upload fasta file
    print("Please upload your fasta file")
    uploaded = files.upload()

    # Save uploaded file to the correct directory
    for fn in uploaded.keys():
      with open(os.path.join(os.getcwd(), f"{folder_name}/{job_id_hashed}/{input_files}", fn), 'wb') as f:
          f.write(uploaded[fn])
      if fn.endswith('.fasta'):
        fasta_file = fn  # Update the fasta_file variable with the name of the uploaded file

  if pdb_source == 'upload_zip':
    # Upload PDB files
    print("Please upload your PDB files in a zip format")
    uploaded = files.upload()

    # Unzip the uploaded file
    for fn in uploaded.keys():
      # Save uploaded file to the correct directory
      with open(os.path.join(os.getcwd(), f"{folder_name}/{job_id_hashed}/{input_files}", fn), 'wb') as f:
          f.write(uploaded[fn])
      if fn.endswith('.zip'):
        !unzip -q {os.path.join(os.getcwd(), f"{folder_name}/{job_id_hashed}/{input_files}", fn)} -d {os.path.join(os.getcwd(), f"{folder_name}/{job_id_hashed}/{input_files}")}

  if pdb_source == 'predict_with_esm':
    # TODO : Implement prediction with ESM
    pass


if run_example:
  job_id = 'example'
  fasta_file = 'Alphas.fasta'
  protein_ref = '3a0g-A'
  # Download example inputs
  print("Downloading example inputs...")
  !wget -q -O {os.path.join(os.getcwd(), folder_name, job_id_hashed, input_files, fasta_file)} https://frustraevo.qb.fcen.uba.ar/static/{fasta_file} > /dev/null
  !wget -q -O {os.path.join(os.getcwd(), folder_name, job_id_hashed, input_files, "pdbs.zip")} https://frustraevo.qb.fcen.uba.ar/static/pdbs.zip > /dev/null
  !unzip -qq -o {os.path.join(os.getcwd(), folder_name, job_id_hashed, input_files, "pdbs.zip")} -d {os.path.join(os.getcwd(), folder_name, job_id_hashed, input_files)} > /dev/null


# Create and populate the bash script
print("Creating bash script...")
bash_script_content = f"""
#! /bin/bash
udocker --allow-root run -v $1:/pdb/ --rm proteinphysiologylab/frustraevo:latest /bin/bash -c "cd / && sh /run.sh $2 $3 $4 $5" > log.txt
cd $1
chown -R $(whoami) $1
mv FrustraEvo_$2 ../output_files
"""
with open(os.path.join(os.getcwd(), f"{folder_name}/{job_id_hashed}/run.sh"), "w") as file:
    file.write(bash_script_content)

# Run the bash script
print(f"Running bash script...")
!sudo sh {os.path.join(os.getcwd(), folder_name, job_id_hashed, 'run.sh')} {os.path.join(os.getcwd(), folder_name, job_id_hashed, input_files)} {job_id_hashed} {fasta_file} {protein_ref} {contact_maps}

#@markdown Your results will be saved in the specified folder.

In [None]:
#@title Display interactive tables
import os
import matplotlib.pyplot as plt
import pandas as pd
import IPython.display as display
from google.colab import data_table

# Render DataFrames as interactive Colab data tables
data_table.enable_dataframe_formatter()

# Define the path to the output files
output_path = os.path.join(os.getcwd(), folder_name, job_id_hashed, 'output_files', f'FrustraEvo_{job_id_hashed}', 'OutPutFiles')

# Fail with an actionable message when the run produced no output folder
if not os.path.isdir(output_path):
    raise FileNotFoundError(
        f"Output folder not found: {output_path}. "
        "Check the output of the 'Run FrustraEvo' cell for errors."
    )

# Display tables (sorted, because os.listdir returns entries in arbitrary order)
table_files = sorted(f for f in os.listdir(output_path) if f.endswith('.tab'))
if not table_files:
    print(f"No .tab files found in {output_path}")
for table_file in table_files:
    print(f"Displaying table: {table_file}")
    df = pd.read_csv(os.path.join(output_path, table_file), sep='\t')
    display.display(df)

In [None]:
#@title Display output figures
import os
import matplotlib.pyplot as plt
import pandas as pd
import IPython.display as display
from google.colab import data_table

data_table.enable_dataframe_formatter()

# Define the path to the output files
output_path = os.path.join(os.getcwd(), folder_name, job_id_hashed, 'output_files', f'FrustraEvo_{job_id_hashed}', 'OutPutFiles')

# Fail with an actionable message when the run produced no output folder
if not os.path.isdir(output_path):
    raise FileNotFoundError(
        f"Output folder not found: {output_path}. "
        "Check the output of the 'Run FrustraEvo' cell for errors."
    )

# Display images (sorted, because os.listdir returns entries in arbitrary order)
image_files = sorted(f for f in os.listdir(output_path) if f.endswith('.png'))
if not image_files:
    print(f"No .png files found in {output_path}")
for image_file in image_files:
    print(f"Displaying image: {image_file}")
    display.display(display.Image(os.path.join(output_path, image_file)))



