<a href="https://colab.research.google.com/github/phenix-project/Colabs/blob/main/alphafold2/AlphaFoldPhenixGUI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title 1. AlphaFold from Phenix GUI:  <font color='green'>Hit the "Run cell" button to the left to start </font>

#@markdown <font color='blue'>When the "Choose files" button appears, upload your .tgz file from Phenix
#@markdown (usually in your Downloads folder) </font>


#@markdown The "Run cell" button will have a circling arc while running
#@markdown and a summary of what is happening will be printed

#@markdown When finished the "Run cell" button should return to a triangle.

#@markdown The result .zip file should automatically download. If it does
#@markdown not, use the file browser to the left to find it and
#@markdown click on the 3 dots to download it


from google.colab import files
from pathlib import Path
import os

# Fixed locations used by the rest of this cell
content_dir = "/content"
data_dir = "/content"
upload_dir = "/content/upload_dir"
input_directory = "upload_dir"
cif_dir = "/content/mmcif"

# Make sure the mmcif and upload directories exist
os.chdir(content_dir)
if not os.path.isdir(cif_dir):
  os.mkdir(cif_dir)

if not os.path.isdir(upload_dir):
  print("Creating upload directory %s" %(upload_dir))
  os.mkdir(upload_dir)

# Work inside the upload directory and delete control files left over
# from an earlier run (names ending in _sequence_info.dat or starting
# with run_)
os.chdir(upload_dir)
stale_control_files = [
  name for name in os.listdir(".")
  if name.endswith("_sequence_info.dat") or name.startswith("run_")
]
for name in stale_control_files:
  os.remove(name)  # remove any control files

found_tgz = False
uploaded = files.upload()
for filename,contents in uploaded.items():
  if str(filename).endswith(".tgz"):
    if os.path.isfile(filename):
      print("File is present, overwriting")
      os.remove(filename)
    ff = open(filename, 'wb')
    ff.write(contents)
    filename = os.path.abspath(filename)
    print("Wrote file to %s" %(filename))
    found_tgz = True
    break
if not found_tgz:
  print("Could not find a .tgz file in upload...please try again")
  raise AssertionError ("Please upload a .tgz file")

print("Unpacking .tgz file")
os.system("tar xzvf %s" %(filename))
!ls -tlr

os.chdir(content_dir)

# Scan the upload directory for the two control files:
#   run_*                -> instructions (bare file name is kept)
#   *_sequence_info.dat  -> jobname/sequence lines (full path is kept)
# If several files match, the last one listed wins.
file_with_jobname_sequence_lines = None
file_with_instructions = None
for entry in os.listdir(upload_dir):
  if entry.startswith("run_"):
    file_with_instructions = entry
  if entry.endswith("_sequence_info.dat"):
    file_with_jobname_sequence_lines = os.path.join(upload_dir,entry)

# Both files are required; report the first one that is missing
required_files = (
  (file_with_jobname_sequence_lines,
    "Unable to find a file_with_jobname_sequence_lines in %s"),
  (file_with_instructions,
    "Unable to find a file_with_instructions in %s"),
)
for found, message_template in required_files:
  if found:
    continue
  text = message_template %(upload_dir)
  print(text)
  raise AssertionError (text)

# Get instructions from file_with_instructions: whitespace-separated
# keyword=value tokens
with open(os.path.join(upload_dir,file_with_instructions)) as instructions_file:
  text = instructions_file.read()
instructions = text.split()

# Defaults, used for any keyword that is not present in the file
number_of_models = 1
include_templates_from_pdb = False
upload_manual_templates = False
maximum_templates_from_pdb = 20
random_seed = 581867
skip_all_msa = False

for arg in instructions:
  if "=" not in arg:
    continue  # tokens without "=" are ignored
  # Split on the first "=" only, so a value that itself contains "="
  # (for example a file path) does not raise ValueError
  kw,value = arg.split("=", 1)
  if kw == "number_of_models":
    number_of_models = int(value)
  elif kw == "include_templates_from_pdb":
    include_templates_from_pdb = (value.lower() == 'true')
  elif kw == "template_file":
    # Only the presence of the keyword matters here; the value is not used
    upload_manual_templates = True
  elif kw == "maximum_templates_from_pdb":
    maximum_templates_from_pdb = int(value)
  elif kw == "random_seed":
    random_seed = int(value)
  elif kw == "skip_all_msa":
    skip_all_msa = (value.lower() == 'true')

# Placeholder values for inputs that are not set individually in this cell
jobname = 'None'
resolution =  None
sequence = ''
query_sequence = sequence
phenix_download_password='None'
password = phenix_download_password

# Save all parameters in a dictionary, looked up by name; a name that is
# not defined is stored as None
# NOTE(review): 'content' is not assigned anywhere in the visible code, so
# it is stored as None -- confirm whether 'content_dir' was intended
params = {}
initial_param_names = (
  'cif_dir','content','resolution','jobname', 'password', 'query_sequence')
for param_name in initial_param_names:
  params[param_name] = locals().get(param_name,None)

# Settings that are fixed for this notebook
save_outputs_in_google_drive = False
uploaded_templates_have_exact_sequence = True
uploaded_templates_are_map_to_model = False
maximum_cycles =  1
phenix_version = None
version = phenix_version  # rename variable

upload_file_with_jobname_sequence_lines = False
upload_file_with_jobname_resolution_sequence_lines = upload_file_with_jobname_sequence_lines

# Check that the sequence file can be found via input_directory.
# os.path.join() discards input_directory when the second argument is an
# absolute path.
candidate_sequence_file = None
if file_with_jobname_sequence_lines and input_directory:
  candidate_sequence_file = os.path.join(
      input_directory,file_with_jobname_sequence_lines)
if candidate_sequence_file and os.path.isfile(candidate_sequence_file):
  file_with_jobname_sequence_lines = candidate_sequence_file
  print("FILE WITH JOBNAME SEQUENCE LINES:",file_with_jobname_sequence_lines)
else:
  print("FILE WITH JOBNAME SEQUENCE LINES MISSING:",file_with_jobname_sequence_lines)
  file_with_jobname_sequence_lines = None
file_with_jobname_resolution_sequence_lines = file_with_jobname_sequence_lines
msa_use = 'Use MSA throughout'

# One random-seed iteration per requested model
random_seed_iterations =  number_of_models

debug = True

# Clear the per-job entries in params
params.update(jobname = None, resolution = None, sequence = None)


skip_all_msa_after_first_cycle = False

upload_maps = False  # Always for this version
upload_msa_file = False  # Always for this version
starting_alphafold_model = False  # Always for this version
use_msa = (not skip_all_msa)

# One twentieth of the iterations (integer division), but never below 1
minimum_random_seed_iterations = max(1, int(random_seed_iterations//20))

if type(params) is not dict:  # convert back to dict
  params = params()

# Save parameters: copy each named setting into params; a name that is
# not defined is stored as None
# NOTE(review): 'carry_on' is not assigned anywhere in the visible code,
# so it is stored as None -- confirm this is intended
saved_param_names = (
  'content_dir','data_dir','save_outputs_in_google_drive','input_directory',
  'include_templates_from_pdb','maximum_templates_from_pdb',
  'upload_manual_templates','uploaded_templates_are_map_to_model',
  'maximum_cycles','version',
  'upload_file_with_jobname_resolution_sequence_lines',
  'file_with_jobname_resolution_sequence_lines',
  'use_msa','skip_all_msa_after_first_cycle',
  'upload_maps','debug','carry_on','random_seed',
  'upload_msa_file','starting_alphafold_model',
  'random_seed_iterations','minimum_random_seed_iterations',
)
for param_name in saved_param_names:
  params[param_name] = locals().get(param_name)


# Download a fresh copy of the helper module into the current directory
file_name = 'phenix_colab_utils.py'
if os.path.isfile(file_name):
  os.remove(file_name)  # remove any earlier copy so wget fetches a new one
os.environ['file_name'] = file_name  # the shell expands $file_name below
result = os.system("wget -qnc https://raw.githubusercontent.com/phenix-project/Colabs/main/alphafold2/$file_name")
if result != 0 or not os.path.isfile(file_name):
  # Report the failure here so that an import error below is explained
  print("Warning: download of %s failed (wget status %s); "
      "the import below may fail" %(file_name, result))

import phenix_colab_utils as cu

cu.install_miniconda()

cu.get_helper_files()  # get all the other helper files
cu.clear_python_caches()


# Set up the inputs using the helper python files; params is kept as a
# dictionary (convert_to_params = False)
from phenix_alphafold_utils import set_up_input_files
params = set_up_input_files(params, convert_to_params = False)
params['cif_dir'] = None # ZZZ


# Imported again after the helper files were fetched and the python
# caches cleared
import phenix_colab_utils as cu


# Get tensorflow import before installation (reuse tf if already defined)
tf = locals().get('tf') or cu.import_tensorflow()

# Install selected software
software_selection = dict(
  bioconda = True,
  phenix = False,
  phenix_version = params.get('version'),
  phenix_password = params.get('password'),
  alphafold = True,
  pdb_to_cif = True,
)
cu.install_software(**software_selection)

print("\nRUNNING ALPHAFOLD NOW...")

# Convert params from dict to alphafold_with_density_map params
# (settings are read as attributes from here on)
from phenix_alphafold_utils import get_alphafold_with_density_map_params
params = get_alphafold_with_density_map_params(params)

from run_alphafold_with_density_map import run_jobs

# Run all jobs from the working directory
working_directory = params.content_dir
os.chdir(working_directory)
results = run_jobs(params)

print("\nALL DONE")


### <center> <b> <font color='black'>  Instructions </font></b> </center>


1. Start your run by hitting the <b><i>Run cell</i></b> button at the upper left corner of the notebook (looks like a black triangle in a circle).

2. When the upload button appears, upload your .tgz file (usually in your Downloads directory) as specified by the Phenix GUI.

3. You can stop the run by clicking on the "Run cell" button. It will turn red.  You can rerun it by clicking it again.

4. If something goes wrong, the "Run cell" button will turn red and a message should appear in red.

5. When the notebook finishes it should download a zip file with your results. If necessary, copy this file to the directory specified in the Phenix GUI.

6. You can close this notebook when done.

<b> <font color='black'> <center>Please cite the ColabFold and AlphaFold2 papers if you use this notebook:</center>
</font></b> 

- <font color='green'>[Mirdita, M., Ovchinnikov, S., Steinegger, M. (2021). ColabFold - Making protein folding accessible to all. *bioRxiv*, 2021.08.15.456425](https://www.biorxiv.org/content/10.1101/2021.08.15.456425v2)</font> 

- <font color='green'> [Jumper, J., Evans, R., Pritzel, A. et al. Highly accurate protein structure prediction with AlphaFold. Nature 596, 583–589 (2021)](https://www.nature.com/articles/s41586-021-03819-2)
</font>
-----------------
