# IMP example

#### Primer carreguem els mòduls de IMP. 

In [1]:
import os
import sys
import shutil
import random
import csv

import IMP.core
import IMP.pmi.dof
import IMP.pmi.macros
import IMP.pmi.restraints.basic
import IMP.pmi.restraints.stereochemistry
import IMP.pmi.tools
import IMP.pmi.topology
import IMP.pmi.output

#### Jo utilitzo Python 3.8 (la cel·la següent mostra la versió exacta).

In [2]:
# Show the version of the Python interpreter running this notebook
print(sys.version)

3.8.10 (default, Nov 26 2021, 20:14:08) 
[GCC 9.3.0]


In [3]:
# Show the version of the imported IMP package
print(IMP.__version__)

2.16.0


## Functions

In [4]:
def create_tag_molecules(pict_bdb, pict_components, pict_chains, state_tags):
    """
    Create one IMP.pmi molecule per fluorophore tag from a PDB file.

    For every component name a molecule is created in ``state_tags``, the
    structure of the matching chain is loaded from ``pict_bdb`` and a
    resolution-1 representation is added. Display colors come from a fixed
    palette that is reused cyclically, so more tags than palette entries are
    accepted.

    :param pict_bdb: path of the PDB file with the tag coordinates
    :param pict_components: molecule names, one per tag
    :param pict_chains: PDB chain ids; ``pict_chains[i]`` is used for
        ``pict_components[i]`` (must be at least as long as
        ``pict_components``)
    :param state_tags: state object in which the molecules are created
        (must provide ``create_molecule``)
    :return: list of the created molecules, in the order of
        ``pict_components``
    """
    colors = ("blue", "orange", "yellow", "pink", "brown", "purple", "red", "green")
    tag_molecules = []
    for n, component in enumerate(pict_components):
        print('PMI: setting up tag', component)
        chain = pict_chains[n]
        molecule = state_tags.create_molecule(name=component,
                                              sequence="",
                                              chain_id=chain)

        atomic = molecule.add_structure(pdb_fn=pict_bdb,
                                        chain_id=chain,
                                        res_range=[],  # no residue range restriction given
                                        soft_check=True,  # warn on sequence mismatches instead of failing
                                        offset=0)  # no shift between PDB and sequence numbering

        molecule.add_representation(residues=atomic,  # represent every residue loaded from the PDB
                                    bead_extra_breaks=[],  # no additional bead breakpoints
                                    color=colors[n % len(colors)],  # wrap around the palette
                                    bead_ca_centers=True,  # resolution=1 beads placed at CA centers
                                    resolutions=[1])

        tag_molecules.append(molecule)

    return tag_molecules


def get_pict_distance_restraints_dict(dr_tags_file):
    """
    Parse the tag-to-terminus distance restraints from a CSV file.

    The first record of the file is treated as a header and skipped. Every
    other non-empty row is read by column position:

        0 tag name, 1 tag residue, 2 protein name, 3 protein residue,
        4 chain id, 5 raw distance, 7 maximum distance, 8 standard deviation

    (column 6 is not read). The raw distance selects the dictionary key:

        202           -> "<tag>_frb-<protein>_C"
        110           -> "<tag>_gfp_c-<protein>_C"
        anything else -> "<tag>_gfp_n-<protein>_N"

    When several rows produce the same key, the first one is kept.

    :param dr_tags_file: path of the comma-delimited restraints file
    :return: dict mapping each key to
        [tag, protein, tag_residue, protein_residue, chain_id, max_distance, sd]
    """
    distances_to_tags_dict = {}
    # newline="" is what the csv module expects so it can handle line endings itself
    with open(dr_tags_file, "r", newline="") as dr:
        csv_reader = csv.reader(dr, delimiter=',')
        next(csv_reader, None)  # skip the header; harmless on an empty file
        for line in csv_reader:
            if not line:
                # csv.reader yields [] for blank lines; nothing to parse
                continue
            tag = line[0]
            protein = line[2]
            tag_residue = int(line[1])
            protein_residue = int(line[3])
            chain_id = str(line[4])
            max_distance = float(line[7])
            raw_dist = float(line[5])
            sd = float(line[8])

            if raw_dist == 202:
                key_template = "{}_frb-{}_C"
            elif raw_dist == 110:
                key_template = "{}_gfp_c-{}_C"
            else:
                key_template = "{}_gfp_n-{}_N"

            # setdefault keeps the first row seen for a given key
            distances_to_tags_dict.setdefault(
                key_template.format(tag, protein),
                [tag, protein, tag_residue, protein_residue, chain_id, max_distance, sd])
    return distances_to_tags_dict

def set_pict_distance_restraints(distances_to_tags_dict, root_hierarchy, output_objects):
    """
    Build one PMI distance restraint per entry of ``distances_to_tags_dict``.

    Each restraint links the tag residue and the protein residue of an entry,
    with a minimum distance of 0 and the entry's maximum distance as the upper
    limit. Every restraint is added to the model, evaluated once, and appended
    to both the returned list and ``output_objects`` (modified in place).

    :param distances_to_tags_dict: key -> [tag, protein, tag_residue,
        protein_residue, chain_id, max_distance, sd]
    :param root_hierarchy: root hierarchy handed to every restraint
    :param output_objects: list collecting the restraints for reporting
    :return: distances_to_tags_dict, dr_list, output_objects
    """
    dr_list = []
    for restraint_name, entry in distances_to_tags_dict.items():
        tag, protein = entry[0], entry[1]
        tag_rn, protein_rn = int(entry[2]), int(entry[3])
        max_distance = float(entry[5])
        sd = float(entry[6])
        # NOTE(review): kappa is derived from the standard deviation but is
        # not handed to the restraint below -- confirm this is intended.
        kappa = IMP.core.Harmonic_get_k_from_standard_deviation(sd)

        restraint = IMP.pmi.restraints.basic.DistanceRestraint(
            root_hier=root_hierarchy,
            tuple_selection1=(tag_rn, tag_rn, tag),
            tuple_selection2=(protein_rn, protein_rn, protein),
            distancemin=0,
            distancemax=max_distance,
            label="restraint_{}".format(restraint_name),
        )
        restraint.add_to_model()
        restraint.evaluate()

        dr_list.append(restraint)
        output_objects.append(restraint)

        print("\nSetting distance {}\n".format(restraint.label))
        print("Max distance {}\n".format(max_distance))

    return distances_to_tags_dict, dr_list, output_objects


def distance_restraints_pict_cryo(dr_tags_file, root_hierarchy, output_objects):
    """
    Read the PICT distances from ``dr_tags_file`` and turn them into restraints.

    :param dr_tags_file: CSV file parsed by get_pict_distance_restraints_dict
    :param root_hierarchy: root hierarchy forwarded to set_pict_distance_restraints
    :param output_objects: reporting list forwarded to set_pict_distance_restraints
    :return: whatever set_pict_distance_restraints returns
        (distances dict, list of restraints, output_objects)
    """
    return set_pict_distance_restraints(
        get_pict_distance_restraints_dict(dr_tags_file),
        root_hierarchy,
        output_objects,
    )

## Define PATHS

Defineixo els paths on tinc les dades (input) i on ho vull guardar (output)

In [5]:
# ---------------------------
# 1. Input data and output locations
# ---------------------------
# Input directory
data_directory = "./test_output/SEC3/IMP/"
# Topology file: the model parameters are defined in it
topology_file = os.path.join(data_directory, "SEC3.topology")
# The cryo-EM PDB ("5yfp.pdb" inside data_directory) is currently not used.
# Directory holding the fluorophore (tag) models
pict_pdb_models = "/home/gallegolab/Desktop/IMP_tests/scripts_Altair/fluorophores_modified/"
# CSV file with the distance restraints
dr_tags_file = "/home/gallegolab/Desktop/IMP_tests/scripts_Altair/distance_restraints.csv"
# Output directory
output_directory = "./test_output/SEC3/IMP"
# Output prefix (a command-line alternative would be sys.argv[1])
output_index = "test_SEC3"

## Parameters

Defineixo els paràmetres per al Monte Carlo Replica Exchange i per a les restriccions d'excluded volume i connectivity.

In [6]:
# --------------------------
# 2. Scoring parameters
# --------------------------
# Stereochemistry and physical restraints
connectivity_scale = 1.0  # scale applied to the connectivity restraints
ev_weight = 1.0  # weight of the excluded volume restraint

# --------------------------
# 3. Sampling parameters
# --------------------------
# Monte Carlo (MC)
mc_temperature = 1.0  # MC temperature
num_mc_steps = 50  # MC steps per frame
num_frames = 30  # frames in the MC run (could come from sys.argv[3])
num_best_scoring_models = 3

# Simulated annealing (sa): alternates between two MC temperatures
sim_annealing = True  # run simulated annealing when true
sa_temps = (1.0, 5.0)  # the two annealing temperatures
sa_max_temp_steps = 20  # steps spent at the maximum temperature
sa_min_temp_steps = 100  # steps spent at the minimum temperature

# Replica exchange (rex)
rex_temps = (1.0, 5.0)  # temperature bounds for replica exchange


## MODELING 

Primer definim el model i carreguem les dades des del topology file. Per veure els settings del topology file, consulteu la documentació: https://integrativemodeling.org/2.15.0/doc/ref/classIMP_1_1pmi_1_1topology_1_1TopologyReader.html

In [7]:
# Initialize the IMP model
m = IMP.Model()
# Read in the topology file, which describes the coarse model of the exocyst
# (presumably it also points to the directories with the PDB and FASTA files
# -- confirm against the topology file itself)
topology = IMP.pmi.topology.TopologyReader(topology_file)
# BuildSystem macro bound to the model; later cells use it to create the
# states, add the topology and build the system
bs = IMP.pmi.macros.BuildSystem(m)

Definim les dades dels fluoròfors, el nombre de partícules i les seves coordenades a partir dels fitxers PDB. IMP ho carrega tot en un "state" dins del System creat per "bs".

In [8]:
# Define the tag molecules (fluorophores): chain ids and component names,
# loaded into their own state.
# Pick one entry of the tag-model directory at random.
pict_pdb = random.choice(os.listdir(pict_pdb_models))
# Build the path once; os.path.join works whether or not the directory
# string ends with a path separator.
pict_pdb_path = os.path.join(pict_pdb_models, pict_pdb)
pict_pdb_seqs = IMP.pmi.topology.PDBSequences(m, pict_pdb_path)  # using IMP.model
# Iterating `sequences` yields the chain ids found in the PDB file
# (presumably in file order -- confirm).
pict_chains = list(pict_pdb_seqs.sequences)
pict_components = ["tag_sec3", "tag_sec5", "tag_sec6", "tag_sec8", "tag_sec10", "tag_sec15", "tag_exo70", "tag_exo84"]
# The tags live in a state created directly on the system (no topology file).
st_tags = bs.system.create_state()
# Create the tag molecules and load them into that state
tag_molecules = create_tag_molecules(pict_pdb_path, pict_components, pict_chains, st_tags)

A
B
C
D
E
F
G
H
PMI: setting up tag tag_sec3
PMI: setting up tag tag_sec5
PMI: setting up tag tag_sec6
PMI: setting up tag tag_sec8




PMI: setting up tag tag_sec10
PMI: setting up tag tag_sec15
PMI: setting up tag tag_exo70
PMI: setting up tag tag_exo84


Ara carreguem les dades de l'exocyst des del topology file. 

In [9]:
# Add a state built from the topology file (the exocyst)
bs.add_state(topology)
# All molecules known to the build system
system_molecules = bs.get_molecules()
# First entry of every value stored under index 1 of system_molecules
# (presumably index 1 is the state added just above, index 0 being the tag
# state -- confirm)
cryo_components = [copies[0] for copies in system_molecules[1].values()]
print(system_molecules)

BuildSystem.add_state: setting up molecule 5lg4_B copy number 0


KeyError: 'input_fasta/SEC3.fasta'

Finalment creem el nostre sistema executant una macro de IMP:

In [None]:
# Build the system representation and degrees of freedom.
# Returns the root hierarchy of the built system and the degrees-of-freedom
# object used by the following cells. The keyword arguments are movement
# limits (translation / rotation) -- presumably for rigid bodies (rb),
# flexible beads (bead) and super rigid bodies (srb); confirm against the
# BuildSystem.execute_macro documentation.
root_hierarchy, dof = bs.execute_macro(max_rb_trans=4.0,
                                       max_rb_rot=0.3,
                                       max_bead_trans=4.0,
                                       max_srb_trans=4.0,
                                       max_srb_rot=0.3)

Com que volem que es mogui només l'exocyst (els fluoròfors no es mouen), definim quines són les parts mòbils i quines no.

Després ho agitem tot per començar des d'una situació inicial aleatòria.

In [None]:
# Fix tags rigid bodies but not the exocyst (the protein complex):
# the two listed mover types are disabled for the tag molecules.
fixed_beads, fixed_rbs = dof.disable_movers(tag_molecules,
                                            [IMP.core.RigidBodyMover,
                                             IMP.pmi.TransformMover])

# Randomize the initial configuration before sampling, of only the molecules
# we are interested in (exocyst subunits)
# NOTE(review): everything returned by dof.get_rigid_bodies() is passed as
# `excluded_rigid_bodies`, while `fixed_rbs` from the call above is not used
# anywhere in the visible code. If only the tags are meant to stay in place,
# `fixed_rbs` may be the intended argument -- confirm.
IMP.pmi.tools.shuffle_configuration(root_hierarchy,
                                    excluded_rigid_bodies=dof.get_rigid_bodies(),
                                    max_translation=50,
                                    verbose=False,
                                    cutoff=5.0,
                                    niterations=100)

### Define Scoring Functions Components

Ara definim les restraints que volem utilitzar. 

1- Restriccions de distància --> es troben a un CSV. Per utilitzar-les utilitzem Harmonic Upper Bound restraints de IMP entre partícules de IMP.

2- Connectivity --> que cada molècula que introduïm sigui sencera.

3- Excluded Volume --> evitem clashes.

In [None]:
# Reporter objects (for stat files); starts empty and is filled by later cells
out_objects = []

### Distance Restraints

In [None]:
# Here we are defining a number of restraints on our system.
#  For all of them we call add_to_model() so they are incorporated into scoring
#  We also add them to the output_objects list, so they are reported in stat files

# Distance restraints from fluorophores to cryo subunits.
# Returns the parsed distances dict, the list of created restraints and the
# reporting list. `output_objects` is the very same list object as
# `out_objects`: the callee appends to its argument and returns it.
dr_tags_dict, dr_list, output_objects = distance_restraints_pict_cryo(dr_tags_file, root_hierarchy, out_objects)


### Connectivity 

In [None]:
# Connectivity keeps things connected along the backbone (ignores if inside
# same rigid body)
all_molecules = IMP.pmi.tools.get_molecules(root_hierarchy)
# Keep only the molecules whose name does not contain "tag", i.e. skip the
# fluorophore molecules (named "tag_...")
cryo_molecules = [mol for mol in all_molecules if "tag" not in mol.get_name()]
cr_list = list()  # connectivity restraints, one per non-tag molecule
for mol in cryo_molecules:
    mol_name = mol.get_name()
    IMP.pmi.tools.display_bonds(mol)
    cr = IMP.pmi.restraints.stereochemistry.ConnectivityRestraint(mol, scale=connectivity_scale)
    cr.add_to_model()
    cr.set_label(mol_name)  # label the restraint with the molecule name
    output_objects.append(cr)  # report it together with the other restraints
    cr_list.append(cr)

### Excluded Volume

In [None]:
# Excluded Volume Restraint
#  To speed up this expensive restraint, we evaluate it at resolution 10.
#  Only the non-tag molecules (cryo_molecules) are included.
ev = IMP.pmi.restraints.stereochemistry.ExcludedVolumeSphere(included_objects=cryo_molecules,
                                                             resolution=10)
ev.set_weight(ev_weight)  # weight taken from the scoring parameters
ev.add_to_model()
output_objects.append(ev)  # report it together with the other restraints

In [None]:
# Quickly move all flexible beads into place (100 optimization steps)
# before the sampling starts
dof.optimize_flexible_beads(nsteps=100)

## Sampling: Monte Carlo Replica Exchange

In [None]:
# --------------------------
# Monte-Carlo Sampling
# --------------------------

# This object defines all components to be sampled as well as the sampling protocol.
#  - the movers come from the degrees-of-freedom object
#  - the same restraint list is used for the stat output and the RMF output
#  - min()/max() pick the lower/upper value of the sa_temps and rex_temps pairs
#  - sa_min_temp_steps / sa_max_temp_steps are handed over as the "nframes"
#    arguments of the simulated annealing
#  - results go to the directory "<output_directory>_<output_index>"
mc1 = IMP.pmi.macros.ReplicaExchange0(m,
                                      root_hier=root_hierarchy,
                                      monte_carlo_sample_objects=dof.get_movers(),
                                      output_objects=output_objects,
                                      rmf_output_objects=output_objects,
                                      monte_carlo_temperature=mc_temperature,
                                      simulated_annealing=sim_annealing,
                                      simulated_annealing_minimum_temperature=min(sa_temps),
                                      simulated_annealing_maximum_temperature=max(sa_temps),
                                      simulated_annealing_minimum_temperature_nframes=sa_min_temp_steps,
                                      simulated_annealing_maximum_temperature_nframes=sa_max_temp_steps,
                                      replica_exchange_minimum_temperature=min(rex_temps),
                                      replica_exchange_maximum_temperature=max(rex_temps),
                                      number_of_best_scoring_models=num_best_scoring_models,
                                      monte_carlo_steps=num_mc_steps,
                                      number_of_frames=num_frames,
                                      global_output_directory=output_directory + "_" + str(output_index))

# Start Sampling
mc1.execute_macro()

## output

IMP torna com a output un directory amb:
- stats files --> files per fer analysis
- rmf --> amb chimera es poden veure les iteracions
- pdbs --> s'hi troben els PDBs utilitzats per cada "state"
- initial.0.rmf3 --> el frame inicial de partida abans de modelar.