In [5]:
import pandas as pd
import os
from copy import deepcopy
from math import ceil
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from evcouplings.align.alignment import (
    read_fasta, parse_header
)
from evcouplings.utils.config import (
    check_required, InvalidParameterError
)

from evcouplings.utils.system import (
    create_prefix_folders, insert_dir, verify_resources,
)
from evcouplings.couplings import Segment
from evcouplings.compare.pdb import load_structures
from evcouplings.compare.distances import (
    intra_dists, multimer_dists, remap_chains,
    inter_dists, remap_complex_chains
)
from evcouplings.compare.sifts import SIFTS, SIFTSResult
from evcouplings.compare.ecs import (
    coupling_scores_compared, add_precision
)
from evcouplings.visualize import pairs, misc
import yaml


In [19]:
import time
alphabet = "ARNDCQEGHILKMFPSTWYV-"
states = len(alphabet)
# Lookup table: each symbol of `alphabet` -> its position in the string.
a2n = {symbol: index for index, symbol in enumerate(alphabet)}
seq='RNNGALPPDLSLIVKARHGGCNYIFSLLTGYPEPPAGAVVQEGLNFNPYFPGTGIAMARVLYDGLVEYDDGTPATASQMAKDVVEFLNWAAEPEMDERKRMGWKVMAIGGLLFGMSVWVKRYKWSTV'


def aa2num(aa):
    '''Return the integer code of symbol `aa` according to `a2n`.

    Symbols that are not part of `alphabet` fall back to the code
    of the gap character '-'.
    '''
    return a2n.get(aa, a2n['-'])
# Compare one pass of map() against one pass of a list comprehension for
# encoding `seq` with aa2num. time.perf_counter() is used instead of
# time.time() because it is the high-resolution monotonic clock intended for
# measuring short intervals; time.time() is a wall clock that may be coarse or
# adjusted while the measurement runs.
# NOTE: each variant is timed exactly once, so the numbers are noisy; use
# %timeit for a statistically meaningful comparison.
start = time.perf_counter()
list(map(aa2num, seq))
print('time spent map: {}'.format(time.perf_counter() - start))

start = time.perf_counter()
[aa2num(aa) for aa in seq]
print('time spent list iter: {}'.format(time.perf_counter() - start))

time spent map: 0.00014734268188476562
time spent list iter: 0.0002524852752685547


In [13]:
# Parse the .outcfg file with yaml.safe_load. The context manager closes the
# file handle as soon as parsing finishes (the bare open() call previously
# left it open until garbage collection).
# Requires `yaml` to be imported already (done in the notebook's import cell).
with open('/home/as974/marks/users/kbrock/ecoli_complex/calibration/output/allpdb0042_final.outcfg','r') as outcfg_file:
    outcfg_yaml = yaml.safe_load(outcfg_file)

NameError: name 'yaml' is not defined

In [24]:
import numpy as np
# Element-wise true division of two integer arrays yields a float array.
# The builtin `int` is used as dtype: the `np.int` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
a = np.arange(1, 6, dtype=int)
# Same shape and dtype as `a`, filled with the constant 6.
b = np.full_like(a, 6)
a/b

array([0.16666667, 0.33333333, 0.5       , 0.66666667, 0.83333333])

In [14]:
# Show the parsed output configuration; as the last expression of the cell
# it is rendered as the cell's output.
outcfg_yaml

{'alignment_file': 'output/allpdb0042/concatenate/allpdb0042.a2m',
 'archive_file': 'output/allpdb0042.tar.gz',
 'complex_remapped_pdb_files': {'output/allpdb0042/compare/aux/allpdb0042_1bcc_C_74_D_0.pdb': 124,
  'output/allpdb0042/compare/aux/allpdb0042_1be3_C_6_D_1.pdb': 49,
  'output/allpdb0042/compare/aux/allpdb0042_1bgy_C_7_D_2.pdb': 50,
  'output/allpdb0042/compare/aux/allpdb0042_1bgy_C_7_P_3.pdb': 51,
  'output/allpdb0042/compare/aux/allpdb0042_1bgy_O_8_D_2.pdb': 52,
  'output/allpdb0042/compare/aux/allpdb0042_1bgy_O_8_P_3.pdb': 53,
  'output/allpdb0042/compare/aux/allpdb0042_1ezv_C_50_D_47.pdb': 138,
  'output/allpdb0042/compare/aux/allpdb0042_1kb9_C_51_D_48.pdb': 137,
  'output/allpdb0042/compare/aux/allpdb0042_1kyo_C_52_D_49.pdb': 135,
  'output/allpdb0042/compare/aux/allpdb0042_1kyo_C_52_O_50.pdb': 136,
  'output/allpdb0042/compare/aux/allpdb0042_1l0l_C_9_D_4.pdb': 54,
  'output/allpdb0042/compare/aux/allpdb0042_1l0n_C_10_D_5.pdb': 55,
  'output/allpdb0042/compare/aux/allpdb

In [15]:
def complex_contact_map(intra1_ecs, intra2_ecs, inter_ecs,
                        d_intra_i, d_multimer_i,
                        d_intra_j, d_multimer_j,
                        d_inter, **kwargs):
    """
    Draw a 2x2 grid of contact maps for a two-monomer complex.

    The panels are laid out as: intra-molecular map of monomer 1
    (upper left), intra-molecular map of monomer 2 (lower right),
    inter-molecular map (lower left) and its transpose (upper right).
    Panel sizes are proportional to the plotted range of each monomer.

    NOTE(review): find_boundaries and plot_contact_map are called here but
    are not defined or imported in the visible notebook cells; presumably
    they come from evcouplings.visualize.pairs - confirm they are in scope.

    Parameters
    ----------
    intra1_ecs: pandas.DataFrame
        Table of intra-molecular evolutionary couplings to plot
        (using columns "i" and "j") from monomer 1
    intra2_ecs: pandas.DataFrame
        Table of intra-molecular evolutionary couplings to plot
        (using columns "i" and "j") from monomer 2
    inter_ecs: pandas.DataFrame
        Table of inter-molecular evolutionary couplings to plot
        (using columns "i" and "j")
    d_intra_i: evcouplings.compare.distances.DistanceMap
        Monomer 1 distance map (intra-chain distances)
    d_multimer_i: evcouplings.compare.distances.DistanceMap
        Monomer 1 multimer distance map (inter-chain distances for monomer 1)
    d_intra_j: evcouplings.compare.distances.DistanceMap
        Monomer 2 distance map (intra-chain distances)
    d_multimer_j: evcouplings.compare.distances.DistanceMap
        Monomer 2 multimer distance map (inter-chain distances for monomer 2)
    d_inter: evcouplings.compare.distances.DistanceMap
        Inter-molecular distance map (inter-chain distances)
    boundaries: {"union", "intersection", "ecs", "structure"} or tuple
                 or list(tuple, tuple), optional (default: "union")
        Passed as keyword argument. Set axis range (min/max) of contact
        map as follows:
        - "union": Positions either in ECs or 3D structure
        - "intersection": Positions both in ECs and 3D structure
        - "ecs": Positions in ECs
        - "structure": Positions in 3D structure
        - tuple(float, float): Specify upper/lower bound manually
        - [(float, float), (float, float)]: Specify upper/lower bounds
          for both x-axis (first tuple) and y-axis (second tuple)
    **kwargs
        Further keyword arguments, forwarded to plot_contact_map for
        every panel (with "boundaries" replaced by the per-panel value).

    Raises
    ------
    ValueError
        If the plotted range of both monomers has zero length.
    """
    # gridspec is not imported by the notebook's import cell, so bring it
    # into scope here to avoid a NameError when laying out the axes
    from matplotlib import gridspec

    def _boundary_union(original_boundaries, new_boundaries_axis1,
                        new_boundaries_axis2, axis1=True, axis2=True,
                        symmetric=False):
        # Widen original_boundaries so that they also span the given
        # new boundaries - whichever covers more of the protein wins.
        # Default is to update both axes. Works on a shallow copy so the
        # caller's list is not modified in place.
        updated_boundaries = list(original_boundaries)

        # increase the axis 1 boundaries if the new boundaries
        # cover more range
        if axis1:
            updated_boundaries[0] = (
                min(updated_boundaries[0][0], new_boundaries_axis1[0][0]),
                max(updated_boundaries[0][1], new_boundaries_axis1[0][1])
            )
            # if symmetric, update the axis2 boundaries with the same value
            if symmetric:
                updated_boundaries[1] = updated_boundaries[0]

        if axis2:
            updated_boundaries[1] = (
                min(updated_boundaries[1][0], new_boundaries_axis2[1][0]),
                max(updated_boundaries[1][1], new_boundaries_axis2[1][1])
            )
            # if symmetric, update the axis1 boundaries with the same value
            if symmetric:
                updated_boundaries[0] = updated_boundaries[1]

        return updated_boundaries

    # boundaries is optional; fall back to the documented default instead
    # of failing with a KeyError when it is not supplied
    boundaries = kwargs.get("boundaries", "union")

    # Find the appropriate boundaries for each subset
    intra1_boundaries = list(
        find_boundaries(
            boundaries, ecs=intra1_ecs, monomer=d_intra_i,
            multimer=d_multimer_i, symmetric=True
        )
    )

    intra2_boundaries = list(
        find_boundaries(
            boundaries, ecs=intra2_ecs, monomer=d_intra_j,
            multimer=d_multimer_j, symmetric=True
        )
    )

    # Don't compute inter boundaries unless we have inter
    # ecs or distances
    if (inter_ecs is not None and not inter_ecs.empty) or d_inter is not None:
        inter_boundaries = list(
            find_boundaries(
                boundaries, ecs=inter_ecs, monomer=d_inter,
                multimer=None, symmetric=False
            )
        )

        # update the inter boundaries in case the intra boundaries
        # are outside the range of plotted inter ECs
        inter_boundaries = _boundary_union(
            inter_boundaries, intra1_boundaries, intra2_boundaries
        )

        # also modify intra boundaries in case the inter ECs are outside
        # the range of plotted monomer contacts or ECs
        intra1_boundaries = _boundary_union(
            intra1_boundaries, inter_boundaries, inter_boundaries,
            axis1=True, axis2=False, symmetric=True
        )

        intra2_boundaries = _boundary_union(
            intra2_boundaries, inter_boundaries, inter_boundaries,
            axis1=False, axis2=True, symmetric=True
        )

    else:
        # if not plotting any inter ECs or contacts, just use the intra boundaries
        inter_boundaries = [
            (intra1_boundaries[0][0], intra1_boundaries[0][1]),
            (intra2_boundaries[0][0], intra2_boundaries[0][1])
        ]

    # Calculate the length ratios of the monomers
    mon1_len = intra1_boundaries[0][1] - intra1_boundaries[0][0]
    mon2_len = intra2_boundaries[0][1] - intra2_boundaries[0][0]

    # guards the divisions below against a zero total length
    if (mon1_len == 0) and (mon2_len == 0):
        raise ValueError(
            "Warning, you must provide at least one contact to plot "
            "for at least one of the monomers. Contact map not generated."
        )

    ratio1 = mon1_len / (mon1_len + mon2_len)
    ratio2 = mon2_len / (mon1_len + mon2_len)

    # Initiate the axes using the above ratios
    fig = plt.figure(figsize=(8, 8))
    gs = gridspec.GridSpec(
        2, 2, width_ratios=[ratio1, ratio2],
        height_ratios=[ratio1, ratio2]
    )
    ax1 = plt.subplot(gs[0])  # intra 1, upper left
    ax2 = plt.subplot(gs[1])  # inter, upper right
    ax3 = plt.subplot(gs[2])  # inter, lower left
    ax4 = plt.subplot(gs[3])  # intra 2, lower right

    # intra 1, upper left
    if not (intra1_ecs is None and d_intra_i is None and d_multimer_i is None):
        new_kwargs = deepcopy(kwargs)
        new_kwargs["boundaries"] = intra1_boundaries
        plot_contact_map(
            ax=ax1, symmetric=True,
            ecs=intra1_ecs, monomer=d_intra_i,
            multimer=d_multimer_i, **new_kwargs
        )

    # intra 2, lower right
    if not (intra2_ecs is None and d_intra_j is None and d_multimer_j is None):
        new_kwargs = deepcopy(kwargs)
        new_kwargs["boundaries"] = intra2_boundaries
        plot_contact_map(
            ax=ax4, symmetric=True,
            ecs=intra2_ecs, monomer=d_intra_j,
            multimer=d_multimer_j, **new_kwargs
        )

    # inter, lower left
    if not (inter_ecs is None and d_inter is None):
        new_kwargs = deepcopy(kwargs)
        new_kwargs["boundaries"] = inter_boundaries
        plot_contact_map(
            ax=ax3, symmetric=False,
            ecs=inter_ecs, multimer=d_inter,
            **new_kwargs
        )

        # inter, upper right: same data with the two axes swapped
        if inter_ecs is None:
            inter_ecs_transposed = None
        else:
            inter_ecs_transposed = inter_ecs.rename(columns={"i": "j", "j": "i"})

        if d_inter is None:
            d_inter_T = None
        else:
            d_inter_T = d_inter.transpose()

        # x/y boundaries are swapped to match the transposed data
        new_kwargs = {
            **kwargs,
            "boundaries": list(reversed(inter_boundaries)),
        }
        plot_contact_map(
            ax=ax2, symmetric=False,
            ecs=inter_ecs_transposed,
            multimer=d_inter_T, **new_kwargs
        )



In [18]:
import numpy as np
# Compare every row of a 4x4 grid (values 0..15) against `a` via
# broadcasting, then take the per-row fraction of matching entries.
a = np.array([0, 1, 2, 1])
grid = np.arange(16).reshape(4, 4)
np.mean(grid == a, axis=-1)

array([0.75, 0.  , 0.  , 0.  ])