In [85]:
from collections import defaultdict
from typing import Iterable, Dict, List

import numpy as np

from nupack import Complex as NupackComplex
from nupack import Model as NupackModel
from nupack import ComplexSet as NupackComplexSet
from nupack import Strand as NupackStrand
from nupack import SetSpec as NupackSetSpec
from nupack import complex_analysis as nupack_complex_analysis
from nupack import PairsMatrix as NupackPairsMatrix


import dsd.vienna_nupack as dv
import dsd.constraints as dc

# constants
DOMAIN_LENGTH = 15
DOMAIN_POOL: dc.DomainPool = dc.DomainPool('DOMAIN_POOL', DOMAIN_LENGTH)
TEMPERATURE = 37.0
NUPACK_MODEL = NupackModel(material='dna', celsius=TEMPERATURE)
NUMBER_OF_TRIALS = 100

def nupack_base_pair_probabilities(strands: Iterable[dc.Strand], strands_to_assign: Iterable[dc.Strand], base_index1: int, base_index2: int) -> np.ndarray:
    """Given a design and a specific base pair, assigns random DNA sequences to
    the design and computes the equilibrium base-pairing probability for each
    DNA sequence assignment.
    
    :param strands: The strands that make up the design.
    :type strands: Iterable[dc.Strand]
    :param strands_to_assign: The strands to assign DNA to. Domains
        complementary to these strands are automatically reassigned DNA. As
        such, strands_to_assign is usually a subset of strands.
    :type strands: Iterabble[dc.Strand]
    :param base_index1: The index of one of the bases that form the base pair
    :type base_index1: int
    :param base_index2: The index of the other base that forms the base pair
    :type base_index2: int

    :return: An array of NUMBER_OF_TRIALS base pair probabilities
    :rtype: numpy.ndarray
    """
    for s in strands:
        for d in s.domains:
            d.pool = DOMAIN_POOL

    base_pair_probabilities = [0] * NUMBER_OF_TRIALS
    base_pair_probabilities_by_base: Dict[str, List[float]] = defaultdict(list)
    for i in range(NUMBER_OF_TRIALS):
        base_at_base_index1: str
        
        # Assign random DNA sequence
        for s in strands_to_assign:
            rand_dna_seq = dv.random_dna_seq(s.length())
            base_at_base_index1 = rand_dna_seq[base_index1]
            s.assign_dna(rand_dna_seq)
            
        # Initialize NUPACK NupackComplexSet
        nupack_strands = [NupackStrand(strand.sequence(), name=strand.name) for strand in strands]
        nupack_complex: NupackComplex = NupackComplex(nupack_strands)
        nupack_complex_set = NupackComplexSet(nupack_strands, complexes=(nupack_complex,))        
        
        # Call NUPACK complex_analysis        
        nupack_complex_result: np.ndarray = nupack_complex_analysis(nupack_complex_set, compute=['pairs'], model=NUPACK_MODEL)[nupack_complex].pairs.to_array()
            
        # Collect results
        p = nupack_complex_result[base_index1][base_index2]
        base_pair_probabilities[i] = p
        base_pair_probabilities_by_base[base_at_base_index1].append(p)
        
    return np.array(base_pair_probabilities), base_pair_probabilities_by_base

def summarize_bpps(bpps: np.ndarray, verbose: bool = False) -> None:
    if verbose:
        print('bpps:', bpps)
    print('mean', np.mean(bpps))
    print('standard deviation:', np.std(bpps))
    print()
    
def summarize_bpps_by_base(bpps_by_base: Dict[str, List[float]], verbose: bool = False) -> None:
    for base in 'ACGT':
        bpps = bpps_by_base[base]
        print('base:', base)
        summarize_bpps(np.array(bpps), verbose=verbose)

In [87]:
print('INTERIOR_TO_STRAND')
# INTERIOR_TO_STRAND
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [-----##----->
#                     |||||  |||||
#                    <-----##-----]
#                     |   |  |   |
#                     59 45  44  30
#                       a*^   b*
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['b*', 'a*'], name='bot strand')

interior_to_strand_bpps, interior_to_strand_bpps_by_base = nupack_base_pair_probabilities((top_strand, bot_strand), (top_strand,), 14, 45)
summarize_bpps(interior_to_strand_bpps)
summarize_bpps_by_base(interior_to_strand_bpps_by_base)

INTERIOR_TO_STRAND
mean 0.9957071372475166
standard deviation: 0.004481147891948408

base: A
mean 0.9926541842187869
standard deviation: 0.004628548147961639

base: C
mean 0.9987513733926704
standard deviation: 0.0008148877087885014

base: G
mean 0.9988333681123647
standard deviation: 0.0008460226908978989

base: T
mean 0.9930113559237022
standard deviation: 0.004578551880745603



In [88]:
print('BLUNT_END')
#                       a
#                     0  14
#                     |   |
#                    [----->
#                     |||||
#                    <-----]
#                     |   |
#                     29  15
#
#                         ^
#                         |
#                     base pair

top_strand: dc.Strand = dc.Strand(['a'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['a*'], name='bot strand')

blunt_end_bpps, blunt_end_bpps_by_base = nupack_base_pair_probabilities((top_strand, bot_strand), (top_strand,), 14, 15)
summarize_bpps(blunt_end_bpps)
summarize_bpps_by_base(blunt_end_bpps_by_base)

BLUNT_END
mean 0.45661888257454036
standard deviation: 0.1894398538296574

base: A
mean 0.3115362819650175
standard deviation: 0.10028118418694827

base: C
mean 0.6379444396665029
standard deviation: 0.12150515029455494

base: G
mean 0.6779172570890192
standard deviation: 0.14953662610832805

base: T
mean 0.3448322507650235
standard deviation: 0.03261862017508212



In [99]:
print('NICK on top strand')
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [----->[----->
#                     |||||  |||||
#                    <-----##-----]
#                     |   |  |   |
#                     59 45  44  30
#                       a*    b*
#                         ^
#                         |
#                     base pair
top_strand_left = dc.Strand(['a'], name='top strand left')
top_strand_right = dc.Strand(['b'], name='top strand right')
bot_strand = dc.Strand(['b*', 'a*'], name='bot strand')
strands = (top_strand_left, top_strand_right, bot_strand)
strands_to_assign = (bot_strand,)

nick_top_bpps, nick_top_bpps_by_base = nupack_base_pair_probabilities(strands, strands_to_assign, 14, 45)
summarize_bpps(nick_top_bpps)
summarize_bpps_by_base(nick_top_bpps_by_base)

NICK on top strand
mean 0.8726278310419039
standard deviation: 0.11046794935860238

base: A
mean 0.8421762783646859
standard deviation: 0.12354828192276407

base: C
mean 0.8887726922505834
standard deviation: 0.0981568392348244

base: G
mean 0.8742551453002463
standard deviation: 0.09408404445046697

base: T
mean 0.8926546519724641
standard deviation: 0.11311497484490884



In [103]:
print('NICK on bot strand')
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [-----##----->
#                     |||||  |||||
#                    <-----]<-----]
#                     |   |  |   |
#                     59 45  44  30
#                       a*    b*
#                         ^
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand_right: dc.Strand = dc.Strand(['b*'], name='bot strand right')
bot_strand_left: dc.Strand = dc.Strand(['a*'], name='bot strand left')
strands = (top_strand, bot_strand_right, bot_strand_left)
strands_to_assign = (top_strand,)

nick_bot_bpps, nick_bot_bpps_by_base = nupack_base_pair_probabilities(strands, strands_to_assign, 14, 45)
summarize_bpps(nick_bot_bpps)
summarize_bpps_by_base(nick_bot_bpps_by_base)

NICK on bot strand
mean 0.8327983211655237
standard deviation: 0.1423740078354688

base: A
mean 0.6792807573634829
standard deviation: 0.10045463594422412

base: C
mean 0.9065862207733689
standard deviation: 0.09606875754595509

base: G
mean 0.9577709632781609
standard deviation: 0.04999634366512471

base: T
mean 0.8310699105082314
standard deviation: 0.10390670243786679



In [108]:
print('DANGLE_3P')
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [-----##----->
#                     |||||
#                    <-----]
#                     |   |
#                     44 30
#                       a*
#                         ^
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['a*'], name='bot strand')
strands = (top_strand, bot_strand)
strands_to_assign = (top_strand,)

dangle_3p_bpps, dangle_3p_bpps_by_base = nupack_base_pair_probabilities(strands, strands_to_assign, 14, 30)
summarize_bpps(dangle_3p_bpps)
summarize_bpps_by_base(dangle_3p_bpps_by_base)

DANGLE_3P
mean 0.6755227757608049
standard deviation: 0.21268474189808237

base: A
mean 0.4013175109366286
standard deviation: 0.16649324144437963

base: C
mean 0.7799543533483958
standard deviation: 0.1576918716060733

base: G
mean 0.8573110086443392
standard deviation: 0.08731665255761688

base: T
mean 0.5744334560602915
standard deviation: 0.05659349904543354

