In [70]:
from collections import defaultdict
from typing import Dict, Iterable, List, Tuple

import numpy as np

from nupack import Complex as NupackComplex
from nupack import Model as NupackModel
from nupack import ComplexSet as NupackComplexSet
from nupack import Strand as NupackStrand
from nupack import SetSpec as NupackSetSpec
from nupack import complex_analysis as nupack_complex_analysis
from nupack import PairsMatrix as NupackPairsMatrix


import dsd.vienna_nupack as dv
import dsd.constraints as dc

# constants
# Length handed to the shared domain pool. The diagrams in the experiment
# cells number bases assuming every domain is 15 bases long.
DOMAIN_LENGTH = 15
# Single pool that nupack_base_pair_probabilities installs on every domain.
DOMAIN_POOL: dc.DomainPool = dc.DomainPool('DOMAIN_POOL', DOMAIN_LENGTH)
# Temperature given to the NUPACK model via its `celsius` argument.
TEMPERATURE = 37.0
# NUPACK model (DNA material at TEMPERATURE) used for every complex analysis.
NUPACK_MODEL = NupackModel(material='dna', celsius=TEMPERATURE)
# Number of random sequence assignments evaluated per experiment.
NUMBER_OF_TRIALS = 100

def nupack_base_pair_probabilities(strands: Iterable[dc.Strand], strands_to_assign: Iterable[dc.Strand], base_index1: int, base_index2: int) -> Tuple[np.ndarray, Dict[str, List[float]]]:
    """Given a design and a specific base pair, repeatedly assigns random DNA
    sequences to the design and computes the equilibrium base-pairing
    probability of that pair for each DNA sequence assignment.

    Side effects: every domain of every strand in ``strands`` has its pool
    set to DOMAIN_POOL, and the strands in ``strands_to_assign`` are left
    holding the sequence assigned in the final trial.

    :param strands: The strands that make up the design, in the order in
        which they are placed in the NUPACK complex.
    :type strands: Iterable[dc.Strand]
    :param strands_to_assign: The strands to assign DNA to. Domains
        complementary to these strands are automatically reassigned DNA. As
        such, strands_to_assign is usually a subset of strands.
    :type strands_to_assign: Iterable[dc.Strand]
    :param base_index1: The index of one of the bases that form the base
        pair, counted along the concatenation of the sequences of
        ``strands`` (the indexing of the pair-probability matrix).
    :type base_index1: int
    :param base_index2: The index of the other base that forms the base
        pair, counted the same way as ``base_index1``.
    :type base_index2: int

    :return: A pair ``(bpps, bpps_by_base)``. ``bpps`` is an array of
        NUMBER_OF_TRIALS base pair probabilities, one per trial.
        ``bpps_by_base`` maps the base found at ``base_index1`` in a trial
        to the list of probabilities from the trials that had that base.
    :rtype: Tuple[numpy.ndarray, Dict[str, List[float]]]
    """
    # Both arguments are traversed once per trial; materialize them so that
    # a one-shot iterator (e.g. a generator) is not exhausted after one pass.
    strands = list(strands)
    strands_to_assign = list(strands_to_assign)

    for s in strands:
        for d in s.domains:
            d.pool = DOMAIN_POOL

    base_pair_probabilities: List[float] = []
    base_pair_probabilities_by_base: Dict[str, List[float]] = defaultdict(list)
    for _ in range(NUMBER_OF_TRIALS):
        # Assign random DNA sequence
        for s in strands_to_assign:
            s.assign_dna(dv.random_dna_seq(s.length()))

        # Read the sequences back from the design after assignment.
        # base_index1 indexes the whole complex, so the base must be looked
        # up in the concatenation of all strands rather than in the sequence
        # of whichever strand happened to be assigned last.
        sequences = [strand.sequence() for strand in strands]
        base_at_base_index1 = ''.join(sequences)[base_index1]

        # Initialize NUPACK NupackComplexSet
        nupack_strands = [
            NupackStrand(sequence, name=strand.name)
            for sequence, strand in zip(sequences, strands)
        ]
        nupack_complex: NupackComplex = NupackComplex(nupack_strands)
        nupack_complex_set = NupackComplexSet(nupack_strands, complexes=(nupack_complex,))

        # Call NUPACK complex_analysis
        analysis_result = nupack_complex_analysis(nupack_complex_set, compute=['pairs'], model=NUPACK_MODEL)
        nupack_complex_result: np.ndarray = analysis_result[nupack_complex].pairs.to_array()

        # Collect results
        p = nupack_complex_result[base_index1][base_index2]
        base_pair_probabilities.append(p)
        base_pair_probabilities_by_base[base_at_base_index1].append(p)

    return np.array(base_pair_probabilities), base_pair_probabilities_by_base

def summarize_bpps(bpps: np.ndarray) -> None:
    """Prints the given base pair probabilities followed by their mean and
    standard deviation, then a blank line.

    :param bpps: The base pair probabilities to summarize.
    :type bpps: numpy.ndarray
    """
    # Label/statistic pairs, evaluated lazily so each statistic is computed
    # right before its own line is printed.
    statistics = (
        ('mean', np.mean),
        ('standard deviation:', np.std),
    )

    print('bpps:', bpps)
    for label, statistic in statistics:
        print(label, statistic(bpps))
    print()
    
def summarize_bpps_by_base(bpps_by_base: Dict[str, List[float]]) -> None:
    """Prints a summary of the base pair probabilities separately for each
    base, in the iteration order of the mapping.

    :param bpps_by_base: Maps a base to the list of base pair probabilities
        recorded for it.
    :type bpps_by_base: Dict[str, List[float]]
    """
    for base in bpps_by_base:
        print('base:', base)
        probabilities = np.array(bpps_by_base[base])
        summarize_bpps(probabilities)

In [71]:
print('INTERIOR_TO_STRAND')
# INTERIOR_TO_STRAND
# Two two-domain strands forming the duplexes a/a* and b/b*. Base indices
# run along the top strand (0-29) and continue along the bottom strand
# (30-59). The probed pair (14, 45) is the a/a* pair adjacent to the b/b*
# helix, i.e. it lies in the interior of both strands.
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [-----##----->
#                     |||||  |||||
#                    <-----##-----]
#                     |   |  |   |
#                     59 45  44  30
#                       a*^   b*
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['b*', 'a*'], name='bot strand')

# Only the top strand is passed as strands_to_assign; per the function's
# docstring the complementary domains of the bottom strand follow from it.
interior_to_strand_bpps, interior_to_strand_bpps_by_base = nupack_base_pair_probabilities((top_strand, bot_strand), (top_strand,), 14, 45)
summarize_bpps(interior_to_strand_bpps)
summarize_bpps_by_base(interior_to_strand_bpps_by_base)

INTERIOR_TO_STRAND
bpps: [0.98193134 0.99869562 0.99258916 0.99620717 0.99761221 0.99420897
 0.99816217 0.98509052 0.99693579 0.99253987 0.99871551 0.99971767
 0.99928016 0.99815321 0.99036786 0.99261542 0.99046221 0.99902519
 0.99947764 0.99971746 0.998748   0.99260293 0.99875397 0.99693411
 0.99983309 0.99947951 0.98505405 0.99902242 0.99621368 0.99902688
 0.99769474 0.99814198 0.9992542  0.99068638 0.9976983  0.99929553
 0.99875166 0.99074386 0.99044504 0.99523224 0.99043724 0.99816168
 0.99043409 0.99235904 0.99925188 0.99925188 0.99236169 0.99948107
 0.99971762 0.99626474 0.99971884 0.99696376 0.99931405 0.99420201
 0.99258798 0.9962072  0.99076603 0.9975668  0.99522381 0.9996828
 0.99947951 0.9925391  0.99080197 0.99695479 0.99814954 0.99871727
 0.99940845 0.99902687 0.9997181  0.99621184 0.99947711 0.99036731
 0.9950931  0.99902749 0.9981533  0.99971906 0.99619654 0.99511061
 0.99755439 0.99764211 0.99258771 0.99524275 0.99901218 0.99696103
 0.99899757 0.99256779 0.99815661 0.99

In [72]:
print('BLUNT_END')
# BLUNT_END
# Two single-domain strands forming the duplex a/a*. Base indices run along
# the top strand (0-14) and continue along the bottom strand (15-29). The
# probed pair (14, 15) joins the last base of the top strand to the first
# base of the bottom strand, i.e. the terminal pair of the duplex.
#                       a
#                     0  14
#                     |   |
#                    #----->
#                     |||||
#                    #-----]
#                     |   |
#                     29  15
#
#                         ^
#                         |
#                     base pair

# NOTE: top_strand and bot_strand are rebound here, replacing the strands
# created in the INTERIOR_TO_STRAND cell.
top_strand: dc.Strand = dc.Strand(['a'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['a*'], name='bot strand')

# Only the top strand is passed as strands_to_assign; per the function's
# docstring the complementary domain of the bottom strand follows from it.
blunt_end_bpps, blunt_end_bpps_by_base = nupack_base_pair_probabilities((top_strand, bot_strand), (top_strand,), 14, 15)
summarize_bpps(blunt_end_bpps)
summarize_bpps_by_base(blunt_end_bpps_by_base)

BLUNT_END
bpps: [0.7451518  0.49949551 0.27183734 0.50049393 0.59282148 0.36454945
 0.43725297 0.14678874 0.34027866 0.76966591 0.65732003 0.27148397
 0.36579206 0.4242972  0.7170038  0.64902593 0.26674161 0.76774067
 0.67471617 0.27625096 0.46041768 0.37501201 0.68947574 0.63993635
 0.34578877 0.6749428  0.33089954 0.3774112  0.46896851 0.67463195
 0.14679796 0.46932762 0.63858013 0.32471797 0.33020396 0.2672951
 0.49300955 0.32572008 0.37402766 0.85325645 0.32842396 0.14553639
 0.44788404 0.36136209 0.34563473 0.67410238 0.56294441 0.85351763
 0.64832265 0.3740317  0.3314559  0.35093397 0.40571916 0.27965776
 0.35114257 0.27970717 0.14510631 0.65550306 0.27602831 0.33279224
 0.26717209 0.3283448  0.43710872 0.42463466 0.34857236 0.33014922
 0.42906381 0.7768779  0.63777233 0.3317301  0.37402993 0.6541168
 0.65527631 0.36182941 0.40404669 0.49327525 0.36670416 0.14560284
 0.32385772 0.2797072  0.57989437 0.36778267 0.40597258 0.43674893
 0.43157757 0.36115548 0.56525412 0.27582479 0.6