In [1]:
from collections import defaultdict
from typing import Dict, Iterable, List, Set, Tuple

import numpy as np

from nupack import Complex as NupackComplex
from nupack import Model as NupackModel
from nupack import ComplexSet as NupackComplexSet
from nupack import Strand as NupackStrand
from nupack import SetSpec as NupackSetSpec
from nupack import complex_analysis as nupack_complex_analysis
from nupack import PairsMatrix as NupackPairsMatrix

import dsd.vienna_nupack as dv
import dsd.constraints as dc

# constants
# Domain length handed to the DomainPool; also the length of every random
# sequence drawn per domain in nupack_base_pair_probabilities.
DOMAIN_LENGTH = 15
# Pool attached to every domain of a design before sequences are assigned.
DOMAIN_POOL: dc.DomainPool = dc.DomainPool('DOMAIN_POOL', DOMAIN_LENGTH)
# Temperature, in degrees Celsius, passed to the NUPACK model below.
TEMPERATURE = 37.0
# NUPACK model (DNA material at TEMPERATURE) shared by every analysis call.
NUPACK_MODEL = NupackModel(material='dna', celsius=TEMPERATURE)
# Number of random sequence assignments evaluated per design.
# NOTE(review): no random seed is set in this notebook, so the statistics
# presumably change from run to run -- confirm how dv.random_dna_seq draws.
NUMBER_OF_TRIALS = 1000

def nupack_base_pair_probabilities(strands: Iterable[dc.Strand], base_index1: int, base_index2: int) -> Tuple[np.ndarray, Dict[str, List[float]]]:
    """Given a design and a specific base pair, assigns random DNA sequences to
    the design and computes the equilibrium base-pairing probability for each
    DNA sequence assignment.

    Base indices are global: bases are numbered from 0 across all strands, in
    the order the strands are given (the same numbering used to index the
    NUPACK pair-probability matrix of the complex).

    Side effects: every domain of every strand gets its ``pool`` set to
    ``DOMAIN_POOL`` and its ``sequence`` overwritten on each trial, and two
    ``DEBUG`` lines are printed.

    :param strands: The strands that make up the design. Any iterable is
        accepted; it is materialized into a list once.
    :type strands: Iterable[dc.Strand]
    :param base_index1: The index of one of the bases that form the base pair
    :type base_index1: int
    :param base_index2: The index of the other base that forms the base pair
    :type base_index2: int

    :return: An array of NUMBER_OF_TRIALS base pair probabilities and a
        dictionary which sorts the results by base (base located at
        base_index1)
    :rtype: Tuple[np.ndarray, Dict[str, List[float]]]
    :raises IndexError: If either base index is negative or not smaller than
        the total number of bases in the design.
    """
    # The strands are iterated once per trial and indexed below, so a one-shot
    # iterator (e.g. a generator) must be materialized first.
    strands = list(strands)

    # Attach the pool to every domain before asking any strand for its length
    # (same order of operations as before the lookup was restructured).
    domains_to_assign: Set[dc.Domain] = set()
    for s in strands:
        for d in s.domains:
            d.pool = DOMAIN_POOL
            domains_to_assign.add(d)

    total_num_bases = sum(s.length() for s in strands)
    for index_name, index in (('base_index1', base_index1), ('base_index2', base_index2)):
        # Negative indices are rejected too: numpy would silently wrap them
        # around, while the strand lookup below would not.
        if not 0 <= index < total_num_bases:
            raise IndexError(
                f'{index_name}={index} is out of range for a design with {total_num_bases} bases')

    # Locate the strand that contains base_index1 and the offset of the base
    # within that strand. The search stops at the first match; continuing past
    # it could let a later strand overwrite the result.
    base_index1_strand_idx: int = 0
    base_index1_strand_base_idx: int = base_index1
    for s in strands:
        strand_length = s.length()
        if base_index1_strand_base_idx < strand_length:
            break
        base_index1_strand_base_idx -= strand_length
        base_index1_strand_idx += 1

    print('DEBUG: Domains to assign:', domains_to_assign)
    print('DEBUG: Using strand index', base_index1_strand_idx, 'and base index', base_index1_strand_base_idx)

    strand_with_base_index1 = strands[base_index1_strand_idx]

    base_pair_probabilities: List[float] = []
    base_pair_probabilities_by_base: Dict[str, List[float]] = defaultdict(list)
    for _ in range(NUMBER_OF_TRIALS):
        # Assign a fresh random DNA sequence to every distinct domain
        for d in domains_to_assign:
            d.sequence = dv.random_dna_seq(DOMAIN_LENGTH)

        # Build a single NUPACK complex containing all strands of the design
        nupack_strands = [NupackStrand(strand.sequence(), name=strand.name) for strand in strands]
        nupack_complex: NupackComplex = NupackComplex(nupack_strands)
        nupack_complex_set = NupackComplexSet(nupack_strands, complexes=(nupack_complex,))

        # Call NUPACK complex_analysis and extract the pair-probability matrix
        nupack_complex_result: np.ndarray = nupack_complex_analysis(
            nupack_complex_set, compute=['pairs'], model=NUPACK_MODEL)[nupack_complex].pairs.to_array()

        # Collect results; the matrix is expected to be symmetric
        assert nupack_complex_result[base_index1][base_index2] == nupack_complex_result[base_index2][base_index1]
        p = nupack_complex_result[base_index1][base_index2]
        base_pair_probabilities.append(p)
        base_at_base_index1: str = strand_with_base_index1.sequence()[base_index1_strand_base_idx]
        base_pair_probabilities_by_base[base_at_base_index1].append(p)

    return np.array(base_pair_probabilities), base_pair_probabilities_by_base

def summarize_bpps(bpps: np.ndarray, verbose: bool = False) -> None:
    """Print the mean and standard deviation of base pair probabilities.

    :param bpps: Base pair probabilities to summarize.
    :param verbose: When true, the raw values are printed before the summary.
    :return: None; the summary is written to standard output, followed by a
        blank line.
    """
    if verbose:
        print('bpps:', bpps)

    # Each label is paired with the numpy reduction that produces its value.
    labelled_statistics = (
        ('mean', np.mean),
        ('standard deviation:', np.std),
    )
    for label, statistic in labelled_statistics:
        print(label, statistic(bpps))

    # Blank separator line between consecutive summaries.
    print()
    
def summarize_bpps_by_base(bpps_by_base: Dict[str, List[float]], verbose: bool = False) -> None:
    """Print a per-base summary of base pair probabilities.

    For each of the bases A, C, G and T (in that order) the sample size is
    printed, followed by the summary produced by ``summarize_bpps``.

    :param bpps_by_base: Maps a base letter to the probabilities recorded for
        it. A base that is missing from the mapping is reported with sample
        size 0; the mapping itself is never modified.
    :param verbose: Forwarded to ``summarize_bpps``.
    """
    for base in 'ACGT':
        # .get() works for plain dicts that lack a base and, unlike indexing,
        # does not insert an empty list into a caller-owned defaultdict.
        bpps = bpps_by_base.get(base, [])
        print('base:', base, 'sample size:', len(bpps))
        summarize_bpps(np.array(bpps), verbose=verbose)

In [2]:
print('INTERIOR_TO_STRAND')
# INTERIOR_TO_STRAND
# Motif: the measured base pair sits in the interior of both strands; the
# duplex continues on both sides of it.
# Numbers in the diagram are global base indices (bases counted across the
# strands in the order they are passed to nupack_base_pair_probabilities).
# Measured pair: base 14 (last base of a) with base 45 (first base of a*).
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [-----##----->
#                     |||||  |||||
#                    <-----##-----]
#                     |   |  |   |
#                     59 45  44  30
#                       a*^   b*
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['b*', 'a*'], name='bot strand')

interior_to_strand_bpps, interior_to_strand_bpps_by_base = nupack_base_pair_probabilities((top_strand, bot_strand), 14, 45)
summarize_bpps(interior_to_strand_bpps)
summarize_bpps_by_base(interior_to_strand_bpps_by_base)

INTERIOR_TO_STRAND
DEBUG: Domains to assign: {b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.9961128118360311
standard deviation: 0.004239204242703351

base: A sample size: 240
mean 0.9934019371838602
standard deviation: 0.004410829787463879

base: C sample size: 244
mean 0.998941566238868
standard deviation: 0.0007786136344249461

base: G sample size: 257
mean 0.998857879807377
standard deviation: 0.0008215863828127824

base: T sample size: 259
mean 0.993236021772683
standard deviation: 0.004473827336205851



In [3]:
print('BLUNT_END')
# Motif: the measured base pair is the terminal pair of a duplex formed by
# two single-domain strands; nothing continues past it on either strand.
# Measured pair: base 14 (last base of a) with base 15 (first base of a*).
#                       a
#                     0  14
#                     |   |
#                    [----->
#                     |||||
#                    <-----]
#                     |   |
#                     29  15
#
#                         ^
#                         |
#                     base pair

top_strand: dc.Strand = dc.Strand(['a'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['a*'], name='bot strand')

blunt_end_bpps, blunt_end_bpps_by_base = nupack_base_pair_probabilities((top_strand, bot_strand), 14, 15)
summarize_bpps(blunt_end_bpps)
summarize_bpps_by_base(blunt_end_bpps_by_base)

BLUNT_END
DEBUG: Domains to assign: {a}
DEBUG: Using strand index 0 and base index 14
mean 0.47829346465441347
standard deviation: 0.1926771227009018

base: A sample size: 244
mean 0.29050828928852124
standard deviation: 0.1052748490433621

base: C sample size: 242
mean 0.6548472188919046
standard deviation: 0.12116333665481252

base: G sample size: 246
mean 0.6313953256681467
standard deviation: 0.13217742534207552

base: T sample size: 268
mean 0.34930285440973596
standard deviation: 0.027087802026706194



In [21]:
print('NICK_3P')
# Motif: the top strand is split into two strands (a nick) directly after
# base 14; the bottom strand is continuous across the nick.
# Measured pair: base 14 (last base of the top-left strand) with base 45
# (first base of a* on the bottom strand).
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [----->[----->
#                     |||||  |||||
#                    <-----##-----]
#                     |   |  |   |
#                     59 45  44  30
#                       a*    b*
#                         ^
#                         |
#                     base pair
top_strand_left = dc.Strand(['a'], name='top strand left')
top_strand_right = dc.Strand(['b'], name='top strand right')
bot_strand = dc.Strand(['b*', 'a*'], name='bot strand')
strands = (top_strand_left, top_strand_right, bot_strand)

nick_3p_bpps, nick_3p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 45)
summarize_bpps(nick_3p_bpps)
summarize_bpps_by_base(nick_3p_bpps_by_base)

# Optional sanity check (disabled): swap the two base indices so results are
# grouped by the base at index 45 instead of the base at index 14.
# Expected: the means and standard deviations for A and T should swap, as
# should those for G and C.
# nick_3p_bpps, nick_3p_bpps_by_base = nupack_base_pair_probabilities(strands, 45, 14)
# summarize_bpps(nick_3p_bpps)
# summarize_bpps_by_base(nick_3p_bpps_by_base)

NICK_3P
DEBUG: Domains to assign: {b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.8672595875782702
standard deviation: 0.10634534629661914

base: A sample size: 269
mean 0.8635567589293899
standard deviation: 0.08733644138109377

base: C sample size: 214
mean 0.8682294522106879
standard deviation: 0.07442676283315217

base: G sample size: 264
mean 0.9669879984114009
standard deviation: 0.035450768031590876

base: T sample size: 253
mean 0.7663117987057989
standard deviation: 0.10005010641002769



In [5]:
print('NICK_5P')
# Motif: the bottom strand is split into two strands (a nick) directly before
# base 45; the top strand is continuous across the nick.
# Measured pair: base 14 (last base of a on the top strand) with base 45
# (first base of the bottom-left strand a*).
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [-----##----->
#                     |||||  |||||
#                    <-----]<-----]
#                     |   |  |   |
#                     59 45  44  30
#                       a*    b*
#                         ^
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand_right: dc.Strand = dc.Strand(['b*'], name='bot strand right')
bot_strand_left: dc.Strand = dc.Strand(['a*'], name='bot strand left')
strands = (top_strand, bot_strand_right, bot_strand_left)

nick_5p_bpps, nick_5p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 45)
summarize_bpps(nick_5p_bpps)
summarize_bpps_by_base(nick_5p_bpps_by_base)

NICK_5P
DEBUG: Domains to assign: {b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.8350211405843126
standard deviation: 0.15657226244020617

base: A sample size: 251
mean 0.6515360292040967
standard deviation: 0.15440950742386886

base: C sample size: 264
mean 0.9044942897046845
standard deviation: 0.09068113177746401

base: G sample size: 238
mean 0.9548857376470001
standard deviation: 0.047699738417849726

base: T sample size: 247
mean 0.8317259077411401
standard deviation: 0.10399582944454847



In [6]:
print('DANGLE_3P')
# Motif: the top strand continues past the measured pair as an unpaired
# domain b (no b* exists in this design); the bottom strand ends at the pair.
# Measured pair: base 14 (last base of a) with base 30 (first base of a*).
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [-----##----->
#                     |||||
#                    <-----]
#                     |   |
#                     44 30
#                       a*
#                         ^
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['a*'], name='bot strand')
strands = (top_strand, bot_strand)

dangle_3p_bpps, dangle_3p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 30)
summarize_bpps(dangle_3p_bpps)
summarize_bpps_by_base(dangle_3p_bpps_by_base)

DANGLE_3P
DEBUG: Domains to assign: {b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.6387919391462464
standard deviation: 0.20946167589718037

base: A sample size: 235
mean 0.41104681124226744
standard deviation: 0.13345241857566467

base: C sample size: 254
mean 0.7527945262986293
standard deviation: 0.18755407721519718

base: G sample size: 232
mean 0.8078024059941424
standard deviation: 0.14132500152716518

base: T sample size: 279
mean 0.5862938015549131
standard deviation: 0.10767071791266897



In [7]:
print('DANGLE_5P')
# Motif: the bottom strand continues past the measured pair as an unpaired
# domain b* (no b exists in this design); the top strand ends at the pair.
# Measured pair: base 14 (last base of a) with base 30 (first base of a*).
#                       a
#                     0  14
#                     |   |
#                    [----->
#                     |||||
#                    <-----##-----]
#                     |   |  |   |
#                     44 30  29  15
#                       a*    b*
#                         ^
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['b*', 'a*'], name='bot strand')
strands = (top_strand, bot_strand)

dangle_5p_bpps, dangle_5p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 30)
summarize_bpps(dangle_5p_bpps)
summarize_bpps_by_base(dangle_5p_bpps_by_base)

DANGLE_5P
DEBUG: Domains to assign: {b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.6829431695122933
standard deviation: 0.18393455827538716

base: A sample size: 243
mean 0.49973220331599666
standard deviation: 0.14625134068486934

base: C sample size: 260
mean 0.755544593153327
standard deviation: 0.1588221073332722

base: G sample size: 258
mean 0.839020192282199
standard deviation: 0.12186047013795326

base: T sample size: 239
mean 0.6217549802419817
standard deviation: 0.0802160923367763



In [8]:
print('DANGLE_5P_3P')
# Motif: both strands continue past the measured pair with domains that are
# not complementary to each other (b on top, c on the bottom).
# Measured pair: base 14 (last base of a) with base 45 (first base of a*).
#                       a      b
#                     0  14 15  29
#                     |   |  |   |
#                    [-----##---->
#                     |||||
#                    <-----##----]
#                     |   |  |   |
#                     59 45  44  30
#                       a*    c
#                         ^
#                         |
#                     base pair
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand: dc.Strand = dc.Strand(['c', 'a*'], name='bot strand')
strands = (top_strand, bot_strand)

dangle_5p_3p_bpps, dangle_5p_3p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 45)
summarize_bpps(dangle_5p_3p_bpps)
summarize_bpps_by_base(dangle_5p_3p_bpps_by_base)

DANGLE_5P_3P
DEBUG: Domains to assign: {c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.827859014462382
standard deviation: 0.16387180226058354

base: A sample size: 235
mean 0.7156079671251929
standard deviation: 0.17166049139194256

base: C sample size: 235
mean 0.8494674976856745
standard deviation: 0.180761586416342

base: G sample size: 275
mean 0.9146558362940498
standard deviation: 0.11438953287062609

base: T sample size: 255
mean 0.817787942160645
standard deviation: 0.11688420106535498



In [9]:
print('OVERHANG_ON_THIS_STRAND_3P')
# Motif: nick in the top strand; the strand that contains base 14 (top left)
# continues past the nick as an unpaired overhang b.
# Measured pair: base 14 (last base of a) with base 60 (first base of a*).
#                          ^
#                          |-29
#                          |   b
#                          |-15
#                          #
#                          #
#                       a  #    c
#                     0  14#  30  44
#                     |   |#  |   |
#                    [-----# [----->
#                     |||||   |||||
#                    <-----###-----]
#                     |   |   |   |
#                     74  60  59  45
#                       a*      c*
#                         ^
#                         |
#                     base pair
top_strand_left: dc.Strand = dc.Strand(['a', 'b'], name='top strand left')
top_strand_right: dc.Strand = dc.Strand(['c'], name='top strand right')
bot_strand: dc.Strand = dc.Strand(['c*', 'a*'], name='bot strand')
strands = (top_strand_left, top_strand_right, bot_strand)

overhang_on_this_strand_3p_bpps, overhang_on_this_strand_3p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 60)
summarize_bpps(overhang_on_this_strand_3p_bpps)
summarize_bpps_by_base(overhang_on_this_strand_3p_bpps_by_base)

OVERHANG_ON_THIS_STRAND_3P
DEBUG: Domains to assign: {c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.8796977305967723
standard deviation: 0.1191295086953154

base: A sample size: 237
mean 0.8588224412957866
standard deviation: 0.09664464695941287

base: C sample size: 230
mean 0.863849551707779
standard deviation: 0.1531656134847647

base: G sample size: 279
mean 0.9664623352169003
standard deviation: 0.05334620522890883

base: T sample size: 254
mean 0.8182221401234903
standard deviation: 0.10053371000482116



In [10]:
print('OVERHANG_ON_THIS_STRAND_5P')
# Motif: nick in the bottom strand; the strand that contains base 60 (bottom
# left) starts with an unpaired overhang c before a*.
# Measured pair: base 14 (last base of a) with base 60 (first base of a*).
#                     base pair
#                         |
#                         v
#                       a       b
#                     0   14  15  29
#                     |   |   |   |
#                    [-----###----->
#                     |||||   |||||
#                    <-----# <-----]
#                     |   |#  |   |
#                     74 60#  44  30
#                       a* #    b*
#                          #
#                          #
#                          |-59
#                          |    c
#                          |-45
#                          ]
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand_right: dc.Strand = dc.Strand(['b*'], name='bot strand right')
bot_strand_left: dc.Strand = dc.Strand(['c', 'a*'], name='bot strand left')
strands = (top_strand, bot_strand_right, bot_strand_left)

overhang_on_this_strand_5p_bpps, overhang_on_this_strand_5p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 60)
summarize_bpps(overhang_on_this_strand_5p_bpps)
summarize_bpps_by_base(overhang_on_this_strand_5p_bpps_by_base)

OVERHANG_ON_THIS_STRAND_5P
DEBUG: Domains to assign: {c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.8491174449083084
standard deviation: 0.1474798798046346

base: A sample size: 258
mean 0.7221841873616994
standard deviation: 0.14848498358759954

base: C sample size: 231
mean 0.8849168602598924
standard deviation: 0.14344893278587983

base: G sample size: 267
mean 0.9412558557926917
standard deviation: 0.0843965501833503

base: T sample size: 244
mean 0.8486180998045341
standard deviation: 0.1049407773583661



In [11]:
print('OVERHANG_ON_ADJACENT_STRAND_3P')
# Motif: nick in the top strand; the strand on the other side of the nick
# (top right) starts with an unpaired overhang b before c.
# Measured pair: base 14 (last base of the top-left strand a) with base 60
# (first base of a*).
#                            [
#                            |-15
#                            |    b
#                            |-29
#                            #
#                            #
#                       a    #  c
#                     0   14 #30  44
#                     |   |  #|   |
#                    [-----> #----->
#                     |||||   |||||
#                    <-----###-----]
#                     |   |   |   |
#                     74  60  59  45
#                       a*      c*
#                         ^
#                         |
#                     base pair
top_strand_left: dc.Strand = dc.Strand(['a'], name='top strand left')
top_strand_right: dc.Strand = dc.Strand(['b', 'c'], name='top strand right')
bot_strand: dc.Strand = dc.Strand(['c*', 'a*'], name='bot strand')
strands = (top_strand_left, top_strand_right, bot_strand)

overhang_on_adj_strand_3p_bpps, overhang_on_adj_strand_3p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 60)
summarize_bpps(overhang_on_adj_strand_3p_bpps)
summarize_bpps_by_base(overhang_on_adj_strand_3p_bpps_by_base)

OVERHANG_ON_ADJACENT_STRAND_3P
DEBUG: Domains to assign: {c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.6737881404388395
standard deviation: 0.23374248541394083

base: A sample size: 248
mean 0.6822904608044595
standard deviation: 0.17796055672542196

base: C sample size: 254
mean 0.615714733776556
standard deviation: 0.276719624548629

base: G sample size: 249
mean 0.8266234616497364
standard deviation: 0.2189721655071744

base: T sample size: 249
mean 0.5717241840534294
standard deviation: 0.15661734679997283



In [12]:
print('OVERHANG_ON_ADJACENT_STRAND_5P')
# Motif: nick in the bottom strand; the strand on the other side of the nick
# (bottom right) continues past b* as an unpaired overhang c.
# Measured pair: base 14 (last base of a) with base 60 (first base of the
# bottom-left strand a*).
#                     base pair
#                         |
#                         v
#                       a       b
#                     0   14  15  29
#                     |   |   |   |
#                    [-----###----->
#                     |||||   |||||
#                    <-----] #-----]
#                     |   |  #|   |
#                     74  60 #44  30
#                       a*   #  b*
#                            #
#                            #
#                            |-45
#                            |   c
#                            |-59
#                            v
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand_right: dc.Strand = dc.Strand(['b*', 'c'], name='bot strand right')
bot_strand_left: dc.Strand = dc.Strand(['a*'], name='bot strand left')
strands = (top_strand, bot_strand_right, bot_strand_left)

overhang_on_adj_strand_5p_bpps, overhang_on_adj_strand_5p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 60)
summarize_bpps(overhang_on_adj_strand_5p_bpps)
summarize_bpps_by_base(overhang_on_adj_strand_5p_bpps_by_base)

OVERHANG_ON_ADJACENT_STRAND_5P
DEBUG: Domains to assign: {c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.6594095427328587
standard deviation: 0.26620652032430847

base: A sample size: 260
mean 0.45310058209541704
standard deviation: 0.20052512780989437

base: C sample size: 245
mean 0.6983199681739806
standard deviation: 0.28407014440427414

base: G sample size: 232
mean 0.8740731220413489
standard deviation: 0.16324169117179654

base: T sample size: 263
mean 0.6377567865849122
standard deviation: 0.2154406274475656



In [13]:
print('OVERHANG_ON_BOTH_STRAND_3P')
# Motif: nick in the top strand with an unpaired overhang on both sides of
# the nick (b on the top-left strand, c on the top-right strand; b and c are
# unrelated domains).
# Measured pair: base 14 (last base of a) with base 75 (first base of a*).
#                          ^ [
#                       29-| |-30
#                     b    | |    c
#                       15-| |-44
#                          # #
#                       a  # #   d
#                     0  14# #45  59
#                     |   |   |   |
#                    [-----# #----->
#                     |||||   |||||
#                    <-----###-----]
#                     |   |   |   |
#                     89 75   74  60
#                       a*      d*
#                         ^
#                         |
#                     base pair
top_strand_left: dc.Strand = dc.Strand(['a', 'b'], name='top strand left')
top_strand_right: dc.Strand = dc.Strand(['c', 'd'], name='top strand right')
bot_strand: dc.Strand = dc.Strand(['d*', 'a*'], name='bot strand')
strands = (top_strand_left, top_strand_right, bot_strand)

overhang_on_both_strand_3p_bpps, overhang_on_both_strand_3p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 75)
summarize_bpps(overhang_on_both_strand_3p_bpps)
summarize_bpps_by_base(overhang_on_both_strand_3p_bpps_by_base)

OVERHANG_ON_BOTH_STRAND_3P
DEBUG: Domains to assign: {d, c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.718786815266854
standard deviation: 0.24389041845893072

base: A sample size: 248
mean 0.6734262815808035
standard deviation: 0.22202766705340954

base: C sample size: 246
mean 0.6651879159876307
standard deviation: 0.3054241613192534

base: G sample size: 257
mean 0.8708599018950341
standard deviation: 0.174127692453573

base: T sample size: 249
mean 0.6599593386137902
standard deviation: 0.18470329899816987



In [14]:
print('OVERHANG_ON_BOTH_STRAND_5P')
# Motif: nick in the bottom strand with an unpaired overhang on both sides of
# the nick (c on the bottom-right strand, d on the bottom-left strand; c and
# d are unrelated domains).
# Measured pair: base 14 (last base of a) with base 75 (first base of a*).
#
#
#                     base pair
#                         |
#                         v
#                       a       b
#                     0   14  15  29
#                     |   |   |   |
#                    [-----###----->
#                     |||||   |||||
#                    <-----# #-----]
#                     |   |# #|   |
#                     89 75# #44  30
#                       a* # #  b*
#                          # #
#                          # #
#                       74-| |-45
#                      d   | |   c
#                       60-| |-59
#                          ] v
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand_right: dc.Strand = dc.Strand(['b*', 'c'], name='bot strand right')
bot_strand_left: dc.Strand = dc.Strand(['d', 'a*'], name='bot strand left')
strands = (top_strand, bot_strand_right, bot_strand_left)

overhang_on_both_strand_5p_bpps, overhang_on_both_strand_5p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 75)
summarize_bpps(overhang_on_both_strand_5p_bpps)
summarize_bpps_by_base(overhang_on_both_strand_5p_bpps_by_base)

OVERHANG_ON_BOTH_STRAND_5P
DEBUG: Domains to assign: {d, c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.7232054176709104
standard deviation: 0.24955365792138673

base: A sample size: 239
mean 0.5735755003952292
standard deviation: 0.2294889277795879

base: C sample size: 257
mean 0.7454249753985641
standard deviation: 0.272510228873779

base: G sample size: 240
mean 0.8559809925422911
standard deviation: 0.19997131525376544

base: T sample size: 264
mean 0.7163303643517792
standard deviation: 0.20676177125636974



In [15]:
print('THREE_ARM_JUNCTION_3P')
# Motif: three strands whose domains are pairwise complementary (a/a*, b/b*,
# c/c*), drawn as three duplex arms meeting at one junction.
# Measured pair: base 14 (last base of a) with base 75 (first base of a*).
#                          ^ [
#                       29-|-|-30
#                     b    |-|    b*
#                       15-|-|-44
#                          # #
#                       a  # #   c
#                     0  14# #45  59
#                     |   |   |   |
#                    [-----# #----->
#                     |||||   |||||
#                    <-----###-----]
#                     |   |   |   |
#                     89 75   74  60
#                       a*      c*
#                         ^
#                         |
#                     base pair
top_strand_left: dc.Strand = dc.Strand(['a', 'b'], name='top strand left')
top_strand_right: dc.Strand = dc.Strand(['b*', 'c'], name='top strand right')
bot_strand: dc.Strand = dc.Strand(['c*', 'a*'], name='bot strand')
strands = (top_strand_left, top_strand_right, bot_strand)

three_arm_junction_3p_bpps, three_arm_junction_3p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 75)
summarize_bpps(three_arm_junction_3p_bpps)
summarize_bpps_by_base(three_arm_junction_3p_bpps_by_base)

THREE_ARM_JUNCTION_3P
DEBUG: Domains to assign: {c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.8100072468983537
standard deviation: 0.15717264182104573

base: A sample size: 217
mean 0.7227015208036904
standard deviation: 0.13818412887991838

base: C sample size: 266
mean 0.813515544638223
standard deviation: 0.16541512373594847

base: G sample size: 267
mean 0.9501203327403193
standard deviation: 0.04368890867001301

base: T sample size: 250
mean 0.732415012674081
standard deviation: 0.13217541408766995



In [16]:
# EQUIVALENT TO THREE_ARM_JUNCTION_3P
# The strand list has the same structure as THREE_ARM_JUNCTION_3P -- (a, b),
# (b*, c), (c*, a*) -- only the variable names and the drawing differ.
print('THREE_ARM_JUNCTION_5P')
# Measured pair: base 14 (last base of a) with base 75 (first base of a*).
#
#
#                     base pair
#                         |
#                         v
#                       a       b
#                     0   14  15  29
#                     |   |   |   |
#                    [-----###----->
#                     |||||   |||||
#                    <-----# #-----]
#                     |   |# #|   |
#                     89 75# #44  30
#                       a* # #  b*
#                          # #
#                          # #
#                       74-|-|-45
#                      c*  |-|   c
#                       60-|-|-59
#                          ] v
top_strand: dc.Strand = dc.Strand(['a', 'b'], name='top strand')
bot_strand_right: dc.Strand = dc.Strand(['b*', 'c'], name='bot strand right')
bot_strand_left: dc.Strand = dc.Strand(['c*', 'a*'], name='bot strand left')
strands = (top_strand, bot_strand_right, bot_strand_left)

three_arm_junction_5p_bpps, three_arm_junction_5p_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 75)
summarize_bpps(three_arm_junction_5p_bpps)
summarize_bpps_by_base(three_arm_junction_5p_bpps_by_base)

THREE_ARM_JUNCTION_5P
DEBUG: Domains to assign: {c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.806573996205012
standard deviation: 0.16394620014032277

base: A sample size: 235
mean 0.706057066042366
standard deviation: 0.15975027728783847

base: C sample size: 236
mean 0.8145695273128606
standard deviation: 0.15865230885663792

base: G sample size: 266
mean 0.9558100829303114
standard deviation: 0.041451120528201074

base: T sample size: 263
mean 0.7382764075275972
standard deviation: 0.13709936474353307



In [17]:
print('FOUR_ARM_JUNCTION')
# Motif: four strands whose domains are pairwise complementary (a/a*, b/b*,
# c/c*, d/d*), drawn as four duplex arms meeting at one junction.
# The design has 4 strands x 30 bases, i.e. global indices 0..119.
# Measured pair: base 14 (last base of a) with base 105 (first base of a*).
#                          ^ [
#                       29-|-|-30
#                      b   |-|   b*
#                       15-|-|-44
#                          # #
#                          # #
#                       a  # #  c
#                     0  14# #45  59
#                     |   |# #|   |
#                    [-----# #----->
#                     |||||   |||||
#                    <-----# #-----]
#                     |   |# #|   |
#                   119 105# #74  60
#                      a*  # #  c*
#                          # #
#                          # #
#                      104-|-|-75
#                     d*   |-|    d
#                       90-|-|-89
#                          ] v
top_strand_left: dc.Strand = dc.Strand(['a', 'b'], name='top strand left')
top_strand_right: dc.Strand = dc.Strand(['b*', 'c'], name='top strand right')
bot_strand_right: dc.Strand = dc.Strand(['c*', 'd'], name='bot strand right')
bot_strand_left: dc.Strand = dc.Strand(['d*', 'a*'], name='bot strand left')
strands = (top_strand_left, top_strand_right, bot_strand_right, bot_strand_left)

four_arm_junction_bpps, four_arm_junction_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 105)
summarize_bpps(four_arm_junction_bpps)
summarize_bpps_by_base(four_arm_junction_bpps_by_base)

FOUR_ARM_JUNCTION
DEBUG: Domains to assign: {d, c, b, a}
DEBUG: Using strand index 0 and base index 14
mean 0.8289373811004487
standard deviation: 0.26332614281806865

base: A sample size: 256
mean 0.8405790595244724
standard deviation: 0.17644166708058473

base: C sample size: 252
mean 0.7455298772354715
standard deviation: 0.3738274351676722

base: G sample size: 262
mean 0.9355753913875282
standard deviation: 0.15068291834181488

base: T sample size: 230
mean 0.7858906967622279
standard deviation: 0.2554836730407844



In [18]:
print('FIVE_ARM_JUNCTION')
# Motif: five strands whose domains are pairwise complementary (a/a*, b/b*,
# c/c*, d/d*, e/e*), extending the four-strand junction by one more strand.
# Global base indices, with 15 bases per domain (DOMAIN_LENGTH):
#   top strand left:   a    0-14   | b   15-29
#   top strand right:  b*  30-44   | c   45-59
#   bot strand right:  c*  60-74   | d   75-89
#   bot strand left:   d*  90-104  | e  105-119
#   other strand:      e* 120-134  | a* 135-149
# Measured pair: base 14 (last base of a) with base 135 (first base of a*).
top_strand_left: dc.Strand = dc.Strand(['a', 'b'], name='top strand left')
top_strand_right: dc.Strand = dc.Strand(['b*', 'c'], name='top strand right')
bot_strand_right: dc.Strand = dc.Strand(['c*', 'd'], name='bot strand right')
bot_strand_left: dc.Strand = dc.Strand(['d*', 'e'], name='bot strand left')
other_strand: dc.Strand = dc.Strand(['e*', 'a*'], name='other strand')
strands = (top_strand_left, top_strand_right, bot_strand_right, bot_strand_left, other_strand)

five_arm_junction_bpps, five_arm_junction_bpps_by_base = nupack_base_pair_probabilities(strands, 14, 135)
summarize_bpps(five_arm_junction_bpps)
summarize_bpps_by_base(five_arm_junction_bpps_by_base)

FIVE_ARM_JUNCTION
DEBUG: Domains to assign: {c, e, b, a, d}
DEBUG: Using strand index 0 and base index 14
mean 0.8395308428897759
standard deviation: 0.1713605662886775

base: A sample size: 241
mean 0.7969829025232458
standard deviation: 0.12767061289492304

base: C sample size: 271
mean 0.7930695748868479
standard deviation: 0.2397028015817529

base: G sample size: 264
mean 0.9655688421232794
standard deviation: 0.03785758345595545

base: T sample size: 224
mean 0.7929729208338934
standard deviation: 0.130010996932305

