In [2]:
def remove_pseudoknot_pairs(ctList, pseudo_duplex):
    """
    ctList              -- paired-bases: [(3, 8), (4, 7)]
    pseudo_duplex       -- list of duplexes, each a list of pairs

    Return the pairs of ctList, in their original order, that do not occur
    in any duplex of pseudo_duplex.
    """
    # Flatten every duplex into a single lookup set of pseudoknot pairs
    excluded = {bp for helix in pseudo_duplex for bp in helix}

    # Keep only the pairs that are not pseudoknot pairs
    return [bp for bp in ctList if bp not in excluded]

# Example usage
ctList = [(3, 8), (4, 5), (10, 15), (11, 14)]
pseudo_duplex = [[(3, 8), (4, 7)]]

normal_pairs = remove_pseudoknot_pairs(ctList, pseudo_duplex)
# Only (3, 8) appears in pseudo_duplex; (4, 5) differs from (4, 7), so it stays.
print(normal_pairs)  # prints [(4, 5), (10, 15), (11, 14)]


[(4, 5), (10, 15), (11, 14)]


In [8]:
def ct2pair(ctList):
    """
    ctList              -- paired-bases: [(3, 8), (4, 7)]
    
    Convert ctlist structure to normal pair and pseudo pairs.
    Only pairs written as (left, right) with left < right are considered;
    the caller's list is not modified.

    Return:
        normal_pairs -- List of normal pairs
        pseudo_pairs -- List of pseudoknot duplexes as returned by
                        parse_pseudoknot (each duplex is a list of pairs)
    """
    if not ctList:
        return [], []
    
    # Sort a copy by left base and drop pairs that are not left < right
    ctList = sorted(ctList, key=lambda x: x[0])
    ctList = [it for it in ctList if it[0] < it[1]]

    # The filter can empty a non-empty input (e.g. [(8, 3)]); stop here so
    # parse_pseudoknot is never asked to index into an empty list.
    if not ctList:
        return [], []
    
    # Parse pseudoknots and remove them from the normal pairs
    pseudo_pairs = parse_pseudoknot(ctList)
    normal_pairs = remove_pseudoknot_pairs(ctList, pseudo_pairs)

    return normal_pairs, pseudo_pairs

def remove_pseudoknot_pairs(ctList, pseudo_duplex):
    """
    Return the pairs of ctList (original order kept) that are not part of
    any duplex listed in pseudo_duplex.
    """
    # Gather every pseudoknot pair into one lookup set
    excluded = set()
    for helix in pseudo_duplex:
        excluded.update(helix)

    # Walk ctList once, skipping the pseudoknot pairs
    kept = []
    for bp in ctList:
        if bp in excluded:
            continue
        kept.append(bp)
    return kept

# Example usage: call ct2pair with an empty pair list
ctList = []
normal_pairs, pseudo_pairs = ct2pair(ctList)
print("Normal Pairs:", normal_pairs)
print("Pseudo Pairs:", pseudo_pairs)


IndexError: list index out of range

In [4]:
def parse_pseudoknot(ctList):
    """
    copy from IPyRSSA

    ctList              -- paired-bases: [(3, 8), (4, 7)]
    
    Parse pseudoknots from ctList. The caller's list is not modified.

    Pairs are first grouped into duplexes: runs of consecutive pairs (sorted
    by left base) whose right base steps inwards and that have no paired base
    in the gaps between them. Two duplexes whose first pairs cross each other
    are incompatible; duplexes are then picked as pseudoknots, most conflicts
    first, until no crossing remains.

    Return:
        [ [(3, 8), (4, 7)], [(3, 8), (4, 7)], ... ]
        One list of pairs per pseudoknot duplex; [] if ctList holds no pair
        with left < right or if no duplexes cross.
    """
    # Sort a copy (never the caller's list) and keep only pairs with left<right
    ctList = [ it for it in sorted(ctList, key=lambda x:x[0]) if it[0]<it[1] ]
    if not ctList:
        # Nothing to group; also avoids indexing ctList[0] below
        return []

    paired_bases = set()
    for lb,rb in ctList:
        paired_bases.add(lb)
        paired_bases.add(rb)
    
    # Collect duplex
    duplex = []
    cur_duplex = [ ctList[0] ]
    for prev_bp, cur_bp in zip(ctList, ctList[1:]):
        # A paired base between the two left bases breaks the duplex
        bulge_paired = any(
            li in paired_bases for li in range(prev_bp[0]+1, cur_bp[0])
        )
        if cur_bp[1]+1>prev_bp[1]:
            # Right base does not step inwards: start a new duplex
            bulge_paired = True
        elif not bulge_paired:
            # A paired base between the two right bases also breaks it
            bulge_paired = any(
                ri in paired_bases for ri in range(cur_bp[1]+1, prev_bp[1])
            )
        if bulge_paired:
            duplex.append(cur_duplex)
            cur_duplex = [ cur_bp ]
        else:
            cur_duplex.append(cur_bp)
    duplex.append(cur_duplex)
    
    # Discriminate duplex are pseudoknot: two duplexes are incompatible when
    # their first pairs cross (l1 < l2 < r1 < r2, in either order)
    n_duplex = len(duplex)
    incompatible_duplex = []
    for i in range(n_duplex):
        for j in range(i+1, n_duplex):
            bp1 = duplex[i][0]
            bp2 = duplex[j][0]
            if bp1[0]<bp2[0]<bp1[1]<bp2[1] or bp2[0]<bp1[0]<bp2[1]<bp1[1]:
                incompatible_duplex.append((i, j))
    
    pseudo_found = []
    while incompatible_duplex:
        # count how many crossings each duplex takes part in
        count = {}
        for l,r in incompatible_duplex:
            count[l] = count.get(l,0)+1
            count[r] = count.get(r,0)+1
        
        # find most possible pseudo: most crossings first, shorter duplex on
        # ties; the stable sort keeps first-seen order for remaining ties and
        # the last entry is taken
        ranked = list(count.items())
        ranked.sort( key=lambda x: (x[1],-len(duplex[x[0]])) )
        possible_pseudo = ranked[-1][0]
        pseudo_found.append(possible_pseudo)

        # drop every crossing that involved the chosen duplex
        incompatible_duplex = [
            crossing for crossing in incompatible_duplex
            if possible_pseudo not in crossing
        ]
    
    return [ duplex[i] for i in pseudo_found ]