Ocena 3.0
Napisz program do konwersji z formatu dot-bracket do BPSEQ

In [38]:
import os

def read(filepath):
    """Read a sequence and its dot-bracket structure from a .dbn file.

    Blank lines are ignored. If the first non-blank line starts with '>',
    it is treated as a FASTA-style header and skipped. The next two lines
    are the sequence and the structure.

    Args:
        filepath: Path of the .dbn file to read.

    Returns:
        A ``(sequence, structure)`` tuple of stripped strings.

    Raises:
        ValueError: If the file does not contain both a sequence line and
            a structure line.
    """
    with open(filepath, 'r') as f:
        content = [line.strip() for line in f if line.strip()]

    # Only the very first line may be a header; later lines are never
    # dropped, so a structure containing '>' brackets is left untouched.
    if content and content[0].startswith('>'):
        content = content[1:]

    if len(content) < 2:
        raise ValueError("plik nie zawiera sekwencji i struktury")

    return content[0], content[1]

def bpseq(sequence, structure):
    """Convert a sequence and its dot-bracket structure into BPSEQ lines.

    Each bracket family is matched on its own stack, so crossing families
    (pseudoknots) are supported. Besides ``()``, ``[]``, ``{}`` and ``<>``,
    letter pairs (``A``/``a`` ... ``Z``/``z``) are recognised as brackets.
    Any other character is treated as an unpaired position.

    Args:
        sequence: Nucleotide sequence.
        structure: Dot-bracket string of the same length as ``sequence``.

    Returns:
        A list of ``"<index> <base> <partner>"`` strings with 1-based
        indices; ``partner`` is 0 for unpaired positions.

    Raises:
        ValueError: If the lengths differ, a closing symbol has no matching
            opening symbol, or an opening symbol is never closed.
    """
    if len(sequence) != len(structure):
        raise ValueError("różne długości.")

    # Opening symbol -> closing symbol.
    symbols = {
        '(': ')',
        '[': ']',
        '{': '}',
        '<': '>',
    }
    # Upper-case letters open, the matching lower-case letters close.
    symbols.update({upper: upper.lower() for upper in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"})
    # Reverse lookup so a closing symbol finds its stack directly.
    opening_of = {closing: opening for opening, closing in symbols.items()}

    stacks = {opening: [] for opening in symbols}
    pairs = [0] * len(sequence)

    for i, char in enumerate(structure):
        if char in symbols:
            stacks[char].append(i)
        elif char in opening_of:
            stack = stacks[opening_of[char]]
            if not stack:
                raise ValueError(f"nawias '{char}' bez otwarcia w {i+1}")
            j = stack.pop()
            # BPSEQ partners are 1-based.
            pairs[i] = j + 1
            pairs[j] = i + 1

    for symbol, stack in stacks.items():
        if stack:
            raise ValueError(f"niedomknięty nawias '{symbol}'")

    return [f"{i+1} {base} {pairs[i]}" for i, base in enumerate(sequence)]


def dbn_to_bpseq(input_folder, output_folder):
    """Convert every ``.dbn`` file in ``input_folder`` to a ``.bpseq`` file.

    The output folder is created if it does not exist. Each result keeps the
    input's base name with the extension replaced by ``.bpseq``. A failure
    on one file is printed and does not stop the remaining files.

    Args:
        input_folder: Directory scanned for files ending with ``.dbn``.
        output_folder: Directory receiving the ``.bpseq`` files.
    """
    os.makedirs(output_folder, exist_ok=True)

    for entry in os.listdir(input_folder):
        if not entry.endswith('.dbn'):
            continue

        source_path = os.path.join(input_folder, entry)
        try:
            seq, dots = read(source_path)
            rows = bpseq(seq, dots)

            stem, _ext = os.path.splitext(entry)
            target_name = stem + '.bpseq'
            target_path = os.path.join(output_folder, target_name)

            # Rows are joined with newlines; no trailing newline is written.
            with open(target_path, 'w') as out:
                out.write('\n'.join(rows))

            print(f"done: {entry} przekonwertowano na {target_name}")
        except Exception as err:
            print(f"błąd w pliku {entry}: {err}")

# Convert every .dbn file found in "lab1_" and write the results to "lab1_bpseq".
input_folder = "lab1_"
output_folder = "lab1_bpseq"
dbn_to_bpseq(input_folder, output_folder)


done: 5J7L-AA.dbn przekonwertowano na 5J7L-AA.bpseq
done: 1ET4-A.dbn przekonwertowano na 1ET4-A.bpseq
done: 3IGI-A.dbn przekonwertowano na 3IGI-A.bpseq
done: 6GAZ-AA.dbn przekonwertowano na 6GAZ-AA.bpseq
done: 6N5P-A.dbn przekonwertowano na 6N5P-A.bpseq
done: 5SWD-B.dbn przekonwertowano na 5SWD-B.bpseq
done: 5TPY-A.dbn przekonwertowano na 5TPY-A.bpseq
done: 6HA1-a.dbn przekonwertowano na 6HA1-a.bpseq
done: 6UES-A.dbn przekonwertowano na 6UES-A.bpseq
done: 3Q1Q-B.dbn przekonwertowano na 3Q1Q-B.bpseq
done: 3IVN-A.dbn przekonwertowano na 3IVN-A.bpseq
done: 3Q3Z-V.dbn przekonwertowano na 3Q3Z-V.bpseq
done: 5O60-A.dbn przekonwertowano na 5O60-A.bpseq
done: 6TPQ-U.dbn przekonwertowano na 6TPQ-U.bpseq
done: 6ERI-BA.dbn przekonwertowano na 6ERI-BA.bpseq
done: 6ZM6-AA.dbn przekonwertowano na 6ZM6-AA.bpseq
done: 7K16-P.dbn przekonwertowano na 7K16-P.bpseq
done: 6AGB-A.dbn przekonwertowano na 6AGB-A.bpseq
done: 6NEQ-A.dbn przekonwertowano na 6NEQ-A.bpseq
done: 6V3A-sN1.dbn przekonwertowano na 6V3A-sN1.bpseq

Ocena 3.5
Napisz program znajdujący motyw hairpin (ocena 3.5)

In [40]:
import os

def bpseq(filepath):
    """Parse a BPSEQ file into a base list and a partner list.

    Only lines that split into exactly three whitespace-separated fields
    are used; the first field (the index) is ignored.

    Args:
        filepath: Path of the BPSEQ file.

    Returns:
        ``(sequence, pairing)``: the bases as a list of strings and the
        partner indices as a list of ints, in file order.

    Raises:
        ValueError: If the third field of a used line is not an integer.
    """
    bases, partners = [], []
    with open(filepath, 'r') as handle:
        for raw in handle:
            fields = raw.split()
            if len(fields) != 3:
                continue
            bases.append(fields[1])
            partners.append(int(fields[2]))
    return bases, partners


def hairpin(sequence, pairing):
    """Find base pairs that enclose only unpaired nucleotides.

    Args:
        sequence: List of bases.
        pairing: List of 1-based partner indices (0 means unpaired),
            parallel to ``sequence``.

    Returns:
        A list of ``(start, loop_seq, end)`` tuples with 1-based ``start``
        and ``end`` of the closing pair; ``loop_seq`` is the sequence from
        ``start`` to ``end`` inclusive.
    """
    found = []
    for pos, partner in enumerate(pairing, start=1):
        # Each pair is examined once, from its 5' side.
        if partner <= pos:
            continue

        # 0-based indices pos .. partner-2 lie strictly between the two
        # paired positions.
        enclosed_unpaired = all(
            pairing[idx] == 0 for idx in range(pos, partner - 1)
        )
        if enclosed_unpaired:
            found.append((pos, ''.join(sequence[pos - 1:partner]), partner))
    return found


def files(filepath):
    """Write the hairpins of one BPSEQ file to a sibling ``.hairpins`` file.

    The output path is ``filepath`` with its extension replaced by
    ``.hairpins``; each hairpin is written as ``start-loop-end`` on its own
    line. A summary line is printed afterwards.

    Args:
        filepath: Path of the BPSEQ file to analyse.
    """
    bases, partners = bpseq(filepath)

    rows = []
    for first, loop, last in hairpin(bases, partners):
        rows.append(f"{first}-{loop}-{last}")

    root, _ext = os.path.splitext(filepath)
    target = root + ".hairpins"
    with open(target, 'w') as out:
        out.write('\n'.join(rows))

    print(f"done: {os.path.basename(target)} ({len(rows)} hairpinów)")


def find_hairpin(input_folder):
    """Run the hairpin search on every ``.bpseq`` file in ``input_folder``.

    Errors raised while processing a file are printed and the loop moves on
    to the next file.

    Args:
        input_folder: Directory scanned for files ending with ``.bpseq``.
    """
    for entry in os.listdir(input_folder):
        if not entry.endswith(".bpseq"):
            continue
        target = os.path.join(input_folder, entry)
        try:
            files(target)
        except Exception as err:
            print(f"błąd w pliku {entry}: {err}")
# Search every .bpseq file in "lab1_bpseq" for hairpins.
find_hairpin("lab1_bpseq")  

done: 3IVN-A.hairpins (0 hairpinów)
done: 6ZM6-AA.hairpins (18 hairpinów)
done: 3Q1Q-B.hairpins (7 hairpinów)
done: 5TPY-A.hairpins (2 hairpinów)
done: 6GAZ-AA.hairpins (20 hairpinów)
done: 6N5P-A.hairpins (2 hairpinów)
done: 3Q3Z-V.hairpins (0 hairpinów)
done: 6AGB-A.hairpins (8 hairpinów)
done: 4FRN-B.hairpins (1 hairpinów)
done: 3IGI-A.hairpins (7 hairpinów)
done: 6ERI-BA.hairpins (28 hairpinów)
done: 6NEQ-A.hairpins (18 hairpinów)
done: 6HA1-a.hairpins (29 hairpinów)
done: 5SWD-B.hairpins (0 hairpinów)
done: 7K16-P.hairpins (1 hairpinów)
done: 1ET4-A.hairpins (0 hairpinów)
done: 6TPQ-U.hairpins (53 hairpinów)
done: 5J7L-AA.hairpins (29 hairpinów)
done: 5O60-A.hairpins (54 hairpinów)
done: 6V3A-sN1.hairpins (29 hairpinów)
done: 6UES-A.hairpins (1 hairpinów)


Ocena 4.0
Napisz program znajdujący motyw stem (ocena 4.0)

In [42]:
import os

def bpseq(filepath):
    """Load the bases and partner indices stored in a BPSEQ file.

    Lines that do not consist of exactly three whitespace-separated fields
    are skipped. The first field of each used line is not read.

    Args:
        filepath: Path of the BPSEQ file.

    Returns:
        ``(sequence, pairing)``: list of base strings and list of integer
        partner indices, in file order.

    Raises:
        ValueError: If a partner field cannot be converted to ``int``.
    """
    with open(filepath, 'r') as handle:
        rows = [
            fields
            for fields in (raw.split() for raw in handle)
            if len(fields) == 3
        ]

    sequence = [row[1] for row in rows]
    pairing = [int(row[2]) for row in rows]
    return sequence, pairing


def sstem(sequence, pairing):
    """Find stems: runs of consecutive, stacked base pairs.

    Starting from each not-yet-used pair ``(i, j)`` with ``i < j``, the stem
    is extended inwards while position ``i + k`` pairs with ``j - k``. The
    extension stops at the stem's midpoint, so a pair is never counted from
    both of its ends.

    Args:
        sequence: List of bases.
        pairing: List of 1-based partner indices (0 means unpaired),
            parallel to ``sequence``.

    Returns:
        A list of tuples
        ``(left_start, left_seq, left_end, right_start, right_seq, right_end)``
        with 1-based positions. The right strand is reported from the outer
        pair inwards, so ``right_start >= right_end`` and ``right_seq`` is
        read in that direction.
    """
    stems = []
    n = len(sequence)
    visited = [False] * n

    for i in range(n):
        j = pairing[i]
        # Start only at the 5' partner of a pair not yet assigned to a stem.
        if not (j > i + 1 and not visited[i] and not visited[j - 1]):
            continue

        left = i
        right = j - 1
        length = 0

        # ``left + length < right - length`` keeps both cursors on their own
        # side of the stem; it also keeps both indices inside the table.
        while (left + length < right - length and
               pairing[left + length] == right - length + 1 and
               not visited[left + length] and
               not visited[right - length]):
            length += 1

        left_seq = ''.join(sequence[left : left + length])
        right_seq = ''.join(sequence[right - length + 1 : right + 1])[::-1]

        stems.append((
            left + 1, left_seq, left + length,
            right + 1, right_seq, right - length + 2
        ))

        for k in range(length):
            visited[left + k] = True
            visited[right - k] = True

    return stems


def files(filepath):
    """Write the stems of one BPSEQ file to a sibling ``.stems`` file.

    The output path is ``filepath`` with its extension replaced by
    ``.stems``. Each stem is written on its own line as
    ``start-seq-end start-seq-end`` (left strand, then right strand).
    A summary line is printed afterwards.

    Args:
        filepath: Path of the BPSEQ file to analyse.
    """
    bases, partners = bpseq(filepath)
    stems = sstem(bases, partners)

    rows = [
        f"{i_start}-{l_seq}-{i_end} {j_start}-{r_seq}-{j_end}"
        for i_start, l_seq, i_end, j_start, r_seq, j_end in stems
    ]

    root, _ext = os.path.splitext(filepath)
    target = root + ".stems"
    with open(target, 'w') as out:
        out.write('\n'.join(rows))

    print(f"done: {os.path.basename(target)} ({len(stems)} stemów)")

def find_stems(input_folder):
    """Run the stem search on every ``.bpseq`` file in ``input_folder``.

    Errors raised while processing a file are printed and the loop moves on
    to the next file.

    Args:
        input_folder: Directory scanned for files ending with ``.bpseq``.
    """
    for entry in os.listdir(input_folder):
        if not entry.endswith(".bpseq"):
            continue
        target = os.path.join(input_folder, entry)
        try:
            files(target)
        except Exception as err:
            print(f"błąd w pliku {entry}: {err}")
# Search every .bpseq file in "lab1_bpseq" for stems.
find_stems("lab1_bpseq") 


done: 3IVN-A.stems (7 stemów)
done: 6ZM6-AA.stems (84 stemów)
done: 3Q1Q-B.stems (24 stemów)
done: 5TPY-A.stems (6 stemów)
done: 6GAZ-AA.stems (83 stemów)
done: 6N5P-A.stems (10 stemów)
done: 3Q3Z-V.stems (6 stemów)
done: 6AGB-A.stems (30 stemów)
done: 4FRN-B.stems (11 stemów)
done: 3IGI-A.stems (32 stemów)
done: 6ERI-BA.stems (127 stemów)
done: 6NEQ-A.stems (83 stemów)
done: 6HA1-a.stems (123 stemów)
done: 5SWD-B.stems (6 stemów)
done: 7K16-P.stems (5 stemów)
done: 1ET4-A.stems (5 stemów)
done: 6TPQ-U.stems (250 stemów)
done: 5J7L-AA.stems (123 stemów)
done: 5O60-A.stems (261 stemów)
done: 6V3A-sN1.stems (123 stemów)
done: 6UES-A.stems (12 stemów)


Ocena 4.5
Napisz program znajdujący motyw single strand (ocena 4.5)

In [43]:
import os

def bpseq(filepath):
    """Read a BPSEQ file and return its bases and partner indices.

    A line contributes only when it has exactly three whitespace-separated
    fields; its second field is the base and its third the partner index.

    Args:
        filepath: Path of the BPSEQ file.

    Returns:
        ``(sequence, pairing)``: list of base strings and list of integer
        partner indices, in file order.

    Raises:
        ValueError: If a partner field cannot be converted to ``int``.
    """
    sequence, pairing = [], []
    with open(filepath, 'r') as source:
        records = (row.split() for row in source)
        for record in records:
            if len(record) == 3:
                sequence.append(record[1])
                pairing.append(int(record[2]))
    return sequence, pairing

def find_single_strands(sequence, pairing):
    """Find single-stranded segments together with their flanking paired bases.

    Three kinds of segment are reported:

    * the 5' end: unpaired nucleotides from position 1 up to and including
      the first opening paired nucleotide;
    * internal segments: two paired nucleotides with at least one unpaired
      nucleotide, and nothing paired, between them;
    * the 3' end: the last closing paired nucleotide followed by at least
      one unpaired nucleotide up to the end of the sequence.

    Args:
        sequence: List of bases.
        pairing: List of 1-based partner indices (0 means unpaired),
            parallel to ``sequence``.

    Returns:
        A list of ``"start-strand-end"`` strings (1-based, inclusive),
        ordered by ``start``.
    """
    n = len(sequence)
    spans = []  # (first, last) 0-based inclusive indices

    # 5' end: stop at the first opening pair; it yields a strand only if
    # everything before it is unpaired.
    for end in range(1, n):
        if pairing[end] != 0 and pairing[end] - 1 > end:
            if all(pairing[k] == 0 for k in range(end)):
                spans.append((0, end))
            break

    # Internal strands: neighbouring paired positions with a gap between
    # them. Walking the paired positions once replaces the nested scan.
    paired = [k for k in range(n) if pairing[k] != 0]
    for left, right in zip(paired, paired[1:]):
        if right - left < 2:
            continue
        # Defensive: skip if either flank claims a partner inside the gap
        # (only possible when the pairing table is not symmetric).
        if left < pairing[left] - 1 < right or left < pairing[right] - 1 < right:
            continue
        spans.append((left, right))

    # 3' end: stop at the last closing pair; it yields a strand only if at
    # least one nucleotide follows it and all of them are unpaired.
    for start in range(n - 1, 0, -1):
        if pairing[start] != 0 and pairing[start] - 1 < start:
            tail_unpaired = all(pairing[k] == 0 for k in range(start + 1, n))
            if start < n - 1 and tail_unpaired:
                spans.append((start, n - 1))
            break

    spans.sort(key=lambda span: span[0])
    return [
        f"{first + 1}-{''.join(sequence[first:last + 1])}-{last + 1}"
        for first, last in spans
    ]

def process_file(filepath):
    """Write the single strands of one BPSEQ file to a ``.strands`` file.

    The output path is ``filepath`` with its extension replaced by
    ``.strands``; one strand is written per line. A summary line is printed
    afterwards.

    Args:
        filepath: Path of the BPSEQ file to analyse.
    """
    bases, partners = bpseq(filepath)
    strands = find_single_strands(bases, partners)

    root, _ext = os.path.splitext(filepath)
    target = root + ".strands"
    with open(target, 'w') as out:
        out.write('\n'.join(strands))

    print(f"done: {os.path.basename(target)} ({len(strands)} single strandów)")

def start(input_folder):
    """Run the single-strand search on every ``.bpseq`` file in a folder.

    Errors raised while processing a file are printed and the loop moves on
    to the next file.

    Args:
        input_folder: Directory scanned for files ending with ``.bpseq``.
    """
    for entry in os.listdir(input_folder):
        if not entry.endswith(".bpseq"):
            continue
        target = os.path.join(input_folder, entry)
        try:
            process_file(target)
        except Exception as err:
            print(f"błąd w pliku {entry}: {err}")

# Search every .bpseq file in "lab1_bpseq" for single strands.
start("lab1_bpseq")


done: 3IVN-A.strands (9 single strandów)
done: 6ZM6-AA.strands (138 single strandów)
done: 3Q1Q-B.strands (35 single strandów)
done: 5TPY-A.strands (10 single strandów)
done: 6GAZ-AA.strands (137 single strandów)
done: 6N5P-A.strands (18 single strandów)
done: 3Q3Z-V.strands (9 single strandów)
done: 6AGB-A.strands (54 single strandów)
done: 4FRN-B.strands (13 single strandów)
done: 3IGI-A.strands (51 single strandów)
done: 6ERI-BA.strands (220 single strandów)
done: 6NEQ-A.strands (146 single strandów)
done: 6HA1-a.strands (210 single strandów)
done: 5SWD-B.strands (9 single strandów)
done: 7K16-P.strands (7 single strandów)
done: 1ET4-A.strands (8 single strandów)
done: 6TPQ-U.strands (401 single strandów)
done: 5J7L-AA.strands (207 single strandów)
done: 5O60-A.strands (424 single strandów)
done: 6V3A-sN1.strands (205 single strandów)
done: 6UES-A.strands (16 single strandów)


Ocena 5.0
Zaimplementuj algorytm FCFS (ocena 5.0)

In [45]:
import os

def bpseq(filepath):
    """Parse a BPSEQ file into parallel lists of bases and partner indices.

    Lines are split on whitespace; only lines with exactly three fields are
    used, taking the second field as the base and the third as the partner
    index.

    Args:
        filepath: Path of the BPSEQ file.

    Returns:
        ``(sequence, pairing)``: list of base strings and list of integer
        partner indices, in file order.

    Raises:
        ValueError: If a partner field cannot be converted to ``int``.
    """
    bases = []
    partners = []
    with open(filepath, 'r') as handle:
        for raw in handle:
            fields = raw.split()
            if len(fields) != 3:
                continue
            _index, base, partner = fields
            bases.append(base)
            partners.append(int(partner))
    return bases, partners

def base_pair(pair):
    """List each base pair once as a 0-based ``(opening, closing)`` tuple.

    Args:
        pair: List of 1-based partner indices (0 means unpaired).

    Returns:
        A list of ``(i, j)`` tuples with ``i < j``, in order of ``i``.
    """
    # ``partner > idx + 1`` keeps only the 5' side of each pair, because
    # ``partner`` is 1-based while ``idx`` is 0-based.
    return [
        (idx, partner - 1)
        for idx, partner in enumerate(pair)
        if partner > idx + 1
    ]

def conflicting(a, b):
    """Tell whether two base pairs cross each other.

    Args:
        a: Dict with ``'start'`` and ``'end'`` positions of the first pair.
        b: Dict with ``'start'`` and ``'end'`` positions of the second pair.

    Returns:
        ``True`` when exactly one end of one pair lies strictly inside the
        other pair, ``False`` otherwise.
    """
    a_open, a_close = a['start'], a['end']
    b_open, b_close = b['start'], b['end']

    if a_open < b_open < a_close < b_close:
        return True
    return b_open < a_open < b_close < a_close


def group_stems(base_pairs):
    """Group base pairs into stems of directly stacked pairs.

    A stem starts at the first unused pair ``(i, j)`` and is extended with
    ``(i + 1, j - 1)``, ``(i + 2, j - 2)``, ... for as long as those pairs
    exist and are unused.

    Args:
        base_pairs: List of 0-based ``(i, j)`` tuples. It is sorted in place.

    Returns:
        A list of stems; each stem is a list of
        ``{'start': i, 'end': j}`` dicts ordered from the outer pair inwards.
    """
    base_pairs.sort()
    # Set copy for O(1) membership tests; the list lookup was O(n) per test.
    known = set(base_pairs)
    used = set()
    stems = []

    for i, j in base_pairs:
        if (i, j) in used:
            continue
        stem = [{'start': i, 'end': j}]
        used.add((i, j))

        a, b = i, j
        while (a + 1, b - 1) in known and (a + 1, b - 1) not in used:
            a += 1
            b -= 1
            stem.append({'start': a, 'end': b})
            used.add((a, b))

        stems.append(stem)

    return stems

def fcfs_brack(sequence, base_pairs):
    """Build a dot-bracket string, choosing bracket types first come, first served.

    Base pairs are grouped into stems, the stems are processed in order of
    their first position, and each stem receives the lowest-order bracket
    type not already used by an earlier stem that crosses it.

    Args:
        sequence: The bases; only its length is used.
        base_pairs: List of 0-based ``(i, j)`` tuples with ``i < j``.

    Returns:
        The dot-bracket string, with ``'.'`` at unpaired positions.

    Raises:
        ValueError: If a stem crosses stems of every available bracket type.
    """
    n = len(sequence)
    structure = ['.'] * n

    # Bracket types in order of preference; letters follow the four
    # punctuation pairs (upper case opens, lower case closes).
    brackets = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')]
    brackets += [(upper, upper.lower()) for upper in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]

    grouped_stems = group_stems(base_pairs)

    grouped_stems.sort(key=lambda stem: stem[0]['start'])

    # orders[k] is the index into ``brackets`` chosen for grouped_stems[k].
    orders = []
    for i, stem in enumerate(grouped_stems):
        used = set()
        for j in range(i):
            if any(conflicting(a, b) for a in stem for b in grouped_stems[j]):
                used.add(orders[j])
        for order in range(len(brackets)):
            if order not in used:
                orders.append(order)
                break
        else:
            # Without this, ``orders`` would fall out of step with
            # ``grouped_stems`` and later stems would get wrong brackets.
            raise ValueError("zabrakło typów nawiasów dla krzyżujących się stemów")

    for stem, order in zip(grouped_stems, orders):
        b_open, b_close = brackets[order]
        for pair in stem:
            i, j = pair['start'], pair['end']
            structure[i] = b_open
            structure[j] = b_close

    return ''.join(structure)

def save_to_dbn(sequence, structure, filename):
    """Write a three-line dot-bracket file and print a confirmation.

    The file holds a ``>`` header with the base name of ``filename``
    (extension removed), the sequence, and the structure, each followed by
    a newline.

    Args:
        sequence: Iterable of base strings, joined without separator.
        structure: Dot-bracket string.
        filename: Path of the file to write.
    """
    with open(filename, 'w') as out:
        label = os.path.splitext(os.path.basename(filename))[0]
        out.write(">" + label + "\n")
        out.write("%s\n" % ''.join(sequence))
        out.write(structure + '\n')
    print(f"done: {filename}")


def bpseq_to_dbn(filepath):
    """Convert one BPSEQ file to a dot-bracket file next to it.

    The output path is ``filepath`` with its extension replaced by ``.dbn``.

    Args:
        filepath: Path of the BPSEQ file to convert.
    """
    bases, partners = bpseq(filepath)
    dots = fcfs_brack(bases, base_pair(partners))

    root, _ext = os.path.splitext(filepath)
    save_to_dbn(bases, dots, root + ".dbn")

def start(input_folder):
    """Convert every ``.bpseq`` file in ``input_folder`` to dot-bracket.

    Errors raised while processing a file are printed and the loop moves on
    to the next file.

    Args:
        input_folder: Directory scanned for files ending with ``.bpseq``.
    """
    for entry in os.listdir(input_folder):
        if not entry.endswith(".bpseq"):
            continue
        source_path = os.path.join(input_folder, entry)
        try:
            bpseq_to_dbn(source_path)
        except Exception as err:
            print(f"błąd w pliku {entry}: {err}")
# Convert every .bpseq file in "lab1_bpseq" back to dot-bracket notation.
start("lab1_bpseq")  

done: lab1_bpseq/3IVN-A.dbn
done: lab1_bpseq/6ZM6-AA.dbn
done: lab1_bpseq/3Q1Q-B.dbn
done: lab1_bpseq/5TPY-A.dbn
done: lab1_bpseq/6GAZ-AA.dbn
done: lab1_bpseq/6N5P-A.dbn
done: lab1_bpseq/3Q3Z-V.dbn
done: lab1_bpseq/6AGB-A.dbn
done: lab1_bpseq/4FRN-B.dbn
done: lab1_bpseq/3IGI-A.dbn
done: lab1_bpseq/6ERI-BA.dbn
done: lab1_bpseq/6NEQ-A.dbn
done: lab1_bpseq/6HA1-a.dbn
done: lab1_bpseq/5SWD-B.dbn
done: lab1_bpseq/7K16-P.dbn
done: lab1_bpseq/1ET4-A.dbn
done: lab1_bpseq/6TPQ-U.dbn
done: lab1_bpseq/5J7L-AA.dbn
done: lab1_bpseq/5O60-A.dbn
done: lab1_bpseq/6V3A-sN1.dbn
done: lab1_bpseq/6UES-A.dbn
