In [3]:
"""
Calculates and prints the number of DNA bases in multiple DNA lines.
Any character/base other than 'A', 'G', 'T' or 'C' is classified as an error.

Args:
    dna_lines: A list of DNA lines.

Returns:
    None
"""
def print_base_occurrence_freq(dna_lines):
    base_freq = {'A': 0, 'G': 0, 'T': 0, 'C': 0}
    errors = 0
    total = 0

    for line in dna_lines:
        for char in line:
            if char in 'AGTC':
                total += 1
                base_freq[char] += 1
            else:
                errors += 1

    print('Base occurrence frequency:')
    print('TOTAL BASES: ' + str(total))
    print('ERRORS: ' + str(errors))
    for key, value in base_freq.items():
        print(key + ': ' + str(value))


"""
Calculates and prints the number of matches and mismatches between 2 DNA lines.
If there is an error in either of the lines, it counts towards the errors of the line and is neither a match nor a mismatch.

Args:
    line0: The first DNA line.
    line1: The second DNA line

Returns:
    None
"""
def print_matches_per_base(line0, line1):
    matches = {'A': 0, 'G': 0, 'T': 0, 'C': 0}
    mismatches = 0
    line0_errors = 0
    line1_errors = 0

    for i in range(0, len(line0)):
        if line0[i] in 'AGTC':
            if line0[i] == line1[i]:
                matches[line0[i]] += 1
            elif line1[i] in 'AGTC':
                mismatches += 1
            else:
                line1_errors += 1
        else:
            line0_errors += 1

    print('\nNumber of matches & mismatches:')
    for key, value in matches.items():
        print(key + '-' + key + ' matches: ' + str(value))
    print('mismatches : ' + str(mismatches))
    print('line 0 errors: ' + str(line0_errors))
    print('line 1 errors: ' + str(line1_errors))


if __name__ == '__main__':
    with open('testdna1.txt') as file:
        lines = [line.rstrip() for line in file]
    print_base_occurrence_freq(lines)
    print_matches_per_base(lines[0], lines[1])


Base occurrence frequency:
TOTAL BASES: 922
ERRORS: 30
A: 196
G: 305
T: 153
C: 268

Number of matches & mismatches:
A-A matches: 89
G-G matches: 143
T-T matches: 70
C-C matches: 127
mismatches : 30
line 0 errors: 14
line 1 errors: 3
