In [2]:
import math

def compute_log_probability(dna_string, gc_content):
    """Return the log10 probability of a random string matching ``dna_string``.

    Each symbol is drawn independently: 'G' and 'C' each with probability
    ``gc_content / 2``, every other symbol with probability
    ``(1 - gc_content) / 2``.

    Args:
        dna_string: The DNA sequence. Only the uppercase characters 'G' and
            'C' are counted as GC; every other character is counted as AT.
        gc_content: The GC-content, a number in the closed interval [0, 1].

    Returns:
        The log10 probability as a float, or ``float("-inf")`` when the
        probability is exactly zero (``gc_content`` is 0 but the string
        contains G/C, or ``gc_content`` is 1 but the string contains other
        symbols).

    Raises:
        ValueError: If ``gc_content`` is not within [0, 1].
    """
    # Reject out-of-range values up front; otherwise a skipped zero-count term
    # below could silently hide an invalid GC-content.
    if not 0 <= gc_content <= 1:
        raise ValueError(f"gc_content must be within [0, 1], got {gc_content!r}")

    # Count the occurrences of G and C in the DNA string.
    gc_count = sum(1 for base in dna_string if base in "GC")
    at_count = len(dna_string) - gc_count

    # Handle the special cases where the probability is zero.
    if (gc_content == 0 and gc_count > 0) or (gc_content == 1 and at_count > 0):
        return float("-inf")

    # log10(P) = gc_count * log10(gc_content / 2)
    #          + at_count * log10((1 - gc_content) / 2)
    # A term whose count is zero contributes nothing and is skipped, so that
    # math.log10(0) is never evaluated when gc_content is exactly 0 or 1.
    log_probability = 0.0
    if gc_count:
        log_probability += gc_count * math.log10(gc_content / 2)
    if at_count:
        log_probability += at_count * math.log10((1 - gc_content) / 2)
    return log_probability

# Load the dataset from "rosalind_prob.txt": the first line holds the DNA
# string and the second line holds the whitespace-separated GC-content values.
with open("rosalind_prob.txt", "r") as infile:
    lines = infile.read().splitlines()

dna_string = lines[0].strip()
gc_contents = [float(token) for token in lines[1].split()]

# Evaluate the log10 probability of the DNA string under every GC-content.
results = [compute_log_probability(dna_string, gc) for gc in gc_contents]

# Emit all values on one line, space-separated, with three decimal places each.
print(" ".join(f"{value:.3f}" for value in results))

-85.577 -74.030 -66.577 -62.556 -60.728 -57.558 -57.120 -55.286 -54.772 -54.608 -54.894 -55.261 -56.254 -58.368 -61.213 -66.524 -71.974


In [3]:
# Display the DNA string parsed from the first line of the input file.
dna_string

'AAGGCGACGGTGCTATGGAATTAGCGTTTCTCCTCGCGGTCACACAGTCGAGGAACACAGAGTGTCCGGCCCCGAAACGTCTTTTGCCTAA'

In [4]:
# Display the GC-content values parsed from the second line of the input file.
gc_contents

[0.073,
 0.131,
 0.197,
 0.251,
 0.283,
 0.359,
 0.373,
 0.456,
 0.502,
 0.536,
 0.61,
 0.639,
 0.688,
 0.751,
 0.805,
 0.868,
 0.908]