# HANDLING AMBIGUITY
<ol>
    <li><b>N:</b> Any nucleotide (A, T, G, or C)</li>
    <li><b>R:</b> Purine (A or G)</li>
    <li><b>Y:</b> Pyrimidine (C or T)</li>
    <li><b>S:</b> Strong (G or C)</li>
    <li><b>W:</b> Weak (A or T)</li>
    <li><b>K:</b> Keto (G or T)</li>
    <li><b>M:</b> Amino (A or C)</li>
    <li><b>B:</b> Not A (C, G, or T)</li>
    <li><b>D:</b> Not C (A, G, or T)</li>
    <li><b>H:</b> Not G (A, C, or T)</li>
    <li><b>V:</b> Not T (A, C, or G)</li>
</ol>
For RNA, replace T with U (e.g., Y = C or U).

In [None]:
# complement
# ATGNGTRCG
def valid_strand(strand):
    """Tell whether *strand* consists solely of recognised DNA symbols.

    Recognised symbols are the bases A, T, G, C and the ambiguity codes
    N, R, Y, S, W, K, M, B, D, H, V. An empty strand counts as valid.
    """
    allowed = tuple("ATGCNRYSWKMBDHV")
    return all(symbol in allowed for symbol in strand)


def ambiguity(strand):
    """Return True if *strand* contains at least one ambiguity code.

    The ambiguity codes are N, R, Y, S, W, K, M, B, D, H and V; the plain
    bases A, T, G and C never trigger a True result.
    """
    ambiguous_codes = tuple("NRYSWKMBDHV")
    return any(symbol in ambiguous_codes for symbol in strand)


def complement(strand):
    """Return the base-by-base complement of a DNA *strand*.

    Ambiguity codes are complemented by complementing every base they stand
    for: R (A/G) <-> Y (T/C), M (A/C) <-> K (T/G), B (C/G/T) <-> V (G/C/A),
    D (A/G/T) <-> H (T/C/A). S (G/C), W (A/T) and N map to themselves,
    because complementing their members yields the same set.

    The result is in the same order as the input (it is not reversed).
    Symbols that are not in the table are skipped, so callers should
    validate the strand first.
    """
    n_dictionary = {
        "A": "T", "T": "A", "G": "C", "C": "G",
        "N": "N",
        "R": "Y", "Y": "R",
        # S = G or C and W = A or T are self-complementary sets.
        "S": "S", "W": "W",
        "M": "K", "K": "M",
        "B": "V", "V": "B",
        "D": "H", "H": "D",
    }
    # Direct dictionary lookup instead of scanning every entry per symbol.
    return "".join(n_dictionary[x] for x in strand if x in n_dictionary)


# Read a strand from the user, normalise it, and report its complement.
dna = input("enter strand: ").strip().upper()

if not valid_strand(dna):
    print("input strand invalid")
else:
    print("DNA strand: ", dna)
    # The escape sequences switch bold on (1m) and back off (0m).
    print(
        "\033[1mambiguity detected\033[0m"
        if ambiguity(dna)
        else "\033[1mambiguity absent\033[0m"
    )
    print("complementary strand: ", complement(dna))

DNA strand:  ATGNGTRCG
[1mambiguity detected[0m
complementary strand:  TACNCAYGC


In [None]:
# transcription
# ATGNGTRCG
def valid_strand(template_strand):
    """Check that *template_strand* uses only the supported DNA alphabet.

    Returns False as soon as a symbol outside A, T, G, C and the ambiguity
    codes N, R, Y, S, W, M, K, B, D, H, V is met; True otherwise (including
    for an empty strand).
    """
    alphabet = tuple("ATGCNRYSWMKBDHV")
    for symbol in template_strand:
        if symbol not in alphabet:
            return False
    return True


def ambiguity(template_strand):
    """Report whether any symbol of *template_strand* is an ambiguity code.

    Ambiguity codes: N, R, Y, S, W, M, K, B, D, H, V.
    """
    ambiguous_codes = tuple("NRYSWMKBDHV")
    for symbol in template_strand:
        if symbol in ambiguous_codes:
            return True
    return False


def complement(template_strand):
    """Return the coding strand complementary to *template_strand*.

    Each symbol is replaced by its complement, position by position (the
    sequence is not reversed). Ambiguity codes are complemented member-wise:
    R <-> Y, M <-> K, B <-> V, D <-> H, while S (G/C), W (A/T) and N are
    their own complements.

    Symbols missing from the table are skipped, so validate input first.
    """
    n_dictionary = {
        "A": "T", "T": "A", "G": "C", "C": "G",
        "N": "N",
        "R": "Y", "Y": "R",
        # Complementing {G, C} gives {C, G}; likewise for {A, T}.
        "S": "S", "W": "W",
        "M": "K", "K": "M",
        "B": "V", "V": "B",
        "D": "H", "H": "D",
    }
    # One dictionary lookup per symbol; collect the pieces and join once.
    coding_symbols = [
        n_dictionary[x] for x in template_strand if x in n_dictionary
    ]
    return "".join(coding_symbols)


def transcribe(template_strand):
    """Produce the mRNA string for a DNA *template_strand*.

    The template is complemented with ``complement`` and every T in the
    result is then written as U. The sequence order is left unchanged.
    """
    return complement(template_strand).replace("T", "U")


# Read a template strand, normalise it, and print the coding strand and mRNA.
template_strand = input("enter template strand: ").strip().upper()

if valid_strand(template_strand):
    if ambiguity(template_strand):
        print("\033[1mambiguity detected\033[0m")
    else:
        # End with the reset code (0m). The original ended with 1m, which
        # left bold switched on for everything printed afterwards.
        print("\033[1mambiguity absent\033[0m")
    print("template Strand: ", template_strand)
    coding_strand = complement(template_strand)
    print("coding Strand: ", coding_strand)
    print("mRNA: ", transcribe(template_strand))
else:
    print("input strand invalid")

[1mambiguity detected[0m
template Strand:  ATGNGTRCG
coding Strand:  TACNCAYGC
mRNA:  UACNCAYGC


In [6]:
# ATGNGTRCG
# translation
# Codon table: one-letter amino-acid code -> list of mRNA codons.
# "B" repeats the codons of N and D, "Z" repeats those of Q and E, and
# "--STOP--" holds the three stop codons. Entry order is kept as-is because
# lookups that scan this table return the first matching key.
aa_codon_dictionary = {
    amino_acid: codons.split()
    for amino_acid, codons in (
        ("A", "GCU GCC GCA GCG"),
        ("R", "CGU CGC CGA CGG AGA AGG"),
        ("N", "AAU AAC"),
        ("D", "GAU GAC"),
        ("B", "AAU AAC GAU GAC"),
        ("C", "UGU UGC"),
        ("Q", "CAA CAG"),
        ("E", "GAA GAG"),
        ("Z", "CAA CAG GAA GAG"),
        ("G", "GGU GGC GGA GGG"),
        ("H", "CAU CAC"),
        ("I", "AUU AUC AUA"),
        ("L", "CUU CUC CUA CUG UUA UUG"),
        ("M", "AUG"),
        ("K", "AAA AAG"),
        ("F", "UUU UUC"),
        ("P", "CCU CCC CCA CCG"),
        ("S", "UCU UCC UCA UCG AGU AGC"),
        ("T", "ACU ACC ACA ACG"),
        ("W", "UGG"),
        ("Y", "UAU UAC"),
        ("V", "GUU GUC GUA GUG"),
        ("--STOP--", "UAA UGA UAG"),
    )
}

def valid_strand(template_strand):
    """Return True when no symbol of *template_strand* is outside the DNA set.

    The DNA set is A, T, G, C plus the ambiguity codes N, R, Y, S, W, M, K,
    B, D, H, V. An empty strand yields True.
    """
    permitted = tuple("ATGCNRYSWMKBDHV")
    return not any(symbol not in permitted for symbol in template_strand)


def ambiguity(template_strand):
    """Return True if *template_strand* includes any ambiguity code.

    Codes checked: N, R, Y, S, W, M, K, B, D, H, V.
    """
    ambiguous_codes = tuple("NRYSWMKBDHV")
    return any(symbol in ambiguous_codes for symbol in template_strand)


def complement(template_strand):
    """Return the coding strand complementary to *template_strand*.

    Symbols are complemented in place, without reversing the sequence.
    Ambiguity codes follow from complementing each base they represent:
    R <-> Y, M <-> K, B <-> V, D <-> H; S (G/C), W (A/T) and N are unchanged.

    Any symbol not present in the table is skipped.
    """
    n_dictionary = {
        "A": "T", "T": "A", "G": "C", "C": "G",
        "N": "N",
        "R": "Y", "Y": "R",
        # S and W each describe a base pair, so they complement to themselves.
        "S": "S", "W": "W",
        "M": "K", "K": "M",
        "B": "V", "V": "B",
        "H": "D", "D": "H",
    }
    # Look each symbol up directly rather than scanning all table entries.
    return "".join(
        n_dictionary[x] for x in template_strand if x in n_dictionary
    )


def transcribe(template_strand):
    """Return the mRNA corresponding to a DNA *template_strand*.

    Takes the complement of the template via ``complement`` and rewrites
    each T as U.
    """
    return complement(template_strand).replace("T", "U")


def check_mRNA_length(mRNA):
    """Return True when *mRNA* holds at least three symbols (one codon)."""
    return len(mRNA) > 2


def translate(mrna):
    """Translate *mrna* into a string of one-letter amino-acid codes.

    The sequence is read from index 0 in non-overlapping triplets; one or
    two trailing symbols that do not form a full codon are ignored. A codon
    containing any ambiguity code contributes "X". Other codons are looked
    up in ``aa_codon_dictionary`` (first matching entry wins), and
    translation ends at the first stop codon, which is not included in the
    result. A codon found nowhere in the table contributes nothing.
    """
    ambiguous_codes = tuple("NRYSWMKBDHV")
    residues = []
    for start in range(0, len(mrna) - 2, 3):
        triplet = mrna[start:start + 3]
        if any(symbol in ambiguous_codes for symbol in triplet):
            residues.append("X")
            continue
        for amino_acid, codon_list in aa_codon_dictionary.items():
            if triplet not in codon_list:
                continue
            if amino_acid == "--STOP--":
                return "".join(residues)
            residues.append(amino_acid)
            break
    return "".join(residues)
                

# Read a template strand and walk it through transcription and translation.
template_strand = input("enter template strand: ").strip().upper()

if not valid_strand(template_strand):
    print("input strand invalid")
else:
    # The escape sequences switch bold on (1m) and back off (0m).
    print(
        "\033[1mambiguity detected\033[0m"
        if ambiguity(template_strand)
        else "\033[1mambiguity not detected\033[0m"
    )
    print("template strand: ", template_strand)
    print("coding strand: ", complement(template_strand))
    mrna = transcribe(template_strand)
    print("mRNA: ", mrna)
    # Translation needs at least one complete codon.
    if check_mRNA_length(mrna):
        print("protein: ", translate(mrna))
    else:
        print("\033[1mLength of mRNA is less than 3 bases\033[0m")

[1mambiguity detected[0m
template strand:  ATGNGTRCG
coding strand:  TACNCAYGC
mRNA:  UACNCAYGC
protein:  YXX


In [11]:
# reverse transcription
# UACNCAYGC
def valid_strand(mRNA):
    """Tell whether *mRNA* consists solely of recognised RNA symbols.

    Recognised symbols are A, U, G, C and the ambiguity codes N, R, Y, S,
    W, M, K, B, D, H, V. Note that T is rejected. An empty string is valid.
    """
    allowed = tuple("AUGCNRYSWMKBDHV")
    return all(symbol in allowed for symbol in mRNA)


def ambiguity(mRNA):
    """Return True if *mRNA* contains at least one ambiguity code.

    Ambiguity codes: N, R, Y, S, W, M, K, B, D, H, V.
    """
    ambiguous_codes = tuple("NRYSWMKBDHV")
    for symbol in mRNA:
        if symbol in ambiguous_codes:
            return True
    return False


def complement(complementary_strand):
    """Return the DNA complement of the given strand, symbol by symbol.

    The output keeps the input order (no reversal). Ambiguity codes are
    complemented member-wise: R <-> Y, M <-> K, B <-> V, D <-> H, while
    S (G/C), W (A/T) and N are their own complements.

    Symbols not in the table (for example U) are skipped, so convert RNA to
    the DNA alphabet before calling.
    """
    n_dictionary = {
        "A": "T", "T": "A", "G": "C", "C": "G",
        "N": "N",
        "R": "Y", "Y": "R",
        # {G, C} and {A, T} are closed under complementation.
        "S": "S", "W": "W",
        "M": "K", "K": "M",
        "B": "V", "V": "B",
        "H": "D", "D": "H",
    }
    # Direct lookup per symbol; unknown symbols are filtered out.
    return "".join(
        n_dictionary[x] for x in complementary_strand if x in n_dictionary
    )


def reverse_transcribe(mRNA):
    """Derive both DNA strands from an *mRNA* string.

    The sense strand is the mRNA with every U written as T; the
    complementary strand is obtained by passing the sense strand to
    ``complement``.

    Returns a dict with the keys "complementary strand" and "sense strand",
    in that order.
    """
    dna_sense = mRNA.replace("U", "T")
    return {
        "complementary strand": complement(dna_sense),
        "sense strand": dna_sense,
    }


# Read an mRNA sequence, normalise it, and print the DNA strands behind it.
mrna = input("enter mRNA sequence: ").strip().upper()

if not valid_strand(mrna):
    print("input strand invalid")
else:
    print("mRNA: ", mrna)
    # The escape sequences switch bold on (1m) and back off (0m).
    print(
        "\033[1mambiguity detected\033[0m"
        if ambiguity(mrna)
        else "\033[1mambiguity not detected\033[0m"
    )
    print("DNA: ", reverse_transcribe(mrna))

mRNA:  UACNCAYGC
[1mambiguity detected[0m
DNA:  {'complementary strand': 'ATGNGTRCG', 'sense strand': 'TACNCAYGC'}


In [13]:
# gc content
# ATGCRYSWKMBDHVN

def valid_strand(strand):
    """Check that *strand* is written entirely in the supported DNA alphabet.

    Supported symbols: A, T, G, C and the ambiguity codes N, R, Y, S, W, M,
    K, B, D, H, V. Returns True for an empty strand.
    """
    alphabet = tuple("ATGCNRYSWMKBDHV")
    for symbol in strand:
        if symbol not in alphabet:
            return False
    return True


def ambiguity(strand):
    """Report whether *strand* contains any ambiguity code.

    Ambiguity codes: N, R, Y, S, W, M, K, B, D, H, V.
    """
    ambiguous_codes = tuple("NRYSWMKBDHV")
    return any(symbol in ambiguous_codes for symbol in strand)


def gc_content(strand):
    """Return the expected GC fraction of *strand* as a float.

    Each symbol contributes the fraction of the bases it can stand for that
    are G or C: G, C and S count 1; A, T and W count 0; N, R, Y, K and M
    count 1/2; B and V count 2/3; D and H count 1/3. Symbols without a score
    contribute 0 but still count towards the length. The total is divided
    by ``len(strand)``.

    An empty strand returns 0.0 instead of raising ZeroDivisionError.
    """
    if not strand:
        # No symbols to average over; avoid dividing by zero.
        return 0.0

    gc_score = {
        "G": 1.0,
        "C": 1.0,
        "N": 0.5,    # A, T, G or C
        "R": 0.5,    # A or G
        "Y": 0.5,    # C or T
        "S": 1.0,    # G or C
        "W": 0.0,    # A or T
        "K": 0.5,    # G or T
        "M": 0.5,    # A or C
        "B": 2 / 3,  # C, G or T
        "D": 1 / 3,  # A, G or T
        "H": 1 / 3,  # A, C or T
        "V": 2 / 3,  # A, C or G
    }

    # Direct lookup per symbol; A, T and unknown symbols score zero.
    count = 0.0
    for x in strand:
        count += gc_score.get(x, 0.0)
    return count / len(strand)


# Read a DNA strand, normalise it, and print its GC content.
dna = input("enter DNA strand: ").strip().upper()

if not valid_strand(dna):
    print("input strand invalid")
else:
    print("DNA strand: ", dna)
    # The escape sequences switch bold on (1m) and back off (0m).
    print(
        "\033[1mambiguity detected\033[0m"
        if ambiguity(dna)
        else "\033[1mambiguity not detected\033[0m"
    )
    print("gc content: ", gc_content(dna))

DNA strand:  ATGCRYSWKMBDHVN
[1mambiguity detected[0m
gc content:  0.5
