# DNABio Project - Genetics10

#### This is a code to translate DNA string into RNA and into aminoacids.
##### Copyright Joan Alcaide Núñez, 2024, it may be used under the MIT license
##### For more information see repository at [Joanalnu's GitHub](https://github.com/joanalnu/Genetics10)

In [30]:
# import required libraries
from random import randint
import pandas as pd
import os
import requests


class Genetics10():
    def __init__(self):
        # Setup variables
        self.sample = 1
        self.dirpath = os.path.dirname(os.path.abspath(__doc__))

    def dna2rna(self, dna):
        """Returns RNA string by inputting a DNA string"""
        rna = ""
        for base in dna:
            if base=='A' or base=='a':
                rna+='U'
            elif base=='T' or base=='t':
                rna+='A'
            elif base=='C' or base=='c':
                rna+='G'
            elif base=='G' or base=='g':
                rna+='C'
            else:
                print('Error: could not read provided DNA string')
        return rna

    def rna2amino(self, rna):
        """Returns amino acids by inputting an RNA string"""
        amino=''
        codon_catalog = {'UUU': 'Phe', 'UUC': 'Phe', 'UUA': 'Leu', 'UUG': 'Leu',
            'UCU': 'Ser', 'UCC': 'Ser', 'UCA': 'Ser', 'UCG': 'Ser',
            'UAU': 'Tyr', 'UAC': 'Tyr', 'UAA': 'STOP', 'UAG': 'STOP',
            'UGU': 'Cys', 'UGC': 'Cys', 'UGA': 'STOP', 'UGG': 'Trp',
            'CUU': 'Leu', 'CUC': 'Leu', 'CUA': 'Leu', 'CUG': 'Leu',
            'CCU': 'Pro', 'CCC': 'Pro', 'CCA': 'Pro', 'CCG': 'Pro',
            'CAU': 'His', 'CAC': 'His', 'CAA': 'Gln', 'CAG': 'Gln',
            'CGU': 'Arg', 'CGC': 'Arg', 'CGA': 'Arg', 'CGG': 'Arg',
            'AUU': 'Ile', 'AUC': 'Ile', 'AUA': 'Ile', 'AUG': 'Met',
            'ACU': 'Thr', 'ACC': 'Thr', 'ACA': 'Thr', 'ACG': 'Thr',
            'AAU': 'Asn', 'AAC': 'Asn', 'AAA': 'Lys', 'AAG': 'Lys',
            'AGU': 'Ser', 'AGC': 'Ser', 'AGA': 'Arg', 'AGG': 'Arg',
            'GUU': 'Val', 'GUC': 'Val', 'GUA': 'Val', 'GUG': 'Val',
            'GCU': 'Ala', 'GCC': 'Ala', 'GCA': 'Ala', 'GCG': 'Ala',
            'GAU': 'Asp', 'GAC': 'Asp', 'GAA': 'Glu', 'GAG': 'Glu',
            'GGU': 'Gly', 'GGC': 'Gly', 'GGA': 'Gly', 'GGG': 'Gly'
        }
        for i in range(len(rna)-2, 3):
            codon = str(rna[i]+rna[i+1]+rna[i+2])
            if codon in codon_catalog:
                if codon_catalog[codon]=='STOP':
                    break
                amino+= ' ' + codon_catalog[codon]
            else:
                return f'Error: invalid codon {codon}'
        return amino

    def dna2amino(self, dna):
        amino=''
        """Returns amino acids by inputting an DNA string"""
        codon_catalog = {'UUU': 'Phe', 'UUC': 'Phe', 'UUA': 'Leu', 'UUG': 'Leu',
            'UCU': 'Ser', 'UCC': 'Ser', 'UCA': 'Ser', 'UCG': 'Ser',
            'UAU': 'Tyr', 'UAC': 'Tyr', 'UAA': 'STOP', 'UAG': 'STOP',
            'UGU': 'Cys', 'UGC': 'Cys', 'UGA': 'STOP', 'UGG': 'Trp',
            'CUU': 'Leu', 'CUC': 'Leu', 'CUA': 'Leu', 'CUG': 'Leu',
            'CCU': 'Pro', 'CCC': 'Pro', 'CCA': 'Pro', 'CCG': 'Pro',
            'CAU': 'His', 'CAC': 'His', 'CAA': 'Gln', 'CAG': 'Gln',
            'CGU': 'Arg', 'CGC': 'Arg', 'CGA': 'Arg', 'CGG': 'Arg',
            'AUU': 'Ile', 'AUC': 'Ile', 'AUA': 'Ile', 'AUG': 'Met',
            'ACU': 'Thr', 'ACC': 'Thr', 'ACA': 'Thr', 'ACG': 'Thr',
            'AAU': 'Asn', 'AAC': 'Asn', 'AAA': 'Lys', 'AAG': 'Lys',
            'AGU': 'Ser', 'AGC': 'Ser', 'AGA': 'Arg', 'AGG': 'Arg',
            'GUU': 'Val', 'GUC': 'Val', 'GUA': 'Val', 'GUG': 'Val',
            'GCU': 'Ala', 'GCC': 'Ala', 'GCA': 'Ala', 'GCG': 'Ala',
            'GAU': 'Asp', 'GAC': 'Asp', 'GAA': 'Glu', 'GAG': 'Glu',
            'GGU': 'Gly', 'GGC': 'Gly', 'GGA': 'Gly', 'GGG': 'Gly'
        }
        for i in range(len(dna)-2, 3):
            codon = str(dna[i]+dna[i+1]+dna[i+2])
            if codon in codon_catalog:
                if codon_catalog[codon]=='STOP':
                    break
                amino+= ' ' + codon_catalog[codon]
            else:
                return f'Error: invalid codon {codon}'
        return amino

    def compare(self, original, copy):
        """Compares two different string (original, copy) and return True or False with the reason"""
        if len(original) != len(copy):
            return 'not same length'
        else:
            for i in range(len(original)):
                if original[i]!=copy[i]:
                    return f'Failed in {i} base/amino'
            return "Identical"

    def check(self, string):
        if len(string)%3 == 0:
            if string[:-3]=='TAC' and (string[-3]=='ATT' or string[-3]=='ATC' or string[-3]=='ACC'):
                return 'Valid DNA string'
            elif string[:-3]=='AUG' and (string[-3]=='UAA' or string[-3]=='UAG' or string[-3]=='UGG'):
                return 'Valid RNA string'
            else:
                return 'Invalid string (starting/ending codons not found)'

    def read_input(self, path):
        """if string return string; if a txt file path returns string in file"""
        if path[0]=='/':
            try:
                file = open(path, 'r')
                if len(file)==1:
                    contents = str(file)
                else:
                    contents = list()
                    for line in file:
                        contents.append(str(line))
                return contents
            except OSError or KeyError:
                return 'Could not open file, please, check user guide.'
        else:
            return path

    def createmutation(self, string):
        mutated = ""
        muttype = randint(1, 6)
        index = randint(0, len(string)-1)
        for i in range(len(string)):
            if i == index:
                if muttype==1: # change for A
                    mutated+='A'
                elif muttype==2: # change for T
                    mutated+='T'
                elif muttype==3: # change for C
                    mutated+='C'
                elif muttype==4: # change for G
                    mutated+='G'
                elif muttype==5: # remove base
                    continue
                elif muttype==6: # add random base
                    base = randint(1, 4)
                    if base==1: mutated+='A'
                    elif base==2: mutated+='T'
                    elif base==3: mutated+='C'
                    elif base==4: mutated+='G'
            else:
                mutated+=string[i]
        return mutated

    def iterate(self, strings, functions):
        """Creates a CSV file in your directory with the information you request."""
        """The argument consits of a list of strings and a list of functions"""
        columns = ['input']+[function for function in functions]
        df = pd.DataFrame(columns=columns)
        
        for string in strings:
            memory = [string]
            for function in functions:
                result = getattr(self, function)(memory[-1])
                memory.append(result)
            df = pd.concat([df, pd.DataFrame([memory], columns=columns)], ignore_index=True)
        
        df.to_csv(f'{self.dirpath}/Results.csv', index=False)
        return df

biogen = Genetics10()

## Sample code for translating DNA into aminoacids

In [None]:
# input
my_dna = 'TACCACGTGGACTGAGGACTCCTCATT' # provide DNA string or txt file path as '/<fullpath>'

# get rna string
my_rna = biogen.dna2rna(my_dna)
print(my_rna)

# get aminoacids string
my_amino = biogen.rna2amino(my_rna)
print(my_amino)

## Make your own code using the functions

### Available functions of biogen class:

1. dna2rna(string)
    Transcribes the provided DNA string into a RNA string by changing the bases (A->U, T-> A, C->G, G->C).
   
2. rna2amino(string)
    Transcribes the provided DNA string into an aminoacid string by reading codons (3x bases) and using the catalog.

3. dna2amino(string)
    Transcribes DNA strings directly to aminoacids strings, it's a merge of the dna2rna and rna2amino methods.

4. compare(string1, string2)
    Compares the strings (regardless if DNA, RNA, or aminoacids), it always return a boolean and a string. True if both strings are identical, or False and where do the string differ.

5. check(string)
    It checks if the provided string is a valid DNA or RNA string. It does not check for aminoacid strings.

6. read_input(string)
    Used to open files if a path instead of a DNA string is provided as input.

7. createmutation(string)
    Returns a new string with a mutation (only 1 per run). The mutation can change a base, erase a base or add a new one in any position.


In [23]:
ADNs = ['TACATGACTTGCATT', 'TACCATTGCATT', 'TACATGGCTTAGCTAATT']
functions =['dna2rna', 'rna2amino']
output = biogen.iterate(ADNs, functions)
output

Unnamed: 0,input,dna2rna,rna2amino
0,TACATGACTTGCATT,AUGUACUGAACGUAA,
1,TACCATTGCATT,AUGGUAACGUAA,
2,TACATGGCTTAGCTAATT,AUGUACCGAAUCGAUUAA,


In [27]:
# Example usage of get uniprot id functions
sequence = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKT"
uniprot_id = biogen.get_uniprot_id(sequence)

if uniprot_id:
    print(f"UniProt ID: {uniprot_id}")

Failed to fetch data: 404.


In [37]:
# Example usage of generate protein function
uniprot_id = "Q8W3K0"
structure_data = biogen.generateprotein(uniprot_id)

structure_data

Failed to fetch data: 404
