In [2]:
import re

In [3]:
def my_decorator(func):
    def wrapper():
        print("Something is happening before the function is called.")
        func()
        print("Something is happening after the function is called.")
    return wrapper

@my_decorator
def say_hello():
    print("Hello!")

say_hello()

Something is happening before the function is called.
Hello!
Something is happening after the function is called.


In [4]:
class MyClass:
    class_variable = "I am a class variable"

    @classmethod
    def class_method(cls):
        print(f"Accessing class variable: {cls.class_variable}")

    @staticmethod
    def static_method():
        print("I am a static method. I don't need class or instance data.")

In [5]:
MyClass.class_method()

Accessing class variable: I am a class variable


In [6]:
MyClass.static_method()

I am a static method. I don't need class or instance data.


In [7]:
class Sequence:
    def __init__(self, sequence):
        self.sequence = sequence.upper()

    def get_length(self):
        """Returns the length of the sequence."""
        return len(self.sequence)

    def get_composition(self):
        """Returns the composition of each character in the sequence."""
        composition = {}
        for char in self.sequence:
            composition[char] = composition.get(char, 0) + 1
        return composition

    def find_motif(self, motif):
        """Finds the motif in the sequence and returns its positions."""
        matches = re.finditer(motif, self.sequence)
        return list(matches)

In [8]:
seq = Sequence("AcGTaCGtAgA")

In [9]:
seq.sequence

'ACGTACGTAGA'

In [10]:
seq.get_length()

11

In [11]:
seq.get_composition()

{'A': 4, 'C': 2, 'G': 3, 'T': 2}

In [12]:
seq.find_motif("AC")

[<re.Match object; span=(0, 2), match='AC'>,
 <re.Match object; span=(4, 6), match='AC'>]

In [13]:
class DNA(Sequence):
    def __init__(self, sequence):
        super().__init__(sequence)

        if set(self.sequence) != set("ATGC"):
            raise ValueError("DNA sequence can only contain A, T, C, and G.")

    def get_orf(self):
        seq = self.sequence
        seqlen = self.get_length()

        start_codons = ["ATG", "GTG"]
        stop_codons = ["TAG", "TAA", "TGA"]

        for frame in [0, 1, 2] :
            i = frame

            for i in range(frame, seqlen-2, 3) :
                start_codon = seq[i:i+3]

                if start_codon in start_codons :

                    for j in range(i, seqlen-1, 3) :
                        stop_codon = seq[j:j+3]

                        if stop_codon in stop_codons :
                            orf = seq[i:j+3+1]
                            print(f"ORF found in frame {frame+1} from {i+1} to {j+3}:")
                            print(orf)
                            i = j
                            break

In [14]:
dna = DNA("CCAGTTATCGAAAAAAAGTGAAGCGTAAAAACTAAACGGTACTTGGAAAATTTACTTTGCCTGTGTGCGTGCTTCTGTTTATGAAAACTACGTTTTTAAGCATTTGCAATTGATGCAACGTTGCTTTGGAAGGCAAGAAAACACCAAGTGGAAACGCCCGGGTGGCGGAAATCGACTTCCAGTGCGCGAGTGCGAGATTGTGTGTGTTTCGTTCCGTCCTCTGGTGTCTGGCACCTTAACACAACAACAACTACAGCAGCAGCAGCAACAACAACAATAGCAAGCAGCAACAACAACACACGCACAATAATAAGAAGGCCTGTGACGTTGTCGCTGCCGTCACCATCTAAAGGAAAATACCAAAACGAGTAGGAAAACAAACCGCAGTGCAGTAGGGAAAGCAAAAGTCGGGTTTTTTTTTTTACAAAACTAGGTCCCTCATTAGCTTGTCCCTAGGGTTAGTGAAACAGCCTCAACACGAATTACCTACCGTTTGTTCTTCAGCGGCGATCGACTGTGGTTGCCAGCGCCCCAGCGCCAGGATCTGCCTCTAGGATCCCGGAATCGCCCACCACCACAGGGAGCAACATCATTAACATCATCTACAGCCAATCCACCCATCCAAACAGCAGCCCCACCAGCGGCAGTGCAGAGAAATTCAGCTGGCAGCAATCCTGGCCAAGCAGGACCTCCGCGGCCCCTACACACGATTCCGGAACCATGTCAATAAACACAACTTTCGACCTCAGCTCGCCCAGCGTTACATCGGGTCATGCGCTTACGGAACAGACTCAGGTCGTAAAGGAGGGGTGGCTGATGAAGAGGGGCGAGCACATAAAGAACTGGCGCCAGCGCTACTTCGTGCTCCACTCCGATGGAAGACTGATGGGTTACCGCAGCAAGCCGGCAGATAGTGCCAGCACGCCATCGGACTTCCTGCTCAACAACTTTACGGTGCGCGGCTGCCAGATCATGACCGTCGATCGGCCCAAGCCATTCACCTTCATCATCCGCGGCCTGCAATGGACCACGGTAATCGAAAGGACATTTGCCGTCGAATCAGAATTAGAGCGCCAGCAGTGGACGGAGGCCATTCGCAACGTATCCAGCCGGCTCATAGATGTGGGTGAGGTGGCTATGACGCCATCTGAACAGACAGACATGACAGACGTGGACATGGCCACCATTGCCGAGGACGAGCTTTCCGAGCAGTTCTCCGTACAGGGAACGACTTGTAATAGCAGCGGCGTTAAGAAAGTGACTTTGGAGAATTTTGAGTTCCTCAAGGTGCTCGGCAAGGGCACCTTTGGTAAGGTTATTTTGTGTCGCGAGAAGGCTACCGCCAAGCTGTACGCGATCAAAATTCTTAAAAAGGAAGTTATTATCCAAAAGGACGAGGTGGCCCATACCCTGACCGAGAGTCGTGTGCTCAAGTCCACAAATCATCCGTTCCTCATTTCACTCAAATATTCGTTTCAAACCAACGACCGCCTGTGTTTTGTGATGCAGTACGTGAACGGTGGCGAGCTCTTTTGGCATTTAAGCCACGAACGCATATTTACTGAGGATCGAACACGTTTCTATGGGGCAGAGATCATTTCTGCTCTGGGCTATCTGCATTCACAGGGCATAATTTATCGCGACTTAAAGCTGGAAAATCTTTTGCTGGACAAAGATGGTCACATAAAGGTCGCAGACTTCGGTCTGTGCAAGGAGGACATCACCTACGGCCGCACAACGAAAACCTTCTGCGGTACACCGGAATACCTAGCTCCGGAAGTATTAGATGACAATGATTATGGCCAAGCGGTGGACTGGTGGGGCACTGGCGTGGTCATGTACGAGATGATCTGTGGTCGTCTTCCGTTTTACAATCGTGATCATGATGTGCTCTTTACATTGATTTTGGTGGAGGAGGTAAAATTCCCGCGTAATATCACAGATGAGGCGAAAAATCTGTTAGCAGGCCTTCTAGCTAAGGATCCCAAAAAGCGTTTGGGAGGTGGAAAGGATGATGTCAAGGAGATACAAGCACATCCATTCTTTGCGAGTATTAACTGGACAGATCTAGTGTTGAAAAAAATACCGCCGCCTTTCAAGCCTCAGGTGACATCCGATACGGATACAAGGTACTTCGACAAGGAGTTTACGGGAGAGAGTGTGGAGTTGACGCCGCCGGATCCCACTGGCCCGTTGGGCTCCATAGCCGAAGAGCCGCTTTTCCCGCAGTTCAGCTACCAAGGAGACATGGCCTCCACGTTGGGCACCTCGTCGCACATTAGTACTTCCACAAGTCTCGCATCGATGCAATAGAACAAGTTTACAAACTTTTACCAGCACACATCATGTTCTGCTTCATCACCTATTACTATACTATACCTATAACACATAGAAATGGGCAAAAGAAAACAACAGCAAGAATATATTAAATATTAATTATACCATGATAACGAAGGCAAGCAAAGGGTTTTCTTGCTAGTTTATATACACCAATAATAAGAGAATTGAATTGATAGTAACTTATTTTTGTATAATGAGAAAGGGAACGTTGAACGAAAGGGCAACAGCCGAAGCCGAGGAAAATAATTTACTACATATGATTATACACATATATATAAATATATATATATATATATATATAATTATATATACAACTAAATATAACACTTGGGGCATAGCGCTTGAAGTGATTAGTTAATTTTAAACGCAAGGAAAACATTTAACCAAGCGACTTAGAGCGACTTAGTGCTAGGATAAAGCATGCATCATCGGATCGCATCCTCTAGGGGTCGCTTAAGCTGATTGATTATACAGATTATTCACGCAGACAGCACTCACACAGCCACTTAGCATATAGCGAACATGAAGTTTAGTTAACTTGACAGCAACAAATCGAAATACACATATTCAACAACCGCCCCCACATGCAACACTCGTATATATAAATTATTTATACTCCAAAGAAGCAAAAAAGAAACAGAAACAATTACAAGTTATATATAATGTTAAAATTGCCTCTAATGTGCTGTGCTTTATGCAAAACTACAACAACAGAAACAAACAAACTAATAGTTAATAAACGAGGGAAACATACATCAATGCAGTTTATAGGCGCCAATAATTAGCAAGCAACTAAAAGTTATATTCAGGCAGAAACATATTTCGAAACAATAACGGCAACAGCAAAATGATTACAATTGACGCCATTTTTTATTATAATTTTTTACGAAAAATATATATTTGTAAATATTTATAAAAAAAAAGCAAAAACTCAAGAGTAAATGCAAAAAAAAAACAAAGGATGTATGTGTCGTGTGGAAAATGGAAGTCACCATTTGAGGCCACAGAAAAGGTTTAACCTTTGACACAATAAAAAATGTATCTTTGTCATTTCATTTTTGGCATTGAAAAGTTCATAACAAGTTTTGTATACTTAGAGCAAGTCAGCAAAAAATATATATAAATCAACCACAACAGACTTACAAATAGTTCCAAAAGGAAGATTAAAAATTATATAAACAGAAAAAAAACCTAAAAGAAAATAAAGAATTTTTGATATTTTTATATGCGCAATAAAAACAAAAATCAAGAATAAAACGAAACAAAAAGTTATTTTATATTTCGTAAAGAAAACAAAATAAATATTGTAAATAAAAATTATTAAATAAAATCAAGTTCGGGAATTTCAACTGAAAACAAAGAAATTGAAAGCATAATCTCTCTTCTAAATGTTAAACGAAACCTATCACTCGATGATTACAATAAACAGCAAATCGAAAACGAAGGGATATAGAAACGAAAAGAAAATGCCAAAGAAGCGGCGAAGTTAGGTGGAACTCGAGGAGAACCGAGAATTGAGGAGCCAGCAAGTCGGGATGTTTCGGTTGGAGTCTTGGACATGTGACGATGATGGTTCCAAATCATGACTAGGAACGAACTAACTAGCTAAAGCCAGCCTATGCTGAGGAGAGATATCAGATAGCTCCGATTGAACCAATCTTTATATCCTTGAACAGCTTAAAGTCCTTGCTTTCTGCTCACCAAGTTTGCGGCTGACATCTAGGCTAGCCAATATCAATCTCAGATCGATCAAGCAATACATAGACAAAATATAGAACCCCTAAGCATGGCTTAATAGACATACAACCCGAGAATCGACTTCAACAAGCCCAATAACCCTTCTGACAGACTCGTTAGGCAAGTCTGCCAGCCAAGGGGCAACCAGCTAGAAATCTAAAGCAAAGACTTAGAAATCGGTTAGGGACATCCGAAGCAAATTGAATACTAGCCTTAGTGAACGTCAACATCTCTGGTATCCCAAATCGAAGCATTTTCTACCCGTCGAAGGGGACGAAAGAGACCTTTCAAAATAGTTGTCTAAGTTTACTGAGTTGTGTGAGACAATTTTTGGTTTCGATTAAAAAAAAAAAAACAAAAACTTACACGAACTGCAAACTACAATGTGTTCTAAAGCGATATAATAAAACAACATGGTTTAAAATTGACTATATATAGAAAGAAACCGAGAGAGATATAACCAGTCAGATCATAGATCATCTCGAGCAACTTGAGTTGTTTTTTGTATTGTAAAATAGAAAAAGATAAACCCGTTCCAACAACCGAGATCCAGATCCAGATCCAGCCCCCAATACTAACCGAAAATTTGATTAGGAGTAATTTTTGATAGGCGAAAATGCGAGTTTCGCACAGTTTTAATTATTAATTATATTAAATAAAGATAATAATGACTAAGCTGATTTATAAAACGATTCAAGATCGAACTAAAACATACATATACATGCATTCAAACCAAGTTTATTATACAGTGAAAACATAATAATAAAACTACATAAAACCAAACTACTATGC")

In [15]:
dna.get_length()

4869

In [16]:
dna.get_composition()

{'C': 1013, 'A': 1695, 'G': 987, 'T': 1174}

In [17]:
dna.get_orf()

ORF found in frame 1 from 148 to 372:
GTGGAAACGCCCGGGTGGCGGAAATCGACTTCCAGTGCGCGAGTGCGAGATTGTGTGTGTTTCGTTCCGTCCTCTGGTGTCTGGCACCTTAACACAACAACAACTACAGCAGCAGCAGCAACAACAACAATAGCAAGCAGCAACAACAACACACGCACAATAATAAGAAGGCCTGTGACGTTGTCGCTGCCGTCACCATCTAAAGGAAAATACCAAAACGAGTAGG
ORF found in frame 1 from 190 to 372:
GTGCGAGATTGTGTGTGTTTCGTTCCGTCCTCTGGTGTCTGGCACCTTAACACAACAACAACTACAGCAGCAGCAGCAACAACAACAATAGCAAGCAGCAACAACAACACACGCACAATAATAAGAAGGCCTGTGACGTTGTCGCTGCCGTCACCATCTAAAGGAAAATACCAAAACGAGTAGG
ORF found in frame 1 from 202 to 372:
GTGTGTTTCGTTCCGTCCTCTGGTGTCTGGCACCTTAACACAACAACAACTACAGCAGCAGCAGCAACAACAACAATAGCAAGCAGCAACAACAACACACGCACAATAATAAGAAGGCCTGTGACGTTGTCGCTGCCGTCACCATCTAAAGGAAAATACCAAAACGAGTAGG
ORF found in frame 1 from 322 to 372:
GTGACGTTGTCGCTGCCGTCACCATCTAAAGGAAAATACCAAAACGAGTAGG
ORF found in frame 1 from 517 to 2313:
GTGGTTGCCAGCGCCCCAGCGCCAGGATCTGCCTCTAGGATCCCGGAATCGCCCACCACCACAGGGAGCAACATCATTAACATCATCTACAGCCAATCCACCCATCCAAACAGCAGCCCCACCAGCGGCAGTGCAGAGAAATTCAGCTGGCAGCAATCCTGGCCAAGCAGG

In [18]:
class Protein(Sequence):
    def __init__(self, sequence):
        super().__init__(sequence)

In [19]:
protein = Protein("ALMMKLKCALKCHLAMAGMLKLGKMGHCHHMLMAHGMLHLMHGGHHGAKHCCAACALHMGGGKGHHMGGKALMMLMLKALCMCMKGMGCKMGKHAKKHCK")

In [20]:
protein.get_length()

100

In [21]:
protein.get_composition()

{'A': 12, 'L': 14, 'M': 18, 'K': 15, 'C': 10, 'H': 15, 'G': 16}