In [1]:
# Import the top-level Bio (Biopython) package and print its version so the
# outputs below can be tied to a specific release.
import Bio
print(Bio.__version__)


1.83


In [2]:
# NOTE(review): same version check as In [1]; this cell relies on `Bio`
# already having been imported by that cell.
print(Bio.__version__)

1.83


In [4]:
# Load the Bio.SeqUtils subpackage and show its built-in help to browse the
# helpers it provides.
# NOTE(review): help() prints the full package documentation, so this cell's
# output is long.
import Bio.SeqUtils
help(Bio.SeqUtils)


Help on package Bio.SeqUtils in Bio:

NAME
    Bio.SeqUtils - Miscellaneous functions for dealing with sequences.

PACKAGE CONTENTS
    CheckSum
    IsoelectricPoint
    MeltingTemp
    ProtParam
    ProtParamData
    lcc

CLASSES
    builtins.dict(builtins.object)
        CodonAdaptationIndex
    
    class CodonAdaptationIndex(builtins.dict)
     |  CodonAdaptationIndex(sequences, table=NCBICodonTableDNA(id=1, names=['Standard', 'SGC0'], ...))
     |  
     |  A codon adaptation index (CAI) implementation.
     |  
     |  Implements the codon adaptation index (CAI) described by Sharp and
     |  Li (Nucleic Acids Res. 1987 Feb 11;15(3):1281-95).
     |  
     |  Method resolution order:
     |      CodonAdaptationIndex
     |      builtins.dict
     |      builtins.object
     |  
     |  Methods defined here:
     |  
     |  __init__(self, sequences, table=NCBICodonTableDNA(id=1, names=['Standard', 'SGC0'], ...))
     |      Generate a codon adaptiveness table from the coding DNA 

In [9]:
from Bio.Seq import Seq

test_seq = Seq("ATGCAAGCGCTAGCGTAG")
# GC content as a percentage: (count of G + count of C) / sequence length * 100.
gc_content = sum(map(test_seq.count, "GC")) / len(test_seq) * 100
print(gc_content)


55.55555555555556


In [10]:
def find_orfs(dna_sequence):
    """
    Find open reading frames (ORFs) on both strands of a DNA sequence.

    An ORF here starts at the start codon ATG and runs to the first in-frame
    stop codon (TAA, TAG or TGA), stop codon included. All three reading
    frames of the given strand and of its reverse complement are scanned.
    A start codon with no in-frame stop after it yields nothing, and an ATG
    nested inside a longer ORF is reported as its own, shorter ORF ending at
    the same stop codon.

    :param dna_sequence: The DNA sequence to scan. Matching is
        case-insensitive; characters other than A/C/G/T (for example N) are
        accepted and are left unchanged when the reverse complement is built.
    :return: A list of upper-case ORF strings ordered by frame (0, 1, 2);
        within each frame the forward-strand ORFs come before the
        reverse-strand ORFs.
    """
    start_codon = 'ATG'
    stop_codons = ('TAA', 'TAG', 'TGA')
    # Translation table instead of a dict lookup: unmapped characters pass
    # through untouched rather than raising KeyError.
    complement = str.maketrans('ACGT', 'TGCA')

    def find_orfs_in_frame(sequence):
        """Collect ATG...stop stretches read in codon steps from index 0."""
        orfs = []
        for i in range(0, len(sequence), 3):
            if sequence[i:i+3] != start_codon:
                continue
            # Walk codon by codon until the first stop; a trailing partial
            # codon is shorter than 3 characters and can never match.
            for j in range(i + 3, len(sequence), 3):
                if sequence[j:j+3] in stop_codons:
                    orfs.append(sequence[i:j+3])
                    break
        return orfs

    # Coerce to a plain upper-case str so str.translate is the method used
    # and lower-case input matches the codon constants.
    forward = str(dna_sequence).upper()
    # The reverse complement does not depend on the frame: build it once.
    reverse = forward.translate(complement)[::-1]

    orfs_found = []
    for frame in range(3):
        # Find ORFs in the original sequence
        orfs_found.extend(find_orfs_in_frame(forward[frame:]))
        # Find ORFs in the reverse complement sequence
        orfs_found.extend(find_orfs_in_frame(reverse[frame:]))

    return orfs_found

# Test the function with a sample DNA sequence
# Test the function with a sample DNA sequence; the bare `orfs` on the last
# line makes the notebook display the returned list as the cell output.
test_sequence = "ATGCGATAGCGTATGCTAATAGCTAGTGAATGCGCGTAG"
orfs = find_orfs(test_sequence)
orfs


['ATGCGATAG', 'ATGCTAATAGCTAGTGAATGCGCGTAG']

In [11]:
def find_motifs(sequence, motifs):
    """
    Find all occurrences of given motifs in a sequence.

    Overlapping occurrences are reported, because each search resumes one
    character after the previous hit. A motif that appears more than once in
    `motifs` is searched only once, so its positions are not duplicated.

    :param sequence: A string, the sequence in which to search for motifs.
    :param motifs: An iterable of strings, the motifs to search for.
    :return: A dictionary mapping each distinct motif to the ascending list
        of 0-based start positions where it is found (empty if absent).
    """
    motif_positions = {motif: [] for motif in motifs}  # One result list per distinct motif

    # Iterate over the dict rather than `motifs`: repeated motifs share one
    # key, and looping over the raw input would append their hits again.
    for motif, positions in motif_positions.items():
        start = 0  # Start at the beginning of the sequence
        while start < len(sequence):
            pos = sequence.find(motif, start)  # Next hit at or after `start`
            if pos == -1:  # No further occurrence of this motif
                break
            positions.append(pos)
            start = pos + 1  # Step one character on so overlapping hits are found

    return motif_positions

# Example usage:
# Example usage: look up the start codon (ATG) and two stop codons (TAG, TGA)
# in the sample sequence and print the motif -> start positions mapping.
sequence = "ATGCGATAGCGTATGCTAATAGCTAGTGAATGCGCGTAG"
motifs = ["ATG", "TAG", "TGA"]
found_motifs = find_motifs(sequence, motifs)
print(found_motifs)


{'ATG': [0, 12, 29], 'TAG': [6, 19, 23, 36], 'TGA': [26]}


In [12]:
def find_motifs(sequence, motif):
    """
    Return every start index at which `motif` occurs in `sequence`.

    Each window of len(motif) items is compared against the motif, so
    overlapping occurrences are all reported, in ascending order.

    NOTE(review): this reuses the name of the multi-motif `find_motifs`
    defined in the previous cell (list of motifs in, dict out) and replaces
    it once this cell has run.

    :param sequence: The sequence to scan.
    :param motif: The single motif to look for.
    :return: A list of 0-based start positions; empty when there is no match.
    """
    width = len(motif)
    # Last valid window start is len(sequence) - width, hence the + 1.
    window_starts = range(len(sequence) - width + 1)
    return [start for start in window_starts if sequence[start:start + width] == motif]

# Example usage:
# Example usage of the single-motif find_motifs defined in this cell.
# `sequence` is re-assigned to the same sample string used in the previous cell.
sequence = "ATGCGATAGCGTATGCTAATAGCTAGTGAATGCGCGTAG"
motif = "ATGC"
motif_locations = find_motifs(sequence, motif)
print(f"Motif '{motif}' found at positions: {motif_locations}")


Motif 'ATGC' found at positions: [0, 12, 29]
