In [1]:
pip install biopython

Collecting biopython
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: biopython
Successfully installed biopython-1.83


In [2]:
from Bio.Seq import Seq
my_dna = Seq("ATCG")
print(my_dna)

ATCG


In [3]:
rna = my_dna.transcribe()
protein = my_dna.translate()
print(rna)
print(protein)

AUCG
I




In [19]:
from Bio import SeqIO

# Read a FASTA file
filename = "rcsb_pdb_8X1D.fasta"
for record in SeqIO.parse(filename, "fasta"):
    print(f"ID: {record.id}")
    print(f"Sequence: {record.seq}")

ID: 8X1D_1|Chains
Sequence: DTTNYWQDWTDGVGTVNASNLAGGQYSVSWTNCGNFVVGKGWSTGSPSRVVNYNAGAFSPNGNGYLSLYGWTRSPLIEYYVVDDWGSYRPTGTYMGTVTSDGGTYDIYTATRVNAPSIDGTQTFTQFWSVRQSKRSIGTNNTITFANHVNAWKSHGMNLGSSWAYQIIATEGYQSSGYANVTVW


In [5]:
#Pairwise Alignment
from Bio import pairwise2

seq1 = "ATCGTAGCTA"
seq2 = "AGCTAGCTG"

alignments = pairwise2.align.globalxx(seq1, seq2)
best_alignment = alignments[0]

print(f"Aligned sequence 1: {best_alignment[0]}")
print(f"Aligned sequence 2: {best_alignment[1]}")

Aligned sequence 1: ATCG-TAGCTA-
Aligned sequence 2: A--GCTAGCT-G




In [6]:
#Pairwise Score
from Bio import pairwise2

# Example DNA sequences
sequence1 = "ACGGGTACGT"
sequence2 = "ACGGTACG"

# Perform global alignment and get the alignment score
alignments = pairwise2.align.globalxx(sequence1, sequence2, score_only=True)

print(f"Alignment score: {alignments:.1f}")

Alignment score: 8.0


In [8]:
#Custom Pairwise Score
from Bio import pairwise2
from Bio.Align import substitution_matrices

# Example DNA sequences
sequence1 = "ACGGGTACGT"
sequence2 = "ACGGTACG"

# Custom scoring matrix as a dictionary (match: +1, mismatch: -1, gap: -2)
# Creating a substitution matrix dictionary
matrix = {
    ('A', 'A'): 1, ('A', 'C'): -1, ('A', 'G'): -2, ('A', 'T'): -2,
    ('C', 'A'): -1, ('C', 'C'): 1, ('C', 'G'): -2, ('C', 'T'): -2,
    ('G', 'A'): -2, ('G', 'C'): -2, ('G', 'G'): 1, ('G', 'T'): -1,
    ('T', 'A'): -2, ('T', 'C'): -2, ('T', 'G'): -1, ('T', 'T'): 1
}

# Perform global alignment and get the alignment score
alignments = pairwise2.align.globalds(sequence1, sequence2, matrix, -2, -2, score_only=True)

print(f"Alignment score: {alignments:.1f}")

Alignment score: 4.0


In [9]:
#Custom Pairwise Score
from Bio import pairwise2

# Example DNA sequences
sequence1 = "ACGGGTACGT"
sequence2 = "ACGGTACG"

# Adjusted scoring matrix as a dictionary (increased match score, reduced mismatch penalty)
matrix = {
    ('A', 'A'): 5, ('A', 'C'): -1, ('A', 'G'): -2, ('A', 'T'): -2,
    ('C', 'A'): -1, ('C', 'C'): 5, ('C', 'G'): -2, ('C', 'T'): -2,
    ('G', 'A'): -2, ('G', 'C'): -2, ('G', 'G'): 5, ('G', 'T'): -1,
    ('T', 'A'): -2, ('T', 'C'): -2, ('T', 'G'): -1, ('T', 'T'): 5
}

# Adjusted gap penalties
gap_open_penalty = -2
gap_extend_penalty = -1

# Perform global alignment and get the alignment score
alignments = pairwise2.align.globalds(sequence1, sequence2, matrix, gap_open_penalty, gap_extend_penalty, score_only=True)

print(f"Alignment score: {alignments:.1f}")

Alignment score: 36.0


In [10]:
#Multiple Alignment Sequence
from Bio import SeqIO, AlignIO
from Bio.Align import MultipleSeqAlignment
from Bio.SeqRecord import SeqRecord

# Create SeqRecord objects for our sequences
seq_A = SeqRecord("ATCGTACG", id="SeqA")
seq_B = SeqRecord("ATCGTCCG", id="SeqB")

# Create a MultipleSeqAlignment object
alignment = MultipleSeqAlignment([seq_A, seq_B])

# Print the alignment
print(alignment)

Alignment with 2 rows and 8 columns
ATCGTACG SeqA
ATCGTCCG SeqB




In [13]:
from Bio import pairwise2

# Define the sequences
seq1 = "ATCGTACG"
seq2 = "ATCGTCCG"
seq3 = "ATCGTAAA"
seq4 = "ATCGTGGG"

# Calculate pairwise alignments
alignments = [
    pairwise2.align.globalxx(seq1, seq2),
    pairwise2.align.globalxx(seq1, seq3),
    pairwise2.align.globalxx(seq1, seq4),
    pairwise2.align.globalxx(seq2, seq3),
    pairwise2.align.globalxx(seq2, seq4),
    pairwise2.align.globalxx(seq3, seq4),
]

# Print alignment scores
for i, alignment_list in enumerate(alignments, start=1):
    for alignment in alignment_list:
        seqA, seqB, score, start, end = alignment
        print(f"Alignment {i}: Score = {score:.1f}")
        print(f"Seq1: {seqA}")
        print(f"Seq2: {seqB}\n")
        # Since we are only interested in one alignment per pair, break after the first
        break

Alignment 1: Score = 7.0
Seq1: ATCGTAC-G
Seq2: ATCGT-CCG

Alignment 2: Score = 6.0
Seq1: ATCGTACG--
Seq2: ATCGTA--AA

Alignment 3: Score = 6.0
Seq1: ATCGTACG--
Seq2: ATCGT--GGG

Alignment 4: Score = 5.0
Seq1: ATCGTCCG---
Seq2: ATCGT---AAA

Alignment 5: Score = 6.0
Seq1: ATCGTCCG--
Seq2: ATCGT--GGG

Alignment 6: Score = 5.0
Seq1: ATCGTAAA---
Seq2: ATCGT---GGG



In [14]:
from Bio import pairwise2, SeqIO
from Bio.Align import MultipleSeqAlignment
from Bio.SeqRecord import SeqRecord

# Define the sequences
seq1 = "ATCGTACG"
seq2 = "ATCGTCCG"
seq3 = "ATCGTAAA"
seq4 = "ATCGTGGG"

# Create SeqRecord objects for our sequences
seq_A = SeqRecord(seq1, id="SeqA")
seq_B = SeqRecord(seq2, id="SeqB")
seq_C = SeqRecord(seq3, id="SeqC")
seq_D = SeqRecord(seq4, id="SeqD")

# Create a MultipleSeqAlignment object
alignment = MultipleSeqAlignment([seq_A, seq_B, seq_C, seq_D])

# Print the alignment
print(alignment)

# Calculate pairwise alignment scores
alignment_scores = []
for i in range(len(alignment)):
    for j in range(i + 1, len(alignment)):
        score = pairwise2.align.globalxx(alignment[i].seq, alignment[j].seq, one_alignment_only=True)[0][2]
        alignment_scores.append((alignment[i].id, alignment[j].id, score))

# Print alignment scores
for a, b, score in alignment_scores:
    print(f"Alignment score between {a} and {b}: {score:.1f}")

Alignment with 4 rows and 8 columns
ATCGTACG SeqA
ATCGTCCG SeqB
ATCGTAAA SeqC
ATCGTGGG SeqD
Alignment score between SeqA and SeqB: 7.0
Alignment score between SeqA and SeqC: 6.0
Alignment score between SeqA and SeqD: 6.0
Alignment score between SeqB and SeqC: 5.0
Alignment score between SeqB and SeqD: 6.0
Alignment score between SeqC and SeqD: 5.0




In [24]:
from Bio import AlignIO, SeqRecord, pairwise2
from Bio.Align import MultipleSeqAlignment, AlignInfo


def get_consensus_seq(filename: str) -> SeqRecord:
    # Load the MSA from a file (e.g., in FASTA format)
    common_alignment = MultipleSeqAlignment(chain(*AlignIO.parse(filename, "fasta")))

    # Calculate the consensus sequence
    summary = AlignInfo.SummaryInfo(common_alignment)
    consensus = summary.dumb_consensus(0.7, "N")  # Adjust the threshold as needed

    return consensus

# Example usage:
consensus_seq = get_consensus_seq("rcsb_pdb_8X1D.fasta")
print(consensus_seq)

DTTNYWQDWTDGVGTVNASNLAGGQYSVSWTNCGNFVVGKGWSTGSPSRVVNYNAGAFSPNGNGYLSLYGWTRSPLIEYYVVDDWGSYRPTGTYMGTVTSDGGTYDIYTATRVNAPSIDGTQTFTQFWSVRQSKRSIGTNNTITFANHVNAWKSHGMNLGSSWAYQIIATEGYQSSGYANVTVW


>>> from Bio.Seq import Seq
>>> from Bio.SeqRecord import SeqRecord
>>> from Bio.Align import MultipleSeqAlignment
>>> from Bio.Align.AlignInfo import SummaryInfo
>>> msa = MultipleSeqAlignment([SeqRecord(Seq('ACGT')),
...                             SeqRecord(Seq('ATGT')),
...                             SeqRecord(Seq('ATGT'))])
>>> summary = SummaryInfo(msa)
>>> dumb_consensus = summary.dumb_consensus(ambiguous='N')
>>> print(dumb_consensus)
ANGT
>>> alignment = msa.alignment
>>> from Bio.motifs import Motif
>>> motif = Motif('ACGT', alignment)
>>> print(motif.consensus)
ATGT
>>> print(motif.degenerate_consensus)
AYGT
>>> counts = motif.counts
>>> consensus = counts.calculate_consensus(identity=0.7)
>>> print(consensus)
ANGT

If your multiple sequence alignment object was obtained using Bio.AlignIO, then you can obtain a new-style Alignment object directly by using Bio.Align.read instead of Bio.AlignIO.read, or Bio.Align.parse instead of Bio.AlignIO.parse.
