<a href="https://colab.research.google.com/github/dharshinikbt23-crypto/Bioinformatics-5th-sem/blob/main/Phylogentic_Analsysis_8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages.
# NOTE: lines beginning with `!` are IPython/Colab shell escapes; they are run
# as shell commands by the notebook and are not valid in a plain Python script.
!pip install biopython

# Install MAFFT (multiple sequence alignment tool); the `mafft` executable is
# invoked through subprocess in Step 2 of this notebook.
!apt-get install -y mafft

from Bio import AlignIO, Phylo
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
from IPython.display import IFrame
from google.colab import files
import matplotlib.pyplot as plt
import subprocess

print("=" * 60)
print("PHYLOGENETIC ANALYSIS WORKFLOW")
print("=" * 60)

# Step 1: Upload your sequences file
print("\n[Step 1] Upload your sequences.fasta file:")
uploaded = files.upload()

# files.upload() returns a {filename: bytes} dict. It is empty when the upload
# dialog is cancelled, so fail with a clear message instead of an IndexError.
if not uploaded:
    raise ValueError("No file was uploaded; please upload a FASTA file.")

# Only the first uploaded file is used by the rest of the workflow.
uploaded_filename = next(iter(uploaded))
print(f"✓ Uploaded: {uploaded_filename}")

sequence_bytes = uploaded[uploaded_filename]

# A FASTA file starts with a '>' header line. Rejecting anything else here
# (e.g. an RTF or Word export) avoids a confusing "No records found in handle"
# error much later, after MAFFT has already run. A leading UTF-8 BOM and
# leading whitespace are tolerated by the check.
if not sequence_bytes.lstrip(b"\xef\xbb\xbf").lstrip().startswith(b">"):
    raise ValueError(
        f"'{uploaded_filename}' does not look like a FASTA file: the first "
        "non-blank character must be '>'. Save the sequences as plain text "
        "(not RTF/DOCX) and upload again."
    )

# Always (re)write the data under the fixed name used by the following steps.
with open('sequences.fasta', 'wb') as f:
    f.write(sequence_bytes)

if uploaded_filename != 'sequences.fasta':
    print("✓ Renamed to 'sequences.fasta'")

# Step 2: Run MAFFT alignment using subprocess
print("\n[Step 2] Running MAFFT multiple sequence alignment...")
try:
    # Run MAFFT directly via command line; the alignment is read from its
    # captured stdout. check=True turns a non-zero exit status into
    # CalledProcessError.
    result = subprocess.run(
        ['mafft', '--auto', 'sequences.fasta'],
        capture_output=True,
        text=True,
        check=True
    )
except FileNotFoundError:
    # Raised by subprocess when the executable itself cannot be found.
    print("Error running MAFFT: the 'mafft' executable was not found. "
          "Run the installation cell first.")
    raise
except subprocess.CalledProcessError as e:
    print(f"Error running MAFFT: {e}")
    print("Error output:", e.stderr)
    raise

# MAFFT can exit with status 0 and still emit nothing (for example when the
# input is not FASTA), so an empty result must be treated as a failure rather
# than reported as a successful alignment.
if not result.stdout.strip():
    print("Error running MAFFT: no alignment was produced.")
    print("Error output:", result.stderr)
    raise ValueError(
        "MAFFT produced no alignment; check that sequences.fasta is a valid "
        "plain-text FASTA file."
    )

# Save aligned output
with open("aligned.fasta", "w") as handle:
    handle.write(result.stdout)

# Read the alignment back; `align` is used by the distance-matrix step.
try:
    align = AlignIO.read("aligned.fasta", "fasta")
except ValueError as err:
    raise ValueError(
        "MAFFT output could not be parsed as a FASTA alignment; check that "
        "sequences.fasta contains valid FASTA records."
    ) from err

# Report success only once the alignment has actually been parsed.
print("✓ Alignment completed successfully!")
print(f"  - Number of sequences: {len(align)}")
print(f"  - Alignment length: {align.get_alignment_length()}")

# Download the aligned file
print("\n[Step 3] Downloading aligned.fasta...")
files.download('aligned.fasta')

# Step 4: Point the user to an online viewer for the downloaded alignment
print(
    "\n[Step 4] You can view your alignment at:",
    "https://fast.alignmentviewer.org/",
    "(Upload your aligned.fasta file there)",
    sep="\n",
)

# Step 5: Compute the pairwise distance matrix from the alignment using the
# 'identity' model; `dist_matrix` feeds both tree constructions in Step 6.
print("\n[Step 5] Calculating distance matrix...")
calculator = DistanceCalculator(model='identity')
dist_matrix = calculator.get_distance(align)
print("✓ Distance matrix calculated", "\nDistance Matrix:", sep="\n")
print(dist_matrix)

# Step 6: Construct phylogenetic trees
def _draw_tree(tree, title):
    """Draw *tree* on a titled 12x8 inch matplotlib figure and show it.

    Phylo.draw() opens a brand-new figure unless it is given an Axes, so the
    axes are created here and passed in explicitly; otherwise the title and
    figure size would end up on a separate, empty figure.

    Args:
        tree: Bio.Phylo tree to render.
        title: Text placed above the plot.

    Returns:
        The matplotlib Figure the tree was drawn on.
    """
    figure, axes = plt.subplots(figsize=(12, 8))
    Phylo.draw(tree, do_show=False, axes=axes)
    # Set the title after drawing so nothing set during Phylo.draw() replaces it.
    axes.set_title(title, fontsize=14, fontweight='bold')
    figure.tight_layout()
    plt.show()
    return figure


print("\n[Step 6] Constructing phylogenetic trees...")
constructor = DistanceTreeConstructor()

# UPGMA Tree
print("\n--- UPGMA Tree ---")
upgma_tree = constructor.upgma(dist_matrix)
print("✓ UPGMA tree constructed")

# Draw UPGMA tree
fig = _draw_tree(upgma_tree, "UPGMA Phylogenetic Tree")

# ASCII representation
print("\nUPGMA Tree (ASCII):")
Phylo.draw_ascii(upgma_tree)

# Neighbor-Joining Tree
print("\n--- Neighbor-Joining (NJ) Tree ---")
nj_tree = constructor.nj(dist_matrix)
print("✓ NJ tree constructed")

# Draw NJ tree
fig = _draw_tree(nj_tree, "Neighbor-Joining Phylogenetic Tree")

# ASCII representation
print("\nNJ Tree (ASCII):")
Phylo.draw_ascii(nj_tree)

# Step 7: Write both trees to disk in PhyloXML format
print("\n[Step 7] Saving tree files...")
for _tree, _xml_path in (
    (upgma_tree, "upgma_tree.xml"),
    (nj_tree, "nj_tree.xml"),
):
    Phylo.write(_tree, _xml_path, "phyloxml")
print("✓ Trees saved as upgma_tree.xml and nj_tree.xml")

# Optional: Download tree files. This is best-effort: if the browser download
# cannot be started, the files are still available in the Colab workspace.
print("\n[Step 8] Download tree files?")
try:
    files.download('upgma_tree.xml')
    files.download('nj_tree.xml')
except Exception as err:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt/SystemExit still propagate, and report the reason
    # instead of hiding it.
    print(f"Download could not be started: {err}")
    print("Tree files saved in your Colab workspace")

print("\n" + "=" * 60)
print("PHYLOGENETIC ANALYSIS COMPLETED!")
print("=" * 60)

Collecting biopython
  Downloading biopython-1.86-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (13 kB)
Downloading biopython-1.86-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.86
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  fonts-lato libauthen-sasl-perl libclone-perl libdata-dump-perl
  libencode-locale-perl libfile-listing-perl libfont-afm-perl
  libhtml-form-perl libhtml-format-perl libhtml-parser-perl
  libhtml-tagset-perl libhtml-tree-perl libhttp-cookies-perl
  libhttp-daemon-perl libhttp-date-perl libhttp-message-perl
  libhttp-negotiate-perl libio-html-perl libio-socket-ssl-perl
  liblwp-me

Saving exp 8.rtf to exp 8.rtf
✓ Uploaded: exp 8.rtf
✓ Renamed to 'sequences.fasta'

[Step 2] Running MAFFT multiple sequence alignment...
✓ Alignment completed successfully!
Unexpected error: No records found in handle


ValueError: No records found in handle