# Biopython Annotated Chromosomes Diagram

In [None]:
import gzip
import os

import pandas as pd
from Bio import SeqIO

In [None]:
def is_chromosome_id(seq_id):
    """Return True when *seq_id* looks like a numbered chromosome.

    An ID qualifies when removing every occurrence of one of the prefixes
    "Chr", "chr", "C" or "c" leaves only digits (e.g. "Chr10", "chr3", "7").
    IDs such as "scaffold_12" or "ChrM" are rejected.
    """
    return any(
        seq_id.replace(prefix, "").isdigit()
        for prefix in ("Chr", "chr", "C", "c")
    )


# Collect one [genome file name, chromosome ID, sequence length] row for every
# numbered chromosome found in the gzipped FASTA files of the working directory.
chrom_list = []
# Sorted so the row order does not depend on the file system's listing order.
for file_name in sorted(os.listdir('./')):
    # Keep only names ending in "a.gz" (e.g. *.fa.gz, *.fasta.gz, *.fna.gz).
    if not file_name.endswith("a.gz"):
        continue
    # The context manager closes the handle even if parsing fails part-way.
    with gzip.open(file_name, mode='rt') as fasta_file:
        for chrom in SeqIO.parse(fasta_file, "fasta"):
            # Skip records whose ID does not look like a numbered chromosome.
            if not is_chromosome_id(chrom.id):
                continue
            # TODO: add species and genotype columns for a better annotation
            chrom_list.append([file_name, chrom.id, len(chrom.seq)])

# Dataframe that contains each chromosome length. Passing the column names to
# the constructor keeps this working when no chromosome was found at all
# (assigning three names to an empty, zero-column frame raises ValueError).
df = pd.DataFrame(chrom_list, columns=["genome", "chr", "len"])
df

In [None]:
from reportlab.lib.units import cm
from Bio import SeqIO
from Bio.Graphics import BasicChromosome

# Draw every chromosome of one genome as an annotated ideogram and save it
# as a PDF.

# Rows of the genome to draw; `entries` is a list of [chromosome ID, length].
selected = df[df["genome"] == "Zmays_493_APGv4.fa.gz"]
entries = selected[["chr", "len"]].values.tolist()
# Longest chromosome over every genome in df (not only the selected one).
max_len = df["len"].max()
# Size given to each telomere cap, in the same bp units as the bodies.
telomere_length = 2000000

# The features can either be SeqFeature objects, or tuples of values:
# start (int), end (int), strand (+1, -1, 0 or None), label (string),
# ReportLab color (string or object), and optional ReportLab fill color.
#
# NOTE: this single list is offered to every chromosome; make
# chromosome-specific lists to annotate each one differently.
features = [
    (1000000, 1000000, None, "ABC1", "black"),
    (1000000, 1000000, None, "ABC2", "black"),
    (100000000, 100000000, None, "ABC3", "blue"),
]

chr_diagram = BasicChromosome.Organism()
chr_diagram.page_size = (29.7 * cm, 21 * cm)  # A4 landscape

for name, length in entries:
    cur_chromosome = BasicChromosome.Chromosome(name)
    # Set the scale to the MAXIMUM length plus the two telomeres in bp,
    # so the same scale is used on all chromosomes and they can be
    # compared to each other.
    cur_chromosome.scale_num = max_len + 2 * telomere_length

    # Add an opening telomere
    start = BasicChromosome.TelomereSegment()
    start.scale = telomere_length
    cur_chromosome.add(start)

    # A feature located past the end of a shorter chromosome cannot be
    # placed on it, so only keep the ones that fit inside this body.
    visible_features = [
        feature for feature in features
        if 0 <= feature[0] <= feature[1] <= length
    ]

    # Add a body - again using bp as the scale length here.
    body = BasicChromosome.AnnotatedChromosomeSegment(length, visible_features)
    body.scale = length
    cur_chromosome.add(body)

    # Add a closing telomere
    end = BasicChromosome.TelomereSegment(inverted=True)
    end.scale = telomere_length
    cur_chromosome.add(end)

    # This chromosome is done
    chr_diagram.add(cur_chromosome)

chr_diagram.draw("test_chrom.pdf", "Genome Diagram")