In [1]:
%pip install genomenotebook
%pip install pandas

Collecting genomenotebook
  Obtaining dependency information for genomenotebook from https://files.pythonhosted.org/packages/30/f9/53fac4486d939fc9353692d8aa0be4178a50c78eda11674060ab6d61e8a5/genomenotebook-0.8.9-py3-none-any.whl.metadata
  Downloading genomenotebook-0.8.9-py3-none-any.whl.metadata (1.8 kB)
Collecting pandas>=1.5.3 (from genomenotebook)
  Obtaining dependency information for pandas>=1.5.3 from https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting bokeh>=3.1.0 (from genomenotebook)
  Obtaining dependency information for bokeh>=3.1.0 from https://files.pythonhosted.org/packages/c6/5d/46cde55344ad96a0570e2f72d9df428349a6a800448f6a5b6140c337f930/bokeh-3.2.1-py3-none-any.whl.metadata
  Downloading bokeh-3.2.1-py3-none-any.whl.meta

In [2]:
import os
import pandas as pd
import genomenotebook as gn

In [3]:
def get_operon_coords(one_operon):
    """Summarise the genes of a single operon as one strand-oriented interval.

    Parameters
    ----------
    one_operon : pandas.DataFrame
        Rows belonging to one operon; must provide the columns ``start``,
        ``end`` and ``strand``.

    Returns
    -------
    pandas.Series
        ``start``, ``end``, ``strand`` and ``N_genes``. The strand is taken
        from the first row. On the ``'+'`` strand ``start`` is the smallest
        gene start and ``end`` the largest gene end; for any other strand
        value the two are swapped, so ``start`` is the numerically larger
        coordinate.
    """
    strand = one_operon['strand'].iloc[0]
    lowest = one_operon['start'].min()
    highest = one_operon['end'].max()

    # Orient the interval along the strand: forward operons run low -> high,
    # everything else runs high -> low.
    if strand == '+':
        start, end = lowest, highest
    else:
        start, end = highest, lowest

    summary = {
        'start': start,
        'end': end,
        'strand': strand,
        'N_genes': len(one_operon),
    }
    return pd.Series(summary)

def extract_operons(operons_genes):
    """Collapse a per-gene table into one highlightable region per operon.

    Parameters
    ----------
    operons_genes : pandas.DataFrame
        One row per gene, with at least the columns ``operon``, ``start``,
        ``end`` and ``strand``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``operon`` value with the columns ``operon``,
        ``start``, ``end``, ``strand`` and ``N_genes`` (as computed by
        ``get_operon_coords``), followed by ``left``/``right`` (the smaller and
        larger of ``start``/``end``) and the constant columns
        ``color='lightblue'`` and ``type='region'``.
    """
    # Pass only the columns the per-operon summary reads. Applying a function
    # to a groupby without a column selection also hands it the grouping
    # column, which recent pandas releases deprecate with a warning; the
    # explicit selection behaves the same on old and new pandas.
    per_operon = (
        operons_genes
        .groupby('operon')[['start', 'end', 'strand']]
        .apply(get_operon_coords)
        .reset_index()
    )

    # start/end are strand-oriented, so derive orientation-free bounds too.
    bounds = per_operon[['start', 'end']]
    per_operon['left'] = bounds.min(axis=1)
    per_operon['right'] = bounds.max(axis=1)
    per_operon['color'] = 'lightblue'
    per_operon['type'] = 'region'
    return per_operon

In [6]:
from collections import defaultdict
import random

from bokeh.io.output import output_file

# Parse the annotation once to derive the per-operon regions; the browser
# below reads the same file itself via gff_path.
gff_path = './data_2/results/final/o_antigen_operons.gff3'
gff = gn.parse_gff(gff_path)
operons = extract_operons(gff)

# Glyph styles for the two feature types displayed below. The library defaults
# (gn.get_default_glyphs()) are intentionally not fetched: every displayed
# feature type gets an explicit style here, so they would be discarded.
glyphs = {
    'CDS': gn.Glyph(
        glyph_type="arrow",
        colors="blue",
        height=0.7,
        show_name=True
    ),
    'transposon': gn.Glyph(
        glyph_type="arrow",
        colors="red",
        height=0.7,
        show_name=True
    )
}

g = gn.GenomeBrowser(
    gff_path=gff_path,  # genome_path=genome_path,
    glyphs=glyphs,
    feature_types=['CDS', 'transposon'],
    height=400,
    width=1200,
    # NOTE(review): hard-coded start position; possibly the location of locus
    # tag AGOPJMEK_00551 that was left in this cell as a bare string - confirm.
    init_pos=618013,
    feature_height=0.1,
    attributes=["gene_name", "function", "start", "end"],
)

# Show the operon table, then overlay each operon as a highlighted region.
print(operons)
g.highlight(data=operons, hover_data=['operon', 'N_genes'])

# NOTE(review): output_file registers operons.html as bokeh's output target;
# confirm that g.show() honours it rather than resetting the output mode.
output_file('operons.html')
g.show()


  operon    start      end strand  N_genes     left    right      color  \
0   1860  3666749  3645471      -       20  3645471  3666749  lightblue   
1   2267  4518297  4506017      -       14  4506017  4518297  lightblue   
2     44    73698    81565      +        8    73698    81565  lightblue   
3     46    86913    84884      -        4    84884    86913  lightblue   
4    705  1493997  1488172      -        7  1488172  1493997  lightblue   
5    804  1711698  1730261      +       17  1711698  1730261  lightblue   
6    806  1734614  1746345      +       13  1734614  1746345  lightblue   

     type  
0  region  
1  region  
2  region  
3  region  
4  region  
5  region  
6  region  
