In [1]:
import pandas as pd
import sqlite3
from ipywidgets import HTML
import matplotlib as mpl
from matplotlib import pyplot as plt
import numpy as np

In [2]:
# Open the SQLite database that later cells query through `con`.
# The path is relative, so it resolves against the current working directory.
db_path = 'data/core.muri2.2.denorm.db'
con = sqlite3.connect(db_path)

In [3]:
# Genome summary table indexed by genome_id. The query inner-joins the
# genome, checkm and quast tables on genome_id, so a genome missing from any
# of the three is absent from the result.
genome_stats_sql = """
    SELECT
        genome_id
      , genome_type
      , completeness
      , contamination
      , n_contigs
      , total_length
      , gc_percent
      , n50
    FROM genome
    JOIN checkm USING (genome_id)
    JOIN quast USING (genome_id)
                  """
mag = pd.read_sql(genome_stats_sql, con=con, index_col=['genome_id'])

# Maps genome_id values to the short labels used in the output tables.
# Insertion order matters: list(mag_names.values()) is used downstream to
# select and order the table rows.
mag_names = {
    # These ids are already short, so each one maps to itself.
    **{genome_id: genome_id for genome_id in (
        'B1A', 'B1B', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
    )},
    # Long ids are abbreviated to two-letter labels.
    'Muribaculum_intestinale_DSM_28989T': 'Mi',
    'Duncaniella_muris_DSM_103720T': 'Dm',
    'Duncaniella_freteri_DSM_108168T': 'Df',
    'Duncaniella_dubosii_DSM_107170T': 'Dd',
    'Paramuribaculum_intestinale_DSM_100749T': 'Pi',
    'Homeothermus_arabinoxylanisolvens': 'Ha',
    'Amulumruptor_caecigallinarius': 'Ac',
}

# Build a display-only copy of the genome table: index labels are shortened
# via mag_names and the numeric columns below become formatted strings.
# rename/assign return new frames, so the numeric `mag` is left untouched.
m = mag.rename(index=mag_names).assign(
    # total_length / 1e6, rendered with one fixed decimal. The previous
    # '{:.2}' spec meant "two significant digits", which switches to exponent
    # notation (e.g. '1.2e+01') once a value reaches 10 and shows two
    # decimals below 1.
    total_length=lambda df: df.total_length.div(1e6).map('{:.1f}'.format),
    # The '%' presentation type multiplies by 100, so the stored values are
    # divided by 100 first to keep them on their original scale.
    completeness=lambda df: df.completeness.div(100).map('{:.0%}'.format),
    contamination=lambda df: df.contamination.div(100).map('{:.2%}'.format),
    gc_percent=lambda df: df.gc_percent.div(100).map('{:.1%}'.format),
)


# Columns shown in the summary table, in display order.
feats = [
    'completeness',
    'n_contigs',
    'total_length',
    'n50',
    'gc_percent',
]

# Rows are selected and ordered by the short labels in mag_names; .loc raises
# KeyError if any label is missing from the formatted table.
summary_rows = list(mag_names.values())
summary_html = m.loc[summary_rows, feats].to_html()
HTML(summary_html)

HTML(value='<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></…

In [10]:
# IPython-only help lookup (trailing `?`): shows the docstring of
# DataFrame.to_markdown and has no effect on the analysis. This is not valid
# plain-Python syntax, so it only runs inside IPython/Jupyter.
# NOTE(review): looks like leftover interactive exploration; consider removing.
pd.DataFrame.to_markdown?

In [11]:
# Render the same row/column selection as a grid-format text table.
# print() is used so the newlines are emitted instead of being shown
# escaped inside a string repr.
grid_rows = list(mag_names.values())
grid_text = m.loc[grid_rows, feats].to_markdown(tablefmt="grid")
print(grid_text)

+-------------+----------------+-------------+----------------+---------+--------------+
| genome_id   | completeness   |   n_contigs |   total_length |     n50 | gc_percent   |
| B1A         | 95%            |         286 |            3.1 |   23337 | 46.6%        |
+-------------+----------------+-------------+----------------+---------+--------------+
| B1B         | 94%            |         320 |            2.8 |   19144 | 47.0%        |
+-------------+----------------+-------------+----------------+---------+--------------+
| B2          | 96%            |         116 |            2.7 |   75014 | 50.6%        |
+-------------+----------------+-------------+----------------+---------+--------------+
| B3          | 93%            |          62 |            2.7 |   80587 | 55.8%        |
+-------------+----------------+-------------+----------------+---------+--------------+
| B4          | 97%            |          30 |            2.7 |  127141 | 55.4%        |
+-------------+------

In [12]:
# Median n50 over the nine B* genomes, taken from the unformatted `mag`
# table (the n50 column there is still numeric).
b_genome_ids = ['B1A', 'B1B', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8']
mag.loc[b_genome_ids, 'n50'].median()

61151.0