For both PacBio and MiSeq data:
* Upload **barplot.qzv** file to [https://view.qiime2.org/](https://view.qiime2.org/)
* Set **Taxonomic Level** to **Level 7**
* Download csv

In [70]:
import pandas as pd
import altair as alt

In [71]:
pacbio_df = pd.read_csv("pacbio_level-7.csv")
columns = [f.split(';')[-1].strip('s__') if f.startswith('d__Bacteria') else f for f in pacbio_df.columns.tolist()]
columns = ['unknown' if f == '' else f for f in columns]
columns = ['uncultured' if f.startswith('uncultured') else f for f in columns]
pacbio_df.columns = columns
pacbio_df2 = pacbio_df.drop(['index','condition'],axis=1).T.sum(axis=1).reset_index()
pacbio_df2.columns = ['species','counts']
pacbio_df3 = pacbio_df2.groupby('species').agg(sum).sort_values('counts', ascending=False)

In [72]:
alt.Chart(pacbio_df3.reset_index().sort_values('counts', ascending=False).head(20)).mark_bar().encode(
    y=alt.Y('counts', stack="normalize"),
    color='species',
    tooltip=['species']
).properties(
    width=400,
    height=700
)

In [73]:
alt.Chart(pacbio_df3.reset_index().sort_values('counts', ascending=False).head(20)).mark_bar().encode(
    x=alt.X('counts'),
    y=alt.Y("species", sort='-x'),
    tooltip=['species']
).properties(height=700)

In [74]:
miseq_df = pd.read_csv("miseq_level-7.csv")
columns = [f.split(';')[-1].strip('s__') if f.startswith('d__Bacteria') else f for f in miseq_df.columns.tolist()]
columns = ['unknown' if f == '' else f for f in columns]
columns = ['uncultured' if f.startswith('uncultured') else f for f in columns]
miseq_df.columns = columns
miseq_df2 = miseq_df.drop(['index','condition'],axis=1).T.sum(axis=1).reset_index()
miseq_df2.columns = ['species','counts']
miseq_df3 = miseq_df2.groupby('species').agg(sum).sort_values('counts', ascending=False)

In [75]:
alt.Chart(miseq_df3.reset_index().sort_values('counts', ascending=False).head(20)).mark_bar().encode(
    y=alt.Y('counts', stack="normalize"),
    color='species',
    tooltip=['species']
).properties(
    width=400,
    height=700
)

In [76]:
alt.Chart(miseq_df3.reset_index().sort_values('counts', ascending=False).head(20)).mark_bar().encode(
    x=alt.X('counts'),
    y=alt.Y("species", sort='-x'),
    tooltip=['species']
).properties(height=700)

In [77]:
pacbio_df3 = pacbio_df3.reset_index().sort_values('counts', ascending=False).head(20)
pacbio_df3.set_index('species', inplace=True)
pacbio_df3['platform'] = 'PacBio'
pacbio_df3.reset_index(inplace=True)

In [78]:
miseq_df3 = miseq_df3.reset_index().sort_values('counts', ascending=False).head(20)
miseq_df3.set_index('species', inplace=True)
miseq_df3['platform'] = 'MiSeq'
miseq_df3.reset_index(inplace=True)

In [79]:
merged_df = pd.concat([pacbio_df3, miseq_df3], ignore_index=True)

In [38]:
from vega_datasets import data

source = data.barley()

In [39]:
source.head()

Unnamed: 0,yield,variety,year,site
0,27.0,Manchuria,1931,University Farm
1,48.86667,Manchuria,1931,Waseca
2,27.43334,Manchuria,1931,Morris
3,39.93333,Manchuria,1931,Crookston
4,32.96667,Manchuria,1931,Grand Rapids


In [80]:
merged_df.head()

Unnamed: 0,species,counts,platform
0,Staphylococcus_aureu,106624.0,PacBio
1,Escherichia_coli,77726.0,PacBio
2,unknown,63074.0,PacBio
3,uncultured,17950.0,PacBio
4,Vibrio_fluviali,16790.0,PacBio


In [84]:
merged_df['counts'] = merged_df['counts'].astype(int)

In [106]:
alt.Chart(merged_df).mark_bar().encode(
    x=alt.X('platform'),
    y=alt.Y('counts', stack="normalize"),
    color='species',
    tooltip=['species'],
    order=alt.Order(
      # Sort the segments of the bars by this field
      'counts',
      sort='ascending'
    )
).properties(
    height=700
).configure_range(
    category={'scheme': 'category20'}
).save(
    "taxo_comparison.html")