In [1]:
# Run this notebook with `Python 3` kernel

"""
Creates figures comparing four methods
"""

import pandas as pd
import altair as alt

# `save` is used by the plotting cells below to write each chart to a PDF file.
from altair_saver import save
# Turn off Altair's max-rows check so the charts accept the input tables
# regardless of how many rows they contain.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [4]:
for env in ['sludge', 'chicken', 'human', 'sheep']:
    # Per-environment table read from `<env>/df_four_methods_representative_bgc_count.tsv`;
    # the columns used below are `assembler_name` and the two representative BGC count columns.
    df_four_methods_representative_bgc_count = pd.read_csv(
        f"{env}/df_four_methods_representative_bgc_count.tsv",
        sep='\t',
    )

    # Reshape wide -> long so each (method, count column) pair becomes one row.
    # `pd.melt` names the new columns `variable` and `value` by default, which is
    # what the chart encodings below refer to.
    df_four_methods_representative_bgc_count_melt = pd.melt(
        df_four_methods_representative_bgc_count,
        id_vars=['assembler_name'],
        value_vars=['representative_bgc_partial_count', 'representative_bgc_complete_count'],
    )

    # Swap raw identifiers for the labels shown in the figure. The mapping is applied
    # to the whole frame, so it relabels both the `variable` values and the method names.
    display_labels = {
        'representative_bgc_partial_count': 'Partial',
        'representative_bgc_complete_count': 'Complete',
        'hifibgc': 'HiFiBGC',
        'hicanu': 'bgc_HiCanu',
        'metaflye': 'bgc_metaFlye',
        'hifiasm-meta': 'bgc_hifiasm-meta',
    }
    df_four_methods_representative_bgc_count_melt = df_four_methods_representative_bgc_count_melt.replace(display_labels)

    # Ordering in stack bars in below altair chart is implemented as suggested here https://stackoverflow.com/questions/66347857/sort-a-normalized-stacked-bar-chart-with-altair/66355902#66355902
    order_in_stacked_bar = ['Complete', 'Partial']
    domain = ['Complete', 'Partial']
    # Colours paired positionally with `domain` (an earlier variant used '#00bfc4' for 'Partial').
    range_ = ['#f8766d', '#00c467']

    # Build the axis, legend and colour definitions first so the encode() call stays short.
    method_axis = alt.Axis(title='Method', labelAngle=-45, labelFontSize=10, titleFontSize=12)
    count_axis = alt.Axis(title='Count of representative BGCs', labelFontSize=10, titleFontSize=12)
    # orient='none' together with legendX/legendY pins the legend at explicit coordinates.
    completeness_legend = alt.Legend(
        orient='none',
        direction='vertical',
        titleAnchor='middle',
        legendX=85,
        legendY=75,
        labelFontSize=10,
        titleFontSize=12,
    )
    completeness_color = alt.Color(
        'variable:N',
        sort=order_in_stacked_bar,
        title='Complete/Partial',
        scale=alt.Scale(domain=domain, range=range_),
        legend=completeness_legend,
    )

    base_chart = alt.Chart(df_four_methods_representative_bgc_count_melt).mark_bar()
    encoded_chart = base_chart.encode(
        alt.X('assembler_name:N', sort='y', axis=method_axis),
        alt.Y('value:Q', sort='y', axis=count_axis),
        completeness_color,
        # Field name follows the linked answer: the stack order is driven by the sort
        # index associated with the custom `sort` list on the colour channel.
        alt.Order('color_variable_sort_index:Q'),
    )
    chart = encoded_chart.configure_scale(
        bandPaddingInner=0.3  # for changing distance between bars
    )

    # Save plot
    save(chart, f"{env}/{env}_comparison_between_four_methods_bgc-count_and_partial-complete_updated.pdf")

In [3]:
# Raw method identifiers -> labels shown on the x axis of the figure.
method_display_names = {
    'hifibgc': 'HiFiBGC',
    'hicanu': 'bgc_HiCanu',
    'metaflye': 'bgc_metaFlye',
    'hifiasm-meta': 'bgc_hifiasm-meta',
}

# Colour scale for the `BiG-SCAPE_class` column: class names in `domain` are paired
# positionally with the hex colours in `range_`. Both are loop-invariant, so they are
# defined once here rather than on every iteration.
domain = ['NRPS', 'Others', 'PKS-NRP_Hybrids', 'PKSI', 'PKSother', 'RiPPs', 'Saccharides', 'Terpene']
range_ = ['#0072B2', '#999999', '#56B4E9', '#009E73', '#F0E442', '#E69F00', '#D55E00', '#CC79A7']

for env in ['sludge', 'chicken', 'human', 'sheep']:
    # Per-environment BGC metadata table; the columns used below are
    # `Representative_Member`, `Method_Name` and `BiG-SCAPE_class`.
    df_four_methods_metadata = pd.read_csv(f"{env}/df_four_methods_bgc_all_metadata.tsv", sep='\t')

    # Keep only rows flagged as representative members. The explicit `== True`
    # comparison is kept from the original so that entries that are not exactly True
    # (e.g. missing values) are simply excluded.
    is_representative = df_four_methods_metadata['Representative_Member'] == True

    # Take an explicit copy of the filtered rows: assigning a column on the result of
    # boolean-mask filtering without a copy is the chained-assignment pattern that
    # triggers pandas' SettingWithCopyWarning.
    df_four_methods_metadata = df_four_methods_metadata.loc[is_representative].copy()
    df_four_methods_metadata['Method_Name'] = df_four_methods_metadata['Method_Name'].replace(method_display_names)

    # One stacked bar per method; bar height is the number of rows (representative BGCs)
    # and the segments are coloured by BiG-SCAPE class.
    chart = alt.Chart(df_four_methods_metadata).mark_bar().encode(
        alt.X('Method_Name:N', sort='y', axis=alt.Axis(title='Method', labelAngle=-45)),
        alt.Y('count():Q', sort='y', axis=alt.Axis(title='Count of representative BGCs', labelFontSize=13, titleFontSize=14)),
        color=alt.Color('BiG-SCAPE_class', scale=alt.Scale(domain=domain, range=range_))
    ).configure_scale(
        bandPaddingInner=0.3 # for changing distance between bars
    )

    # Save plot
    save(chart, f"{env}/{env}_comparison_between_four_methods_bgc-type.pdf")