In [21]:
# %%
import altair as alt
import pandas as pd

# Load the batch summary CSV, keeping only the columns at positions 1-13
df = pd.read_csv(
    "/Users/santiago/Downloads/all_batch_summeries (2).csv",
    usecols=range(1, 14),
)

# Normalise the headers in one chain: spaces become underscores, any other
# character outside [a-zA-Z0-9_] is dropped, and the result is lower-cased
df.columns = (
    df.columns
    .str.replace(' ', '_')
    .str.replace('[^a-zA-Z0-9_]', '', regex=True)
    .str.lower()
)

# MAG id = the part of the input file name that precedes its first '.'
input_name_parts = df["input_file"].str.split('.', expand=True)
df["mag_id"] = input_name_parts[0]

# percent_gaps is handled as text here: keep what precedes '%' and convert to float
gap_text = df["percent_gaps"].str.split('%', expand=True)[0]
df["percent_gaps"] = gap_text.map(float)

# Assembly-statistic columns carried through both derived tables
assembly_stat_cols = [
    'scaffold_n50',
    'contigs_n50',
    'percent_gaps',
    'number_of_scaffolds',
]

# Long form: one row per input row and BUSCO category, with the category name
# in "category" and its value in "BUSCO_percentage"
df2 = df.melt(
    id_vars=["sample_id", "mag_id", "dataset", "n_markers", *assembly_stat_cols],
    value_vars=["single", "duplicated", "fragmented", "missing"],
    var_name="category",
    value_name="BUSCO_percentage",
)

# Wide form restricted to the identifiers and the assembly statistics
df3 = df[["sample_id", "mag_id", *assembly_stat_cols]]

# Specify the order in which the BUSCO categories are stacked within a bar
mapping = {"single": 1, "duplicated": 2, "fragmented": 3, "missing": 4}
df2["order"] = df2["category"].map(mapping)

# Approximate marker count per category, formatted as "~count/total" for the
# tooltip. The total is taken from each row's own n_markers (the same value
# used to compute the count) rather than a fixed number, so the label stays
# consistent when n_markers differs between rows.
marker_counts = (
    (df2["BUSCO_percentage"] * df2["n_markers"] / 100).round().astype(int)
)
df2["fracc_markers"] = (
    "~" + marker_counts.astype(str) + "/" + df2["n_markers"].astype(int).astype(str)
)
df2


# %%
# Stacked BUSCO bar chart: one bar per MAG, one facet row per sample
domain = ['single', 'duplicated', 'fragmented', 'missing']
range_ = ['#1E90FF', '#87CEFA', '#FFA500', '#FF7F50']

# x: summed percentage per category, normalised so every bar spans the full width
busco_x = alt.X(
    'sum(BUSCO_percentage)',
    stack="normalize",
    title="BUSCO fracc.",
)
busco_y = alt.Y('mag_id', axis=alt.Axis(title='MAG ID'))

# Fixed category -> colour mapping, with the legend placed above the chart
busco_color = alt.Color(
    'category',
    scale=alt.Scale(domain=domain, range=range_),
    legend=alt.Legend(title="BUSCO Category", orient="top"),
)

# NOTE(review): "Aprox." in the tooltip title below is a misspelling of
# "Approx."; the displayed text is left unchanged here.
busco_tooltip = [
    alt.Tooltip("sample_id", title="Sample ID"),
    alt.Tooltip("mag_id", title="MAG ID"),
    alt.Tooltip("dataset", title="Lineage dataset"),
    alt.Tooltip("fracc_markers", title="Aprox. number of markers in this category"),
    alt.Tooltip("BUSCO_percentage", title="Percentage [%]"),
]

busco_bars = (
    alt.Chart(df2)
    .mark_bar()
    .encode(
        x=busco_x,
        y=busco_y,
        color=busco_color,
        # Segments are stacked following the numeric "order" column
        order=alt.Order('order', sort='ascending'),
        tooltip=busco_tooltip,
        opacity=alt.value(0.85),
    )
    .properties(width=600, height=18*9)
)

# One facet row per sample, each with its own y scale
output_plot = (
    busco_bars
    .facet(row=alt.Row("sample_id", title='Sample ID'))
    .resolve_scale(y="independent")
)

output_plot

# %%
# Assembly-statistics bar chart whose plotted column is chosen from a dropdown

# Drop-down menu listing the df3 columns that can be plotted
# NOTE(review): "Assambly" in the two labels below is a misspelling of
# "Assembly"; the displayed text is left unchanged here.
dropdown = alt.binding_select(
    options=[
        'scaffold_n50',
        'contigs_n50',
        'percent_gaps',
        'number_of_scaffolds',
    ],
    name="Assambly Statistics: "
)

# Parameter bound to the dropdown; starts on scaffold_n50
xcol_param = alt.param(
    value='scaffold_n50',
    bind=dropdown
)

output_plot2 = alt.Chart(df3).mark_bar().encode(
    x=alt.X('x:Q').title('Assambly Statistic'),
    # The y axis is hidden on this chart
    y=alt.Y('mag_id', axis=None),
    tooltip=[
        alt.Tooltip('x:Q', title="value"),
    ],
    opacity=alt.value(0.85)
).transform_calculate(
    # Expose the column named by the parameter as a field called "x",
    # which the x encoding and the tooltip above both read
    x=f'datum[{xcol_param.name}]'
).add_params(
    xcol_param
).properties(
    width=600,
    height=18*9
).facet(
    # Facet by sample with no row title and a zero-size header label
    row=alt.Row("sample_id", title=None, header=alt.Header(labelFontSize=0))
).resolve_scale(
    y="independent"
)

# No configure_*() call is made on this chart: those methods return a new
# chart rather than modifying in place, so a bare call is discarded, and
# chart-level config has to be set on the top-level chart of a concatenation.

# Place the BUSCO chart and the assembly-statistics chart side by side
# (3 px apart) and set the shared axis / legend / header font sizes once,
# on the combined chart
out3 = (
    alt.hconcat(output_plot, output_plot2, spacing=3)
    .configure_axis(labelFontSize=17, titleFontSize=20)
    .configure_legend(labelFontSize=17, titleFontSize=20)
    .configure_header(labelFontSize=17, titleFontSize=20)
)
out3



In [7]:
import altair as alt
from vega_datasets import data

# URL of the vega_datasets "cars" dataset (the .url attribute, not loaded data)
cars = data.cars.url

In [12]:
import altair as alt
from vega_datasets import data

# Reference the vega_datasets "cars" dataset by its URL
cars = data.cars.url

# Scatter plot of horsepower against miles per gallon, coloured by origin
alt.Chart(cars).mark_circle().encode(
    alt.X('Horsepower:Q'),
    alt.Y('Miles_per_Gallon:Q'),
    alt.Color('Origin:N'),
)

'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/cars.json'