# In [2]:  (Jupyter notebook cell prompt)
import plotly.graph_objects as go
import pandas as pd
import numpy as np

# Dataset: per-species metrics. Every list has one entry per species, so
# position i in each metric list belongs to data['Species'][i].
data = {
    'Species': list('ABCDEFGHIJKLMNOPQ'),
    'N90': [
        0.17, 0, 0, 466.13, 0.26, 0.2, 552.11, 0.07, 1.1,
        0.11, 0.14, 0.24, 2.41, 9.85, 15.42, 50.61, 26.26,
    ],
    'Scaffolds': [
        14122, 0, 109197, 8, 13803, 187217, 8, 35663, 0,
        29551, 28660, 130347, 1154, 189, 135, 61, 95,
    ],
    'BUSCO': [
        92.71, 0, 0, 94.1, 93.75, 96.4, 98.5, 92.92, 96.5,
        94.03, 94.03, 97.5, 95.1, 97.9, 97.9, 98.3, 97.9,
    ],
    'Sample4': [
        25, 0, 21, 19, 17, 15, 13, 13, 13,
        13, 13, 13, 13, 13, 13, 13, 13,
    ],
}

# One row per species, one column per key of `data`.
df = pd.DataFrame(data)

# Metric columns to plot: every key of `data` except the species label,
# in declaration order.
categories = [column for column in data if column != 'Species']

# Empty figure; the per-metric traces and value labels are added to it below.
fig = go.Figure()

# Give each species its own horizontal row: the row at index i is drawn at
# y = i * spacing (in y-axis data units).
spacing = 50
df['y_position'] = spacing * df.index

def percentile_scaling(series, max_size=30):
    """Scale values to marker sizes according to their percentile rank.

    Each value is replaced by its fractional rank within ``series``
    (``Series.rank(pct=True)``) multiplied by ``max_size``. Percentile
    ranks lie in (0, 1], so the result lies in ``(0, max_size]``: no value
    is ever mapped to 0, not even the smallest one. Tied values share the
    average of their ranks (the pandas default), so a largest value that
    is tied maps to less than ``max_size``. NaN entries stay NaN.

    Args:
        series: pandas Series of values to rank.
        max_size: Multiplier applied to the percentile rank, i.e. the size
            given to a unique largest value. Defaults to 30.

    Returns:
        A Series with the same index as ``series`` holding the scaled
        percentile ranks.
    """
    return series.rank(pct=True) * max_size

# Draw one column of bubbles per metric. The marker diameter of each bubble is
# the percentile-scaled value produced by percentile_scaling().
for category in categories:
    size_column = f'log_{category}'
    df[size_column] = percentile_scaling(df[category])

    # Alternative scalings that can replace the assignment above:
    #   * uniform log scaling for every metric:
    #         df[category].apply(lambda x: 20 * np.log10(x + 1))
    #   * distinct scaling: percentile_scaling() for 'Scaffolds' only and the
    #     log formula above for every other metric.

    bubble_sizes = df[size_column]
    column_x = [category] * len(df)  # every bubble sits on this metric's tick

    # Simulated shadow: a grey copy of each bubble, offset by 1 y-unit and
    # added first so that it is drawn underneath the main bubble.
    shadow_marker = {
        'size': bubble_sizes,
        'sizemode': 'diameter',
        'color': 'rgba(50, 50, 50, 0.5)',
    }
    shadow_trace = go.Scatter(
        x=column_x,
        y=df['y_position'] + 1,
        mode='markers',
        marker=shadow_marker,
        hoverinfo='none',
    )
    fig.add_trace(shadow_trace)

    # Main bubbles, half transparent, in the trace's default colour.
    bubble_marker = {
        'size': bubble_sizes,
        'sizemode': 'diameter',
        'opacity': 0.5,
    }
    bubble_trace = go.Scatter(
        x=column_x,
        y=df['y_position'],
        mode='markers',
        marker=bubble_marker,
        hoverinfo='none',
    )
    fig.add_trace(bubble_trace)

    # Value labels: one text annotation per species, pushed to the right of
    # the bubble column. The shift is the largest marker size in this column
    # (a diameter, since sizemode='diameter') plus 5.
    label_shift = bubble_sizes.max() + 5
    label_font = {
        'family': "Arial, sans-serif",
        'size': 12,
        'color': "black",
    }
    for row_y, raw_value in zip(df['y_position'], df[category]):
        fig.add_annotation(
            x=category,
            y=row_y,
            xref="x",
            yref="y",
            text=str(raw_value),
            xshift=label_shift,
            showarrow=False,
            font=label_font,
        )

# Layout: the x axis is categorical (one tick per metric); the y axis shows
# the species names at the row positions stored in df['y_position'].
x_axis = {'title': "Metrics", 'type': 'category'}
y_axis = {
    'title': "Species",
    'tickvals': df['y_position'],
    'ticktext': df['Species'],
}
fig.update_layout(
    title="Scatter plot for Species based on provided metrics",
    xaxis=x_axis,
    yaxis=y_axis,
    showlegend=False,  # traces are unnamed, so a legend would carry no information
    height=800,
)

# Render the finished figure.
fig.show()
