In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

def _build_taxonomy_hierarchy(taxonomy_values):
    """Expand "/"-separated lineage strings into one row per unique tree node.

    Args:
        taxonomy_values: Iterable of lineage entries such as
            ``"living/Eukaryota/Metazoa"``. Missing values should already
            have been removed by the caller.

    Returns:
        A DataFrame with columns ``ids`` (full path of the node), ``labels``
        (last path segment), ``parents`` (full path of the parent, ``""`` for
        top-level nodes), ``level`` (1-based depth) and ``count`` (number of
        input entries whose path passes through the node). Each ``ids`` value
        appears exactly once. The frame is empty (but has all columns) when
        there are no usable entries.
    """
    rows = []
    for entry in taxonomy_values:
        # str() keeps non-string cells from crashing on .split(). Empty
        # segments (leading/trailing/double slashes) are dropped because they
        # would create nodes with an empty label, and a node whose id is ""
        # would collide with the "" marker used for "no parent".
        segments = [part.strip() for part in str(entry).split("/")]
        levels = [part for part in segments if part]
        for depth in range(1, len(levels) + 1):
            rows.append({
                "ids": "/".join(levels[:depth]),
                "labels": levels[depth - 1],
                # Joining an empty slice yields "", i.e. the root marker.
                "parents": "/".join(levels[:depth - 1]),
                "level": depth,
            })

    # Explicit columns keep the frame well-formed even when `rows` is empty.
    hierarchy_df = pd.DataFrame(rows, columns=["ids", "labels", "parents", "level"])

    # Every prefix occurs at most once per entry, so the number of rows that
    # share an id equals the number of entries passing through that node.
    hierarchy_df["count"] = hierarchy_df["ids"].map(hierarchy_df["ids"].value_counts())

    return hierarchy_df.drop_duplicates(subset=["ids"])


def create_taxonomy_sunburst(taxonomy_csv_path, taxonomy_column, color_discrete_sequence=None):
    """Build a sunburst chart of a "/"-separated taxonomy column in a CSV file.

    Args:
        taxonomy_csv_path: Path (or anything ``pandas.read_csv`` accepts) of
            the CSV file to read.
        taxonomy_column: Name of the column holding lineage strings whose
            levels are separated by ``"/"``. Missing values are ignored.
        color_discrete_sequence: Optional colorscale used to colour nodes by
            their depth; passed to Plotly as the marker colorscale (e.g. a
            list of at least two colour strings). Defaults to a nine-step
            light-to-dark blue ramp.

    Returns:
        A ``(fig, merged_df)`` tuple: the Plotly figure and the DataFrame of
        unique nodes (columns ``ids``, ``labels``, ``parents``, ``level``,
        ``count``) that the figure was built from.

    Raises:
        KeyError: If ``taxonomy_column`` is not a column of the CSV.
    """
    df = pd.read_csv(taxonomy_csv_path)
    merged_df = _build_taxonomy_hierarchy(df[taxonomy_column].dropna())

    if color_discrete_sequence is None:
        color_discrete_sequence = [
            '#f7fbff', '#deebf7', '#c6dbef', '#9ecae1',
            '#6baed6', '#4292c6', '#2171b5', '#08519c', '#08306b'
        ]

    # Create the sunburst chart
    fig = go.Figure(go.Sunburst(
        ids=merged_df['ids'],
        labels=merged_df['labels'],
        parents=merged_df['parents'],
        values=merged_df['count'],
        marker=dict(
            colors=merged_df['level'],
            # Honour the caller's palette; previously this argument was
            # accepted but never applied to the chart.
            colorscale=color_discrete_sequence,
            showscale=True,  # Show color scale
            colorbar=dict(
                # Nested title form; the flat `titleside` attribute is
                # deprecated and rejected by newer Plotly releases.
                title=dict(text='Taxonomy Depth', side='right')
            )
        ),
        # A parent's count already includes all of its descendants.
        branchvalues='total',
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Path: %{id}<extra></extra>'
    ))

    # Customize layout
    fig.update_layout(
        title_text="Taxonomy Sunburst Chart",
        title_x=0.5,
        width=1000,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )

    return fig, merged_df

# Example run: chart the level-1 lineage column of the ZooScanNet taxa table.
taxonomy_column = 'lineage_level1'
taxonomy_csv_path = "/home/nick/Downloads/ZooScanNet/taxa.csv"

fig, df = create_taxonomy_sunburst(
    taxonomy_csv_path=taxonomy_csv_path,
    taxonomy_column=taxonomy_column,
)
fig.show()

In [None]:
# Inspect the node table returned by create_taxonomy_sunburst (one row per
# unique taxonomy path); as the last expression of the cell it is displayed.
df