# Notebook to plot various figures

This notebook contains the Plotly figures generated for the different tests and metrics.

In [1]:
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
import numpy as np
import os

# Results

### Skull bone marrow results

#### Conventional methods

In [None]:
# SingleR confidence scores (delta.next) paired with annotation accuracy

# Accuracy code for each row of the delta.next table (aligned on the default integer index)
results = [1,1,1,1,1,2,0,1,1,1,1,1,1,1,1,1,2,2,1,1,1,1,1,2,1,1,2,0,0,1,1,1,1,1,4,0,2,1,2,2,2,1,0,0,1,1,0,2,4,0,1,1,4]

# Human-readable meaning of each accuracy code
groups = {
    0: 'Incorrect',
    1: 'Partially Correct',
    2: 'Correct',
    4: 'Unknown',
}
result_names = [groups[code] for code in results]

# Delta next values read from the SingleR results folder
delta_next = pd.read_csv('../../SingleR/results/sbm_singleR_deltanext_mATLAS_droplet.csv')

# Scores and labels side by side; the unnamed Series ends up as column 0
df = pd.concat([delta_next, pd.Series(result_names)], axis=1)

# Keep only the rows whose accuracy label is not 'Unknown'
df = df.loc[df[0] != 'Unknown']
df



Unnamed: 0,ct_ann_clusters$delta.next,0
0,0.290358,Partially Correct
1,0.0504,Partially Correct
2,0.547565,Partially Correct
3,0.131532,Partially Correct
4,0.053523,Partially Correct
5,0.06715,Correct
6,0.069641,Incorrect
7,0.13068,Partially Correct
8,0.261961,Partially Correct
9,0.0592,Partially Correct


In [20]:
# Box plot of the SingleR delta.next score for each accuracy class

# Colour per class label. Keying on the label (instead of pairing the unique
# values with a positional colour list) keeps every class on its intended
# colour whatever order the classes first appear in df.
class_colors = {
    'Partially Correct': '#F2CD5D',
    'Correct': '#4C9F70',
    'Incorrect': '#F45B69',
}
fallback_color = '#7F7F7F'  # neutral grey for any class without an entry above

# Missing labels are skipped; every remaining class gets a trace
groups = df[0].dropna().unique()
colors = [class_colors.get(group, fallback_color) for group in groups]
traces = []

for group, col in zip(groups, colors):
    group_data = df[df[0] == group]['ct_ann_clusters$delta.next']
    traces.append(
        go.Box(
            y=group_data,
            name=str(group),
            boxpoints='all',        # Show all points
            jitter=0.4,             # Spread out points for visibility
            pointpos=-1.8,          # Offset points from box
            marker=dict(size=4, opacity=0.6),
            line=dict(width=1, color=col),
            fillcolor='rgba(0,0,0,0)',  # Transparent fill
            boxmean=False           # Mean marker disabled
        )
    )

# Create the layout
layout = go.Layout(
    title="Distribution of SingleR 'confidence' by class",
    yaxis=dict(title="Delta Next (SingleR)"),
    xaxis=dict(title="Class"),
    font=dict(size=12),
    plot_bgcolor='white',
    paper_bgcolor='white',
    showlegend=False
)

# Create the figure
fig = go.Figure(data=traces, layout=layout)
fig.show()

In [18]:
# Figure 1 - summary of the SingleR results for each reference dataset
# Grouped bar chart covering the 5 reference datasets
colors=['#4C9F70', '#F2CD5D', '#F45B69']

ref_datasets=['mATLAS FACS', 'mATLAS Droplet', 'PangLao', 'CellXGene', 'Tabula Muris']

# Results: counts divided by 50 and scaled to percent
correct = np.array([3, 10, 5, 9, 8]) / 50 * 100
partially_correct = np.array([12, 31, 35, 30, 30]) / 50 * 100
incorrect = np.array([35, 9, 10, 11, 12]) / 50 * 100

# One bar series per accuracy class, coloured in the order of `colors`
bars = [
    go.Bar(name=label, x=ref_datasets, y=values, marker_color=color)
    for label, values, color in zip(
        ['Correct', 'Partially correct', 'Incorrect'],
        [correct, partially_correct, incorrect],
        colors,
    )
]
fig = go.Figure(data=bars, layout=dict(barcornerradius=5))

# Grouped bars, axis titles and a centred figure title
axis_title_font = dict(size=14)
fig.update_layout(
    barmode='group',
    template='none',
    yaxis=dict(title='Percentage of clusters (%)', title_font=axis_title_font),
    xaxis=dict(title='Reference datasets', title_font=axis_title_font),
    title=dict(
        text='SingleR annotation of SBM using 5 reference datasets',
        font=dict(size=16),
        xanchor='center',
        yanchor='middle',
        x=0.5,
        y=0.95,
    ),
)
fig.show()
#pio.write_image(fig, '../../PNG_results/sbm_barchart_singleR_results.png')



In [74]:
# Figure 1b - same results as Figure 1, shown as donut charts

labels = ["Correct", "Partially correct", "Incorrect"]

# One entry per subplot:
#   (trace name, [correct, partially correct, incorrect] counts,
#    text shown in the donut hole, font size of that text)
# The trace names of the first two pies now agree with their centre labels and
# with Figure 1, where [3, 12, 35] belongs to mATLAS FACS and [10, 31, 9] to
# mATLAS Droplet (they were previously swapped in the hover text).
pie_specs = [
    ("mATLAS FACS", [3, 12, 35], 'FACS', 18),
    ("mATLAS Droplet", [10, 31, 9], 'Droplet', 14),
    ("PangLao", [5, 35, 10], 'PangLao', 12),
    ("CellXGene", [9, 30, 11], 'CXG', 20),
    ("Tabula Muris", [8, 30, 12], 'TM', 20),
]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=len(pie_specs),
                    specs=[[{'type': 'domain'} for _ in pie_specs]])
for col, (trace_name, counts, _, _) in enumerate(pie_specs, start=1):
    fig.add_trace(go.Pie(labels=labels, values=counts, name=trace_name), 1, col)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name",
                  marker=dict(colors=['green', 'orange', 'red']))

fig.update_layout(
    # Add annotations in the center of the donut pies
    # (x is the midpoint of each subplot's horizontal domain).
    annotations=[
        dict(text=center_text, x=sum(fig.get_subplot(1, col).x) / 2, y=0.5,
             font_size=center_size, showarrow=False, xanchor="center")
        for col, (_, _, center_text, center_size) in enumerate(pie_specs, start=1)
    ])
fig.show()

In [95]:
# Figure X - Comparisons of reference datasets (stacked horizontal bars)
# NOTE(review): the fourth dataset is labelled 'CellXGene' in Figure 1 for the
# same counts - confirm which label is intended.
ref_datasets=['mATLAS FACS', 'mATLAS Droplet', 'PangLao', 'Brioni et al', 'Tabula Muris']
colors=['#4C9F70', '#F2CD5D', '#F45B69']

# Results: counts divided by 50 and scaled to percent
correct = np.array([3, 10, 5, 9, 8]) / 50 * 100
partially_correct = np.array([12, 31, 35, 30, 30]) / 50 * 100
incorrect = np.array([35, 9, 10, 11, 12]) / 50 * 100


fig = go.Figure()

# Bar labels are rounded with '.0f' rather than truncated with int():
# float percentages such as 29 / 50 * 100 evaluate to 57.99999999999999,
# which int() would label as 57%.
fig.add_trace(go.Bar(
    y=ref_datasets,
    x=partially_correct,
    name='Partially correct',
    orientation='h',
    marker=dict(
        color='#F2CD5D',
        line=dict(color='orange', width=0)
    ),
    text = [f"{val:.0f}%" for val in partially_correct],
    textposition = 'auto'
    ))

fig.add_trace(go.Bar(
    y=ref_datasets,
    x=correct,
    name='Correct',
    orientation='h',
    marker=dict(
        color='#4C9F70',
        line=dict(color='green', width=0)
    ),
    text = [f"{val:.0f}%" for val in correct],
    textposition = 'auto'
    ))

# The 'Incorrect' segment carries a hatch pattern and no text label
fig.add_trace(go.Bar(
    y=ref_datasets,
    x=incorrect,
    name='Incorrect',
    orientation='h',
    marker=dict(
        color='#F45B69',
        pattern_shape = '/'),
    width = 0.8
))

fig.update_layout(barmode='stack',
                  barcornerradius=6,
                  template='none',
                  xaxis = dict(
                      title='Percentage of clusters (%)',
                      title_font = dict(size=14)),
                  yaxis = dict(automargin=True),  # leave room for the dataset names
                  height=600)
fig.show()




In [16]:
# Figure 2 - summary of the marker-based annotation results
# Grouped bar chart, one group of bars per method

ref_datasets=['scCATCH', 'scTYPE', 'scMayoMap']
# NOTE(review): y1, y2 and y3 are not read by anything in this cell (they hold
# five values each while the figure has three methods) - confirm they can go.
y1 = [3, 10, 5, 9, 8]
y2 = [12, 31, 35, 30, 30]
y3 = [12, 31, 35, 30, 30]

# (legend name, counts per method in the order of ref_datasets, bar colour)
bar_specs = [
    ('Correct', [13, 17, 12], 'green'),
    ('Partially correct', [8, 21, 29], 'orange'),
    ('Incorrect', [29, 12, 9], 'red'),
]
fig = go.Figure(
    data=[
        go.Bar(name=label, x=ref_datasets, y=counts, marker_color=color)
        for label, counts, color in bar_specs
    ],
    layout=dict(barcornerradius=5),
)

# Grouped bars, y-axis title and a centred figure title
fig.update_layout(
    barmode='group',
    template='none',
    yaxis=dict(title='Nb of clusters', title_font=dict(size=14)),
    title=dict(
        text='Annotation results for 3 different marker-based annotation methods',
        font=dict(size=16),
        xanchor='center',
        yanchor='middle',
        x=0.5,
        y=0.95,
    ),
)
fig.show()
# Export the figure as a PNG next to the other results
pio.write_image(fig, '../../PNG_results/sbm_barchart_markerbased_results.png')

In [30]:
# Figure 2b - same results as Figure 2, shown as donut charts

labels = ["Correct", "Partially correct", "Incorrect"]

# One entry per subplot:
#   (trace name, [correct, partially correct, incorrect] counts,
#    text shown in the donut hole, font size of that text)
pie_specs = [
    ("scCATCH", [13, 8, 29], 'scCATCH', 14),
    ("scTYPE", [17, 21, 12], 'scTYPE', 14),
    ("scMayoMap", [12, 29, 9], 'scMM', 20),
]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=3,
                    specs=[[{'type': 'domain'} for _ in pie_specs]])
for col, (trace_name, counts, _, _) in enumerate(pie_specs, start=1):
    fig.add_trace(go.Pie(labels=labels, values=counts, name=trace_name), 1, col)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name",
                  marker=dict(colors=['green', 'orange', 'red']))

# Centre text of each donut, placed at the midpoint of its horizontal domain
center_annotations = [
    dict(text=center_text, x=sum(fig.get_subplot(1, col).x) / 2, y=0.5,
         font_size=center_size, showarrow=False, xanchor="center")
    for col, (_, _, center_text, center_size) in enumerate(pie_specs, start=1)
]
fig.update_layout(annotations=center_annotations)

fig.show()

In [96]:
# Figure 3 - Test different 'add-ons' to see results on sum of correct and partially correct
# Horizontal barplot showing : Base SingleR result for the best reference dataset (mATLAS Droplet) + Base scMAP result for mATLAS Droplet
# + SingleR result for mATLAS and TM + scMAP for mATLAS and TM + SingleR with MAGIC imputation
ref_datasets = ['SingleR + MAGIC', 'SingleR + TM', 'SingleR', 'scMAP + TM', 'scMAP']

# Results: counts divided by 50 and scaled to percent
partially_correct = np.array([32, 34, 31, 31, 19]) / 50 * 100
correct = np.array([11, 6, 10, 7, 8]) / 50 * 100
incorrect = np.array([7, 10, 9, 12, 23]) / 50 * 100


fig = go.Figure()

# Bar labels are rounded with '.0f' rather than truncated with int():
# float percentages such as 29 / 50 * 100 evaluate to 57.99999999999999,
# which int() would label as 57%.
fig.add_trace(go.Bar(
    y=ref_datasets,
    x=partially_correct,
    name='Partially correct',
    orientation='h',
    marker=dict(
        color='#F2CD5D',
        line=dict(color='orange', width=0)
    ),
    text = [f"{val:.0f}%" for val in partially_correct],
    textposition = 'auto'
    ))

fig.add_trace(go.Bar(
    y=ref_datasets,
    x=correct,
    name='Correct',
    orientation='h',
    marker=dict(
        color='#4C9F70',
        line=dict(color='green', width=0)
    ),
    text = [f"{val:.0f}%" for val in correct],
    textposition = 'auto'
    ))

# The 'Incorrect' segment carries a hatch pattern and no text label
fig.add_trace(go.Bar(
    y=ref_datasets,
    x=incorrect,
    name='Incorrect',
    orientation='h',
    marker=dict(
        color='#F45B69',
        pattern_shape = '/'),
    width = 0.8
))

fig.update_layout(barmode='stack',
                  barcornerradius=6,
                  template='none',
                  xaxis = dict(
                      title='Percentage of clusters (%)',
                      title_font = dict(size=14)),
                  yaxis = dict(automargin=True),  # leave room for the method names
                  height=600)
fig.show()

#### GPTCelltype results vs other methods

In [None]:
# Data for a bar chart comparing different GPT model versions

# Alternative grouping by model, currently disabled:
#categories = ['GPT-o3mini','GPT-4', 'GPT-4.1','GPT-4.5 Preview']
categories = ['Correct', 'Partially Correct', 'Incorrect']

# Results: counts per entry of `categories`, divided by 50 and scaled to percent
gpt_o3mini, gpt_4, gpt_4_1, gpt_4_5 = (
    np.array(counts) / 50 * 100
    for counts in (
        [13, 26, 11],   # GPT-o3mini
        [8, 30, 12],    # GPT-4
        [15, 23, 12],   # GPT-4.1
        [18, 26, 6],    # GPT-4.5
    )
)



#### UMAPs

In [149]:
# Read csv of results: the per-cell annotation table. Per the displayed output
# it holds the UMAP coordinates ('umap_1', 'umap_2') plus, for each annotation
# method, a label column and a matching '<method>_res' accuracy column.
df = pd.read_csv('../results/sbm_annotations.csv')
# Last expression of the cell, so the frame is displayed
df

Unnamed: 0,umap_1,umap_2,orig.ident,nCount_RNA,nFeature_RNA,mitoPercent,nCount_SCT,nFeature_SCT,SCT_snn_res.2.5,seurat_clusters,...,GPTCelltype_new,Gemini,Final,Manual_res,SingleR_res,scTYPE_res,GPTCelltype_res,GPTCelltype_new_res,Gemini_res,Final_res
0,5.088853,-8.600957,WT Skull,1727,1012,1.737116,2021,1011,17,17,...,B cells,Immature B cell,B Cells,Correct,Correct,Correct,Partially correct,Partially correct,Correct,Partially correct
1,-6.478175,-2.756212,WT Skull,1617,578,0.000000,2167,578,2,2,...,Neutrophils,Neutrophil,Neutrophil,Correct,Partially correct,Incorrect,Partially correct,Partially correct,Partially correct,Partially correct
2,2.878542,10.383287,WT Skull,3704,1592,0.512959,2915,1591,30,30,...,Dendritic Cells,Conventional Dendritic Cell type 1 (cDC1),Dendritic Cells,Correct,Partially correct,Correct,Partially correct,Partially correct,Correct,Partially correct
3,2.106007,-10.542571,WT Skull,1593,837,1.569366,2034,837,3,3,...,B cells,B cell,B Cells,Correct,Partially correct,Partially correct,Partially correct,Partially correct,Partially correct,Partially correct
4,10.388345,7.515175,WT Skull,2203,1096,1.679528,2287,1096,20,20,...,Plasmacytoid dendritic cells (pDCs),Plasmacytoid Dendritic Cell,Plasmacytoid Dendritic Cells (Pdc),Correct,Partially correct,Correct,Correct,Correct,Correct,Correct
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24802,8.881256,4.937150,AD Skull,6620,2649,0.876133,2484,1457,18,18,...,Hematopoietic Progenitors,Hematopoietic Stem and Progenitor Cell,Hematopoietic Stem And Progenitor Cells,Correct,Correct,Partially correct,Correct,Correct,Correct,Correct
24803,9.306492,-5.254325,AD Skull,3605,1643,0.693481,2642,1632,37,37,...,Pre-B cells,Pro-B cell,Pro-B Cells/Pre-B Cells,Correct,Correct,Partially correct,Partially correct,Correct,Partially correct,Correct
24804,2.348807,11.018588,AD Skull,3975,1677,0.528302,2722,1655,7,7,...,Monocytes/Macrophages,Monocyte / Macrophage,Monocyte/Macrophage,Correct,Incorrect,Correct,Correct,Correct,Correct,Correct
24805,7.336953,4.995529,AD Skull,9803,3131,0.326431,2296,1137,42,42,...,Mast Cells and Basophils,Basophil / Mast Cell Progenitor,Mast Cells,Correct,Partially correct,Partially correct,Correct,Correct,Partially correct,Correct


##### Base

In [161]:
# Base UMAPS

# Colour map: manual annotation label -> hex colour used on the reference UMAP.
# NOTE(review): two pairs of labels share a colour and cannot be told apart
# by colour alone: "Erythroblasts" / "Mature B cells" (#cb1818) and
# "Immature B cells" / "Neutrophil Immature" (#fb6a4a) - confirm this is intended.
cell_colors = {
  "Pax5 B cells" : "#22ace0",                      # Sky blue
  "Basophils" : "#a869f5",                   # Purple
  "CD4 T cells" : "#629c7b",                 # Muted green
  "CD8 T cells" : "#7ed9a5",                 # Light mint green
  "cDCs" : "#c9bc2c",                        # Mustard yellow
  "Erythroblasts" : "#cb1818",               # Strong red (same value as Mature B cells)
  "HSC/CMP" : "#abd179",                      # Light yellow-green
  "MDP" : "#82b342",                         # Medium green
  "GMP" : "#73bf0f",                         # Bright green
  "Immature B cells" : "#fb6a4a",            # Coral (same value as Neutrophil Immature)
  "Macrophages" : "#69afc9",                 # Light steel blue
  "Mast cells" : "#1882cc",                  # Medium blue
  "Mature B cells" : "#cb1818",              # Strong red (same value as Erythroblasts)
  "Memory B cells" : "#cb3c18",                         # Red-orange
  "Monocytes" : "#ced1a5",                   # Pale khaki
  "Neutrophil Immature" : "#fb6a4a",            # Coral (same value as Immature B cells)
  "Neutrophil Mature" : "#3e7e96",               # Teal blue
  "Neutrophil Pre/Pro" : "#bad2db",             # Pale blue-grey
  "NK" : "#306948",                          # Dark green
  "NKT" : "#c5f0d7",                         # Very pale mint
  "pDCs" : "#dceb6e",                        # Light lime yellow
  "Plasma cells" : "#c947a7",                      # Magenta
  "Pre-B cells" : "#ff5972",                 # Pinkish red
  "Pro-B cells" : "#fcae91",                 # Light salmon
  "Unknown 1" : "#87868f",                   # Grey
  "Unknown 2" : "#a8a6bd",                   # Lavender grey
  "Unknown 3" : "#a8a6ad"                    # Light grey
}

# Function
def plot_ref_umap(df, method, color_map, label_offsets=None):
    """
    Create a Plotly scatter plot of UMAP coordinates coloured by group, with a
    boxed text label at each group's centroid (in the spirit of Seurat's
    labelled DimPlot). Labels are not de-overlapped automatically; use
    `label_offsets` to nudge individual labels.

    Parameters:
    -----------
    df : pandas DataFrame
        DataFrame containing 'umap_1', 'umap_2' and the column named by `method`
    method : str
        Column name in df that contains the group labels
    color_map : dict
        Dictionary mapping group values to colors
    label_offsets : dict, optional
        Dictionary mapping a group value to an (dx, dy) shift added to the
        centroid before the label is placed. Defaults to moving the
        'CD4 T cells' label one unit to the left.

    Returns:
    --------
    plotly.graph_objects.Figure

    Raises:
    -------
    KeyError
        If a group present in df[method] has no entry in color_map
    """
    if label_offsets is None:
        label_offsets = {'CD4 T cells': (-1, 0)}

    # Get unique groups; rows without a label cannot be coloured or named
    groups = df[method].dropna().unique()

    # Fail early, naming every unmapped group, instead of part-way through plotting
    missing = [group for group in groups if group not in color_map]
    if missing:
        raise KeyError(f"No colour defined in color_map for group(s): {missing}")

    # Create a base figure
    fig = go.Figure()

    for group in groups:
        subset = df[df[method] == group]

        # Add the trace for this group
        fig.add_trace(go.Scattergl(
            x=subset['umap_1'],
            y=subset['umap_2'],
            mode='markers',
            marker=dict(
                color=color_map[group],
                size=4,
                opacity=0.7,
                symbol='circle',
            ),
            name=group,
            showlegend=False
        ))

        # Label position: centroid of the group's points plus the optional nudge
        dx, dy = label_offsets.get(group, (0, 0))
        centroid_x = subset['umap_1'].mean() + dx
        centroid_y = subset['umap_2'].mean() + dy

        fig.add_annotation(
            x=centroid_x,
            y=centroid_y,
            text=group,
            font=dict(
                family="Arial",
                size=11,
                color="black"
            ),
            bgcolor="rgba(255, 255, 255, 0.7)",
            bordercolor="black",
            borderwidth=1,
            borderpad=3,
            showarrow=False
        )

    # Update layout
    fig.update_layout(
        title='UMAP Visualization',
        xaxis_title='UMAP 1',
        yaxis_title='UMAP 2',
        legend_title='Group',
        template='plotly_white',
        width=1000,
        height=750
    )

    return fig


# Draw the UMAP coloured by the 'Manual' annotation column using the colour map above
fig = plot_ref_umap(df, 'Manual', cell_colors)
fig.show()


##### SingleR

In [106]:
# SingleR UMAP

# Read csv of results (reloaded so this cell does not rely on an earlier cell's df)
df = pd.read_csv('../results/sbm_annotations.csv')
# Not the last expression of the cell, so under default IPython settings nothing is displayed here
df

# Colour map: SingleR label -> hex colour. The "Same as ..." notes refer to
# the manual-annotation colour map of the base UMAP cell.
# NOTE(review): the keys are looked up against the values of the 'SingleR'
# column, so spellings such as "promonocyte + macropage" presumably mirror
# the CSV exactly - verify against the data before correcting them.
cell_colors = {
    "granulocyte": "#3e7e96",                       # Same as Neutrophil Mature
    "immature B cell": "#fb6a4a",                   # Same as Immature B cells
    "pre B cell": "#ff5972",                        # Same as Pre-B cells
    "monocyte": "#ced1a5",                          # Same as Monocytes
    "granulocytopoietic cell": "#bad2db",           # Same as Neutrophil Pre/Pro
    "naive T cell": "#7ed9a5",                      # Same as CD8 T cells
    "promonocyte": "#b9c087",                       # Darker variant of Monocytes
    "hematopoietic precursor cell": "#abd179",      # Same as HSC/CMP
    "CD4+ positive macrophage": "#4893ad",          # Darker version of Macrophages
    "promonocyte + macropage": "#89a3b0",           # Blend of promonocyte and macrophage
    "basophil": "#a869f5",                          # Same as Basophils
    "late pro B cell": "#fcae91",                   # Same as Pro-B cells
    "plasma cell": "#c947a7",                       # Same as Plasma cells
    "unknown cell-1": "#87868f",                    # Same as Unknown 1
    "megakaryocyte-erythroid progenitor cell": "#e4554a" # Lighter red than Erythroblasts
}


# Function
def plot_umap_with_custom_colors(df, method, color_map, label_offsets=None):
    """
    Create a Plotly scatter plot of UMAP coordinates coloured by group, with a
    boxed text label at each group's centroid (in the spirit of Seurat's
    labelled DimPlot). Labels are not de-overlapped automatically; use
    `label_offsets` to nudge individual labels.

    Parameters:
    -----------
    df : pandas DataFrame
        DataFrame containing 'umap_1', 'umap_2' and the column named by `method`
    method : str
        Column name in df that contains the group labels
    color_map : dict
        Dictionary mapping group values to colors
    label_offsets : dict, optional
        Dictionary mapping a group value to an (dx, dy) shift added to the
        centroid before the label is placed. By default no label is shifted.

    Returns:
    --------
    plotly.graph_objects.Figure

    Raises:
    -------
    KeyError
        If a group present in df[method] has no entry in color_map
    """
    if label_offsets is None:
        label_offsets = {}

    # Get unique groups; rows without a label cannot be coloured or named
    groups = df[method].dropna().unique()

    # Fail early, naming every unmapped group, instead of part-way through plotting
    missing = [group for group in groups if group not in color_map]
    if missing:
        raise KeyError(f"No colour defined in color_map for group(s): {missing}")

    # Create a base figure
    fig = go.Figure()

    for group in groups:
        subset = df[df[method] == group]

        # Add the trace for this group
        fig.add_trace(go.Scattergl(
            x=subset['umap_1'],
            y=subset['umap_2'],
            mode='markers',
            marker=dict(
                color=color_map[group],
                size=4,
                opacity=0.7,
                symbol='circle',
            ),
            name=group,
            showlegend=False
        ))

        # Label position: centroid of the group's points plus the optional nudge
        dx, dy = label_offsets.get(group, (0, 0))
        centroid_x = subset['umap_1'].mean() + dx
        centroid_y = subset['umap_2'].mean() + dy

        fig.add_annotation(
            x=centroid_x,
            y=centroid_y,
            text=group,
            font=dict(
                family="Arial",
                size=11,
                color="black"
            ),
            bgcolor="rgba(255, 255, 255, 0.7)",
            bordercolor="black",
            borderwidth=1,
            borderpad=3,
            showarrow=False
        )

    # Update layout
    fig.update_layout(
        title='UMAP Visualization',
        xaxis_title='UMAP 1',
        yaxis_title='UMAP 2',
        legend_title='Group',
        template='plotly_white',
        width=1000,
        height=750
    )

    return fig


# Draw the UMAP coloured by the 'SingleR' annotation column using the colour map above
fig = plot_umap_with_custom_colors(df, 'SingleR', cell_colors)
fig.show()

In [145]:
# UMAP figures coloured by annotation accuracy

# Read csv of results (reloaded so this cell does not rely on an earlier cell's df)
df = pd.read_csv('../results/sbm_annotations.csv')
# Not the last expression of the cell, so under default IPython settings nothing is displayed here
df

# Plot UMAP
def plot_umap_with_correction(df, method, color_map, symbol_map, label_map=None):
    """
    Create a Plotly scatter plot of UMAP coordinates coloured by the accuracy
    of an annotation method, with the method's own labels drawn on top.

    Points are grouped by the '<method>_res' column (one legend entry per
    accuracy class); text labels come from the `method` column.

    Parameters:
    -----------
    df : pandas DataFrame
        DataFrame containing 'umap_1', 'umap_2', the column named by `method`
        and the column named f'{method}_res'
    method : str
        Column name in df that contains the group labels
    color_map : dict
        Dictionary mapping accuracy classes to colors
    symbol_map : dict
        Dictionary mapping accuracy classes to Plotly marker symbols
    label_map : dict, optional
        Dictionary mapping a group label to its (x, y) label position. When
        given, only the groups present in it are labelled; when omitted, every
        group is labelled at the centroid of its points.

    Returns:
    --------
    plotly.graph_objects.Figure

    Raises:
    -------
    KeyError
        If an accuracy class present in the data has no entry in color_map
        or symbol_map
    """
    # Get unique accuracy classes; rows without a class cannot be styled
    method_accuracy =f'{method}_res'
    groups = df[method_accuracy].dropna().unique()
    print(groups)

    # Fail early, naming every unmapped class, instead of part-way through plotting
    for map_name, mapping in (('color_map', color_map), ('symbol_map', symbol_map)):
        missing = [group for group in groups if group not in mapping]
        if missing:
            raise KeyError(f"No entry in {map_name} for group(s): {missing}")

    # Create a base figure
    fig = go.Figure()

    # Add each accuracy class as a separate trace
    for group in groups:
        subset = df[df[method_accuracy] == group]

        fig.add_trace(go.Scattergl(
            x=subset['umap_1'],
            y=subset['umap_2'],
            mode='markers',
            marker=dict(
                color=color_map[group],
                size=4,
                opacity=0.7,
                symbol=symbol_map[group],
            ),
            name=group,
            showlegend=True
        ))

    # Now label the groups of the annotation itself
    for group in df[method].dropna().unique():
        if label_map is not None:
            # Caller-supplied positions: groups without one are left unlabelled
            if group not in label_map:
                continue
            centroid_x = label_map[group][0]
            centroid_y = label_map[group][1]
        else:
            # Default position: centroid of the group's points
            subset = df[df[method] == group]
            centroid_x = subset['umap_1'].mean()
            centroid_y = subset['umap_2'].mean()

        fig.add_annotation(
            x=centroid_x,
            y=centroid_y,
            text=group,
            font=dict(
                family="Arial",
                size=11,
                color="black"
            ),
            bgcolor="rgba(255, 255, 255, 0.7)",
            bordercolor="black",
            borderwidth=1,
            borderpad=3,
            showarrow=False
        )

    # Update layout
    fig.update_layout(
        title='UMAP Visualization',
        xaxis_title='UMAP 1',
        yaxis_title='UMAP 2',
        legend=dict(
            title = '',
            orientation="h",      # Set legend orientation to horizontal
            yanchor="bottom",     # Anchor point for y position
            y=1.02,               # Position slightly above the plot area (y=1.0 is top of plot)
            xanchor="center",     # Anchor point for x position
            x=0.5                 # Center the legend horizontally
        ),
        template='plotly_white',
        width=1000,
        height=750
    )

    return fig


# Plot
# Colour of each accuracy class
color_map = {
    'Correct': '#4C9F70',
    'Partially correct': '#F2CD5D',
    'Incorrect': '#F45B69',
    'Unknown': '#D3D3D3',
}

# Marker symbol of each accuracy class: circles, except diamonds for 'Unknown'
symbol_map = dict.fromkeys(['Correct', 'Partially correct', 'Incorrect'], 'circle')
symbol_map['Unknown'] = 'diamond'

# SingleR accuracy UMAP, labels placed at the group centroids
fig = plot_umap_with_correction(df, 'SingleR', color_map, symbol_map)
fig.show()



['Correct' 'Partially correct' 'Incorrect' 'Unknown']


##### scTYPE

In [117]:
# Now scType

# Colour map: scTYPE label -> hex colour. "original" in the notes below refers
# to the manual-annotation colour map of the base UMAP cell.
# NOTE(review): keys are looked up against the values of the 'scTYPE' column,
# so "Natural killer  cells" (two spaces) presumably mirrors the CSV - verify.
cell_colors = {
    "Intermediate monocytes": "#b0b588",        # Darker variant of the original Monocytes colour
    "Non-classical monocytes": "#ced1a5",       # Same as original Monocytes
    "Pre-B cells": "#ff5972",                   # Same as original Pre-B cells
    "Neutrophils": "#3e7e96",                   # Same as original Neutrophil Mature
    "Macrophages": "#69afc9",                   # Same as original Macrophages
    "Eosinophils": "#e47979",                   # New reddish color for eosinophils
    "CD8+ NKT-like cells": "#9cdcb9",           # Mix of CD8 and NKT colors
    "Myeloid Dendritic cells": "#d8cc4a",       # Similar to original cDCs
    "Naive B cells": "#42a1d4",                 # New blue for B cells
    "Progenitor cells": "#abd179",              # Same as original HSC/CMP
    "Natural killer  cells": "#306948",          # Same as original NK
    "Effector CD4+ T cells": "#4a7c60",         # Darker version of CD4 T cells
    "Naive CD8+ T cells": "#7ed9a5",            # Same as original CD8 T cells
    "ISG expressing immune cells": "#9595a8",   # New gray-purple color
    "Pro-B cells": "#fcae91",                   # Same as original Pro-B cells
    "Basophils": "#a869f5",                     # Same as original Basophils
    "Plasma B cells": "#c947a7",                # Same as original Plasma cells
    "Erythroid-like and erythroid precursor cells": "#cb1818",  # Same as original Erythroblasts
    "Naive CD4+ T cells": "#629c7b"             # Same as original CD4 T cells
}

# Label positions per group, filled in below (the scTYPE correction cell passes this on)
label_map = {}

# Manual (dx, dy) nudges applied to the centroid of specific groups' labels
label_shift = {
    "Pre-B cells": (2, 0),
    "Natural killer  cells": (0, -0.3),
    "Effector CD4+ T cells": (-1, 0),
    "Naive CD8+ T cells": (1, 0),
}

# Shared styling of the boxed text labels
label_style = dict(
    font=dict(family="Arial", size=11, color="black"),
    bgcolor="rgba(255, 255, 255, 0.7)",
    bordercolor="black",
    borderwidth=1,
    borderpad=3,
    showarrow=False,
)

# One scatter trace and one label per scTYPE group
groups = df['scTYPE'].unique()
fig = go.Figure()

for group in groups:
    subset = df[df['scTYPE'] == group]

    # Points of this group
    fig.add_trace(go.Scattergl(
        x=subset['umap_1'],
        y=subset['umap_2'],
        mode='markers',
        marker=dict(color=cell_colors[group], size=4, opacity=0.7, symbol='circle'),
        name=group,
        showlegend=False,
    ))

    # Centroid of the group, shifted when a manual nudge is defined
    dx, dy = label_shift.get(group, (0, 0))
    centroid_x = subset['umap_1'].mean() + dx
    centroid_y = subset['umap_2'].mean() + dy

    # Remember the position, then draw the label there
    label_map[group] = (centroid_x, centroid_y)
    fig.add_annotation(x=centroid_x, y=centroid_y, text=group, **label_style)

# Titles, theme and figure size
fig.update_layout(
    title='UMAP Visualization',
    xaxis_title='UMAP 1',
    yaxis_title='UMAP 2',
    legend_title='Group',
    template='plotly_white',
    width=1000,
    height=750,
)

fig.show()

In [118]:
# scTYPE correction
# Uses the `label_map` filled by the scTYPE UMAP loop above; `color_map` and `symbol_map`
# are not defined in this part of the notebook and must already exist in the kernel.
fig = plot_umap_with_correction(
    df,
    'scTYPE',
    color_map=color_map,
    symbol_map=symbol_map,
    label_map=label_map,
)
fig.show()

['Correct' 'Incorrect' 'Partially correct' 'Unknown']


##### GPTCelltype base

In [143]:
# GPTCelltype UMAP

# Hex colour per GPTCelltype label. The plotting loop below indexes this dict with every
# unique value of df['GPTCelltype'], so a label missing here raises KeyError.
# Spelling variants of one cell type (singular/plural, reordered names) are separate keys
# that map to the same or a near-identical colour.
cell_colors = {
    "B cell": "#22ace0",                          # Same as original Pax5 B cells
    "Neutrophil": "#3e7e96",                      # Same as original Neutrophil Mature
    "Dendritic cells": "#c9bc2c",                 # Same as original cDCs
    "Plasmacytoid Dendritic Cell (pDC)": "#dceb6e", # Same as original pDCs
    "Monocyte and Macrophage": "#9ebfb7",         # Blend of Monocytes and Macrophages
    "Monocyte and Neutrophil": "#a6b7b0",         # Blend of Monocyte and Neutrophil
    "Basophils and Mast cells": "#6175e0",        # Blend of Basophils and Mast cells
    "Pro-B cells": "#fcae91",                     # Same as original Pro-B cells
    "Dendritic cell progenitors": "#d8d056",      # Lighter shade of Dendritic cells
    "Macrophages": "#69afc9",                     # Same as original Macrophages
    "T cells": "#5a8c72",                         # Blend of CD4/CD8 T cells
    "Neutrophil progenitor": "#bad2db",           # Same as original Neutrophil Pre/Pro
    "Interferon-stimulated macrophages": "#4a91ad", # Darker variant of Macrophages
    "Monocyte": "#ced1a5",                        # Same as original Monocytes
    "Neutrophil and Monocyte": "#a6b9bb",         # Another blend of Neutrophil and Monocyte (not the same hex as "Monocyte and Neutrophil")
    "B cell progenitor": "#7fb3db",               # Between B cell and progenitor colors
    "NK cell": "#306948",                         # Same as original NK
    "Monocytes": "#ced1a5",                       # Same hex as "Monocyte" (plural spelling)
    "T cell/NK cell": "#45804c",                  # Blend of T cell and NK cell
    "Neutrophils": "#3e7e96",                     # Same hex as "Neutrophil" (plural spelling)
    "Erythrocytes": "#cb1818",                    # Same as original Erythroblasts
    "Granulocyte-Monocyte progenitor": "#85c245", # Between GMP and MDP
    "Proliferating B progenitor": "#d986af",      # Distinctive color for proliferating subset
    "Proliferating cells": "#b970b2",             # Distinctive proliferating cell color
    "T cell (CD4+ T cell subset)": "#629c7b",     # Same as original CD4 T cells
    "Hematopoietic Stem and Progenitor Cell": "#abd179", # Same as original HSC/CMP
    "T cell (CD8+ T cell subset)": "#7ed9a5",     # Same as original CD8 T cells
    "Granulocyte progenitor": "#93c7d7",          # Related to neutrophil progenitor
    "Pre-B cells": "#ff5972",                     # Same as original Pre-B cells
    "Erythroid cells": "#d93f3f",                 # Variant of Erythrocytes
    "B cells": "#22ace0",                         # Same hex as "B cell" (plural spelling)
    "Monocytes and Neutrophils": "#a6b7b0",       # Same hex as "Monocyte and Neutrophil"
    "Mast cells and Basophils": "#6175e0",        # Same hex as "Basophils and Mast cells"
    "Plasma cells": "#c947a7"                     # Same as original Plasma cells
}



# Label anchor (x, y) per group; filled in the loop and passed to the correction plot in the next cell
label_map = {}

# Manual (dx, dy) shifts added to a cluster centroid before its label is placed
# (presumably to reduce label overlap — the values are hand-tuned)
label_offsets = {
    "Neutrophil progenitor": (0, -1),
    "Erythrocytes": (0, -0.3),
    "Monocytes": (0, 0.8),
    "Dendritic cell progenitors": (0.05, -1),
    "Mast cells and Basophils": (0, -1.5),
    "T cell (CD4+ T cell subset)": (-0.05, -1),
}

# Styling shared by every point trace and every label box
point_style = dict(size=4, opacity=0.7, symbol='circle')
label_box = dict(
    font=dict(family="Arial", size=11, color="black"),
    bgcolor="rgba(255, 255, 255, 0.7)",
    bordercolor="black",
    borderwidth=1,
    borderpad=3,
    showarrow=False,
)

# One trace + one label per distinct GPTCelltype annotation
groups = df['GPTCelltype'].unique()
fig = go.Figure()

for group in groups:
    cells = df[df['GPTCelltype'] == group]

    # Points of this group, coloured from cell_colors (KeyError if the label is missing there)
    fig.add_trace(go.Scattergl(
        x=cells['umap_1'],
        y=cells['umap_2'],
        mode='markers',
        marker=dict(color=cell_colors[group], **point_style),
        name=group,
        showlegend=False,
    ))

    # Label position = centroid of the group's UMAP coordinates plus its manual shift
    dx, dy = label_offsets.get(group, (0, 0))
    label_x = cells['umap_1'].mean() + dx
    label_y = cells['umap_2'].mean() + dy

    label_map[group] = (label_x, label_y)
    fig.add_annotation(x=label_x, y=label_y, text=group, **label_box)

# Figure-level layout
umap_layout = dict(
    title='UMAP Visualization',
    xaxis_title='UMAP 1',
    yaxis_title='UMAP 2',
    legend_title='Group',
    template='plotly_white',
    width=1000,
    height=750,
)
fig.update_layout(**umap_layout)

fig.show()

In [144]:
# GPTCelltype correction
# Uses the `label_map` filled by the GPTCelltype UMAP loop above; `color_map` and `symbol_map`
# are not defined in this part of the notebook and must already exist in the kernel.
fig = plot_umap_with_correction(
    df,
    'GPTCelltype',
    color_map=color_map,
    symbol_map=symbol_map,
    label_map=label_map,
)
fig.show()

['Partially correct' 'Correct' 'Unknown' 'Incorrect']


##### GPTCelltype final

In [162]:
# Hex colour per label of the 'Final' annotation column. The plotting loop below indexes
# this dict with every unique value of df['Final'], so a label missing here raises KeyError.
# Several keys are spelling/ordering variants of one cell type and share a hex value;
# note that "Neutrophils  " (two trailing spaces) is a distinct key from "Neutrophils".
cell_colors = {
    "B Cells": "#22ace0",                                              # Same as original Pax5 B cells
    "Neutrophil": "#3e7e96",                                           # Same as original Neutrophil Mature
    "Dendritic Cells": "#c9bc2c",                                      # Same as original cDCs
    "Plasmacytoid Dendritic Cells (Pdc)": "#dceb6e",                   # Same as original pDCs
    "Proliferating Monocyte/Macrophage": "#8dbcd0",                    # Same as previous Proliferating Monocytes/Macrophages
    "Neutrophils/Neutrophil Progenitors": "#7ca0b0",                   # Blend of Neutrophils and progenitors
    "Mast Cells": "#1882cc",                                           # Same as original Mast cells
    "Pro-B Cells/Pre-B Cells": "#ffa185",                              # Blend of Pro-B and Pre-B cells
    "Hematopoietic Stem/Progenitor Cells": "#abd179",                  # Same as original HSC/CMP
    "Macrophages": "#69afc9",                                          # Same as original Macrophages
    "T Cells/Nk Cells": "#45804c",                                     # Same as previous T cells/NK cells
    "Neutrophil Progenitor": "#bad2db",                                # Same as original Neutrophil Pre/Pro
    "Interferon-Stimulated Macrophages": "#4a91ad",                    # Same as previous interferon-stimulated macrophages
    "Neutrophils And Monocytes": "#a6b7b0",                            # Same as previous Neutrophils and Monocytes
    "Pre-B Cells/Pro-B Cells": "#ffa185",                              # Same as Pro-B Cells/Pre-B Cells above
    "Nk Cells": "#306948",                                             # Same as original NK
    "Monocytes": "#ced1a5",                                            # Same as original Monocytes
    "Neutrophils": "#3e7e96",                                          # Same as Neutrophil above
    "Neutrophils  ": "#3e7e96",                                        # Same as Neutrophils (key has two trailing spaces)
    "Erythroid Cells": "#cb1818",                                      # Same as original Erythroblasts
    "Proliferating B Cell Progenitors": "#d986af",                     # Same as previous Proliferating B cells
    "Monocyte/Macrophage": "#9ebfb7",                                  # Same as previous Monocytes/Macrophages
    "Proliferating Cells": "#b970b2",                                  # Same as previous Proliferating Cells
    "T Cells": "#5a8c72",                                              # Same as previous T cells
    "Hematopoietic Stem And Progenitor Cells": "#abd179",              # Same as Hematopoietic Stem/Progenitor Cells
    "T Cells/T Cell (Cd8+)": "#6ab58d",                                # Blend with emphasis on CD8+ T cells
    "Dendritic Cells/Hematopoietic Stem/Progenitor Cells": "#c4cd57",  # Blend of DC and HSPC colors
    "Granulocyte Progenitors/Neutrophil Progenitor": "#93c7d7",        # Same as previous Granulocyte progenitor
    "B Cell Progenitors": "#7fb3db",                                   # Same as previous B cell progenitor
    "Neutrophil/Monocyte/Neutrophil": "#a6b7b0",                       # Same as Neutrophils And Monocytes
    "Hematopoietic Progenitor Cells/Hematopoietic Progenitors": "#b7dd8c", # Same as previous Hematopoietic Progenitors
    "Pre-B Cells/Immature B Cells": "#fd6260",                         # Blend of Pre-B and Immature B
    "Pro-B Cells": "#fcae91",                                          # Same as original Pro-B cells
    "Monocytes And Neutrophils": "#a6b7b0",                            # Same as Neutrophils And Monocytes
    "Plasma Cells": "#c947a7",                                         # Same as original Plasma cells
    "Hematopoietic Progenitor Cells/Hematopoietic Stem/Progenitor Cells": "#abd179" # Same hex as Hematopoietic Stem/Progenitor Cells (HSC/CMP)
}

# Label anchor (x, y) per labelled group; filled in the loop and passed to the correction plot in the next cell
label_map = {}

# Groups whose points are drawn but which get no text label and no label_map entry
# (presumably to avoid clutter — the selection is hand-picked).
# NOTE(review): the original chain also skipped "Mast Cells and Basophils", but that key is not in
# cell_colors, so the colour lookup would raise KeyError before the check could ever run. The dead
# branch was dropped; if the intent was to hide the "Mast Cells" label, add that key here.
unlabelled_groups = {
    "Proliferating Cells",
    "Neutrophil Progenitor",
    "Neutrophil/Monocyte/Neutrophil",
    "Neutrophils",
    "Neutrophils  ",
    "Hematopoietic Progenitor Cells/Hematopoietic Progenitors",
    "Hematopoietic Progenitor Cells/Hematopoietic Stem/Progenitor Cells",
    "Hematopoietic Stem/Progenitor Cells",
    "Monocytes And Neutrophils",
    "Monocyte/Macrophage",
    "Pro-B Cells",
}

# Manual (dx, dy) shifts added to a cluster centroid before its label is placed
label_offsets = {
    "T Cells": (-1.5, 0),
    "Dendritic Cells": (0, -0.3),
}

# Styling shared by every point trace and every label box
point_style = dict(size=4, opacity=0.7, symbol='circle')
label_box = dict(
    font=dict(family="Arial", size=11, color="black"),
    bgcolor="rgba(255, 255, 255, 0.7)",
    bordercolor="black",
    borderwidth=1,
    borderpad=3,
    showarrow=False,
)

# One trace per distinct 'Final' annotation
groups = df['Final'].unique()
fig = go.Figure()

for group in groups:
    subset = df[df['Final'] == group]

    # Points of this group, coloured from cell_colors (KeyError if the label is missing there)
    fig.add_trace(go.Scattergl(
        x=subset['umap_1'],
        y=subset['umap_2'],
        mode='markers',
        marker=dict(color=cell_colors[group], **point_style),
        name=group,
        showlegend=False,
    ))

    # Points only for the hidden groups
    if group in unlabelled_groups:
        continue

    # Label position = centroid of the group's UMAP coordinates plus its manual shift
    dx, dy = label_offsets.get(group, (0, 0))
    centroid_x = subset['umap_1'].mean() + dx
    centroid_y = subset['umap_2'].mean() + dy

    label_map[group] = (centroid_x, centroid_y)
    fig.add_annotation(x=centroid_x, y=centroid_y, text=group, **label_box)

# Figure-level layout
fig.update_layout(
    title='UMAP Visualization',
    xaxis_title='UMAP 1',
    yaxis_title='UMAP 2',
    legend_title='Group',
    template='plotly_white',
    width=1000,
    height=750
)

fig.show()

In [163]:
# GPTCelltype final correction
# Uses the `label_map` filled by the 'Final' UMAP loop above; `color_map` and `symbol_map`
# are not defined in this part of the notebook and must already exist in the kernel.
fig = plot_umap_with_correction(
    df,
    'Final',
    color_map=color_map,
    symbol_map=symbol_map,
    label_map=label_map,
)
fig.show()

['Partially correct' 'Correct' 'Incorrect' 'Unknown']


##### Gemini

In [168]:


# Hex colour per Gemini label. The plotting loop below indexes this dict with every
# unique value of df['Gemini'], so a label missing here raises KeyError.
cell_colors = {
    "Immature B cell": "#fb6a4a",                                 # Same as original Immature B cells
    "Neutrophil": "#3e7e96",                                      # Same as original Neutrophil Mature
    "Conventional Dendritic Cell type 1 (cDC1)": "#c9bc2c",       # Same as original cDCs
    "B cell": "#22ace0",                                          # Same as original Pax5 B cells
    "Plasmacytoid Dendritic Cell": "#dceb6e",                     # Same as original pDCs
    "Proliferating Monocyte Progenitor": "#e3d7a2",               # Related to Monocytes with proliferating aspect
    "Proliferating Granulocyte Precursor": "#98c2d1",             # Related to Neutrophil with proliferating aspect
    "Basophil / Mast cell": "#6175e0",                            # Same as previous Basophils and Mast Cells
    "Pro-B cell": "#fcae91",                                      # Same as original Pro-B cells
    "Lymphoid-primed Multipotent Progenitor (LMPP)": "#98c4e3",   # Same as previous Early lymphoid progenitors
    "Macrophage": "#69afc9",                                      # Same as original Macrophages
    "T cell": "#5a8c72",                                          # Same as previous T cells
    "Promyelocyte": "#7ca0b0",                                    # Related to Neutrophil progenitors
    "Interferon-Stimulated Monocyte / Macrophage": "#4a91ad",     # Same as previous Interferon-Stimulated Macrophages
    "Pre-B cell": "#ff5972",                                      # Same as original Pre-B cells
    "NK cell": "#306948",                                         # Same as original NK
    "Monocyte / Macrophage": "#9ebfb7",                           # Same as previous Monocytes/Macrophages
    "T cell / NK cell": "#45804c",                                # Same as previous T cells/NK cells
    "Interferon-Stimulated Neutrophil": "#607384",                # New color, related to Neutrophil and ISG cells
    "Erythroid cell": "#cb1818",                                  # Same as original Erythroblasts
    "Proliferating Pre-B cell": "#ff8da6",                        # Related to Pre-B cell with proliferating aspect
    "CD4+ T cell": "#629c7b",                                     # Same as original CD4 T cells
    "Hematopoietic Stem and Progenitor Cell": "#abd179",          # Same as original HSC/CMP
    "CD8+ T cell": "#7ed9a5",                                     # Same as original CD8 T cells
    "Myelocyte / Immature Neutrophil": "#bad2db",                 # Related to Neutrophil progenitors
    "Early Lymphoid Progenitor": "#98c4e3",                       # Same as previous Early lymphoid progenitors
    "Mature B cell": "#cb1818",                                   # Same as original Mature B cells. NOTE(review): identical hex to "Erythroid cell" — confirm the two are meant to be indistinguishable
    "Basophil / Mast Cell Progenitor": "#8990d6",                 # Related to Basophil / Mast cell
    "Plasma cell": "#c947a7",                                     # Same as original Plasma cells
    "Myeloid cell": "#97b397"                                     # Same as previous Myeloid cells
}


# Label anchor (x, y) per labelled group; filled in the loop and passed to the correction plot in the next cell
label_map = {}

# Groups whose points are drawn but which get no text label and no label_map entry
unlabelled_groups = {
    "Myeloid cell",
    "T cell / NK cell",
    "Basophil / Mast Cell Progenitor",
    "Lymphoid-primed Multipotent Progenitor (LMPP)",
}

# Manual (dx, dy) shifts added to a cluster centroid before its label is placed
label_offsets = {
    "Proliferating Monocyte Progenitor": (0.4, 0.6),
}

# Styling shared by every point trace and every label box
point_style = dict(size=4, opacity=0.7, symbol='circle')
label_box = dict(
    font=dict(family="Arial", size=11, color="black"),
    bgcolor="rgba(255, 255, 255, 0.7)",
    bordercolor="black",
    borderwidth=1,
    borderpad=3,
    showarrow=False,
)

# One trace per distinct Gemini annotation
groups = df['Gemini'].unique()
fig = go.Figure()

for group in groups:
    cells = df[df['Gemini'] == group]

    # Points of this group, coloured from cell_colors (KeyError if the label is missing there)
    fig.add_trace(go.Scattergl(
        x=cells['umap_1'],
        y=cells['umap_2'],
        mode='markers',
        marker=dict(color=cell_colors[group], **point_style),
        name=group,
        showlegend=False,
    ))

    # Only labelled groups get an annotation and a label_map entry
    if group not in unlabelled_groups:
        # Label position = centroid of the group's UMAP coordinates plus its manual shift
        dx, dy = label_offsets.get(group, (0, 0))
        label_x = cells['umap_1'].mean() + dx
        label_y = cells['umap_2'].mean() + dy

        label_map[group] = (label_x, label_y)
        fig.add_annotation(x=label_x, y=label_y, text=group, **label_box)

# Figure-level layout
umap_layout = dict(
    title='UMAP Visualization',
    xaxis_title='UMAP 1',
    yaxis_title='UMAP 2',
    legend_title='Group',
    template='plotly_white',
    width=1000,
    height=750,
)
fig.update_layout(**umap_layout)

fig.show()

In [169]:
# Gemini correction
# Uses the `label_map` filled by the Gemini UMAP loop above; `color_map` and `symbol_map`
# are not defined in this part of the notebook and must already exist in the kernel.
fig = plot_umap_with_correction(
    df,
    'Gemini',
    color_map=color_map,
    symbol_map=symbol_map,
    label_map=label_map,
)
fig.show()

['Correct' 'Partially correct' 'Unknown']


### Choroid Plexus results

In [75]:
# Figure 4 - summary of the marker-based annotation results (choroid plexus)
# Grouped bar chart: one group per method, one bar per outcome

ref_datasets = ['scCATCH', 'scTYPE', 'scMayoMap']

# (legend name, cluster counts ordered like ref_datasets, bar colour), in plotting order
outcome_series = [
    ('Correct', [10, 21, 19], 'green'),
    ('Partially correct', [4, 5, 9], 'orange'),
    ('Incorrect', [40, 28, 26], 'red'),
]

fig = go.Figure(
    data=[
        go.Bar(name=outcome, x=ref_datasets, y=counts, marker_color=colour)
        for outcome, counts, colour in outcome_series
    ],
    layout=dict(barcornerradius=5),
)

# Side-by-side bars, centred title
chart_title = dict(
    text='Annotation results for 3 different marker-based annotation methods',
    font=dict(size=16),
    xanchor='center',
    yanchor='middle',
    x=0.5,
    y=0.95,
)
count_axis = dict(title='Nb of clusters', title_font=dict(size=14))
fig.update_layout(barmode='group', template='none', yaxis=count_axis, title=chart_title)

fig.show()

# Static export; path is relative to the notebook's working directory
pio.write_image(fig, '../../PNG_results/cp_barchart_markerbased_results.png')

#### UMAPs

##### Base

##### SingleR

##### scMayoMap

##### GPTCelltype final

##### Gemini

### SBM and CP combined results

In [5]:
# Joint bar chart comparing all annotation methods across the SBM and CP datasets

# Figure x - proportion of correct + partially correct clusters per method
# Methods compared: scMap, SingleR, scCATCH, scTYPE, scMayoMap, scClassify, scDeepSort
ref_methods=['scMap', 'SingleR','scCATCH', 'scTYPE', 'scMayoMap', 'scClassify', 'scDeepSort']

# Counts are converted to percentages of the tissue's cluster total (54 for CP, 50 for SBM).
# NOTE(review): SBM scDeepSort is 39 here, but the stacked chart in the next cell uses
# 6 correct + 24 partially correct = 30 — confirm which value is right.
fig = go.Figure(layout=dict(barcornerradius=5))
fig.add_trace(go.Bar(
    name='Choroid Plexus',
    x=ref_methods,
    y=np.array([26, 49, 14, 26, 28, 44, 0]) / 54 * 100,
    marker_color='#3A4F41',
))
fig.add_trace(go.Bar(
    name='Skull Bone Marrow',
    x=ref_methods,
    y=np.array([27, 41, 21, 38, 41, 41, 39]) / 50 * 100,
    marker_color='#B9314F',
))

# Side-by-side bars, centred title
chart_title = dict(
    text='Annotation results across different methods',
    font=dict(size=16),
    xanchor='center',
    yanchor='middle',
    x=0.5,
    y=0.95,
)
pct_axis = dict(title='Proportion of correct + partially correct (%)', title_font=dict(size=14))
fig.update_layout(barmode='group', template='none', yaxis=pct_axis, title=chart_title)

fig.show()
#pio.write_image(fig, '../../PNG_results/sbm_barchart_markerbased_results.png')

In [80]:
# Attempt 3
# Stacked bars per method: "correct" at the bottom, "partially correct" on top, one bar per
# tissue. Stacking is done by hand (explicit `base`) on a numeric x axis so the two tissues
# of a method sit side by side. `go` and `np` come from the notebook's import cell.

ref_methods = ['scMap', 'SingleR', 'scCATCH', 'scTYPE', 'scMayoMap', 'scClassify', 'scDeepSort']

# Total proportions (correct + partially correct), kept for reference only.
# NOTE(review): the SBM scDeepSort total (39) does not equal its parts below (6 + 24 = 30);
# every other entry matches. The figure is drawn from the parts — confirm which number is right.
choroid_plexus_total = np.array([26, 49, 14, 26, 28, 44, 0]) / 54 * 100
skull_bone_marrow_total = np.array([27, 41, 21, 38, 41, 41, 39]) / 50 * 100

# The same results split into correct and partially correct cluster counts
choroid_plexus_correct = np.array([17, 40, 10, 21, 19, 34, 0]) / 54 * 100
choroid_plexus_partial = np.array([9, 9, 4, 5, 9, 10, 0]) / 54 * 100

skull_bone_marrow_correct = np.array([8, 10, 13, 17, 12, 9, 6]) / 50 * 100
skull_bone_marrow_partial = np.array([19, 31, 8, 21, 29, 32, 24]) / 50 * 100

# Numeric x positions: one integer slot per method; the SBM bar is shifted by exactly one
# bar width so the two tissue bars of a method touch.
bar_width = 0.4
x_choroid = np.arange(len(ref_methods))
x_skull = x_choroid + bar_width

# (tissue, x positions, correct %, partial %, solid colour, lighter colour for the partial part)
tissue_series = [
    ('Choroid Plexus', x_choroid, choroid_plexus_correct, choroid_plexus_partial,
     '#3A4F41', 'rgba(58, 79, 65, 0.6)'),
    ('Skull Bone Marrow', x_skull, skull_bone_marrow_correct, skull_bone_marrow_partial,
     '#B9314F', 'rgba(185, 49, 79, 0.6)'),
]

fig = go.Figure()

# Two traces per tissue: the correct segment starts at 0, the partial segment starts where
# the correct one ends. The real traces are hidden from the legend (proxies are added below).
for tissue, x_pos, correct_pct, partial_pct, solid_color, light_color in tissue_series:
    segments = [
        ('Correct', correct_pct, 0, solid_color),
        ('Partially Correct', partial_pct, correct_pct, light_color),
    ]
    for outcome, heights, base, color in segments:
        fig.add_trace(go.Bar(
            name=f"{tissue} ({outcome})",
            x=x_pos,
            y=heights,
            marker_color=color,
            width=bar_width,
            base=base,
            legendgroup=tissue,
            showlegend=False,
            text=[f"{val:.1f}%" for val in heights],  # Percentage label inside each segment
            textposition='inside',
            textfont=dict(color='white', size=10),
            insidetextanchor='middle'
        ))

# Empty proxy traces so the legend shows exactly one entry per tissue
for tissue, _x, _correct, _partial, solid_color, _light in tissue_series:
    fig.add_trace(go.Bar(
        name=tissue,
        x=[None],
        y=[None],
        marker_color=solid_color,
        legendgroup=tissue
    ))

# Outline the two segments of the SBM scDeepSort bar and label them, as a key for the stacking
# (the CP scDeepSort bar is 0, so there is nothing to outline there)
deepsort_idx = ref_methods.index('scDeepSort')
x_deepsort_skull = x_skull[deepsort_idx]
deepsort_correct = skull_bone_marrow_correct[deepsort_idx]
deepsort_partial = skull_bone_marrow_partial[deepsort_idx]
half_bar = bar_width / 2

# Rectangle around the "Correct" segment (same width as the bar, from 0 to its top)
fig.add_shape(
    type="rect",
    x0=x_deepsort_skull - half_bar,
    y0=0,
    x1=x_deepsort_skull + half_bar,
    y1=deepsort_correct,
    line=dict(color="black", width=2),
    fillcolor="rgba(0,0,0,0)",
)

# "Correct" label, anchored left of the bar
fig.add_annotation(
    x=x_deepsort_skull - 0.55,
    y=deepsort_correct / 2,  # Mid-height of the correct segment
    text="Correct",
    showarrow=True,
    arrowhead=2,
    arrowsize=1,
    arrowwidth=1,
    arrowcolor="black",
    ax=20,  # Horizontal pixel offset of the label from the anchored point
    ay=0,   # No vertical offset
    font=dict(color="black", size=10),
    bordercolor="black",
    bgcolor="rgba(255,255,255,0.7)",
)

# Rectangle around the "Partially Correct" segment. Its top is the actual top of the stacked
# bar (correct + partial) rather than a hand-tuned offset from the inconsistent total above.
fig.add_shape(
    type="rect",
    x0=x_deepsort_skull - half_bar,
    y0=deepsort_correct - 0.7,  # Starts 0.7 below the segment boundary (hand-tuned)
    x1=x_deepsort_skull + half_bar,
    y1=deepsort_correct + deepsort_partial,
    line=dict(color="black", width=2),
    fillcolor="rgba(0,0,0,0)",
)

# "Partially Correct" label, left of the bar
fig.add_annotation(
    x=x_deepsort_skull - 0.4,
    y=deepsort_correct + deepsort_partial / 3,  # One third of the way up the partial segment
    text="Partially<br>Correct",
    showarrow=False,
    font=dict(color="black", size=10),
    bordercolor="black",
    bgcolor="rgba(255,255,255,0.7)",
)

# Update layout
fig.update_layout(
    barmode='overlay',  # Bars are positioned manually via x/base, so plotly must not restack them
    barcornerradius=1,
    template='none',
    yaxis=dict(
        title='Proportion of correct + partially correct (%)',
        title_font=dict(size=14)
    ),
    title=dict(
        text='Annotation results across different methods',
        font=dict(size=16),
        xanchor='center',
        yanchor='middle',
        x=0.5,
        y=0.95
    ),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    xaxis=dict(
        tickmode='array',
        tickvals=x_choroid + half_bar,  # Centre each method name between its two tissue bars
        ticktext=ref_methods
    )
)

fig.show()

In [31]:
# Figure X - Best results for both tissues (one donut chart per tissue)

labels = ["Correct", "Partially correct", "Incorrect"]

# (tissue label, cluster counts ordered like `labels`, subplot column)
donuts = [
    ("CP", [40, 9, 5], 1),
    ("SBM", [10, 31, 9], 2),
]

# Pie traces need 'domain'-type subplots
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'},
                                            {'type':'domain'}]])
for tissue, counts, col in donuts:
    fig.add_trace(go.Pie(labels=labels, values=counts, name=tissue), 1, col)

# `hole` turns each pie into a donut; slice colours follow the order of `labels`
fig.update_traces(hole=.4, hoverinfo="label+percent+name",
                  marker=dict(colors=['#4C9F70', '#F2CD5D', '#F45B69']))

# Tissue name in the centre of each donut: x is the midpoint of the subplot's x domain
center_labels = [
    dict(text=tissue, x=sum(fig.get_subplot(1, col).x) / 2, y=0.5,
         font_size=14, showarrow=False, xanchor="center")
    for tissue, _counts, col in donuts
]
fig.update_layout(annotations=center_labels)

fig.show()

Now we explore the GPTCelltype results

In [30]:
# Figure X - GPTCelltype variations for both tissues/conditions
# Radar chart over GPT models; the radius is the percentage of correct + partially correct clusters

categories = ['GPT-o3mini','GPT-4', 'GPT-4.1','GPT-4.5 Preview']

# Results SBM (counts -> % of 50)
sbm_correct = np.array([13, 8, 15, 18]) / 50 * 100
sbm_partially_correct = np.array([26, 30, 23, 26]) / 50 * 100

# Results CP (counts -> % of 50)
# NOTE(review): the CP bar charts earlier in this notebook divide by 54, not 50 —
# confirm the cluster count that applies to these GPT runs.
cp_correct = np.array([28, 13, 22, 26]) / 50 * 100
cp_partially_correct = np.array([9, 7, 12, 9]) / 50 * 100

# (legend name, radius per model, line colour), in plotting order
radar_series = [
    ('Skull Bone Marrow', sbm_correct + sbm_partially_correct, '#B9314F'),
    ('Choroid Plexus', cp_correct + cp_partially_correct, '#3A4F41'),
]

fig = go.Figure()
for tissue, score, colour in radar_series:
    fig.add_trace(go.Scatterpolar(
        r=score,
        theta=categories,
        fill='toself',
        line=dict(color=colour),
        name=tissue,
    ))

# Centred title, vertical legend
radar_title = dict(
    text='Percentage of correct + partially correct for different GPT models',
    font=dict(size=16),
    xanchor='center',
    yanchor='middle',
    x=0.5,
    y=0.95,
)
radar_legend = dict(
    orientation="v",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=0.8,
)
fig.update_layout(
    polar=dict(radialaxis=dict(visible=True)),
    template='none',
    showlegend=True,
    title=radar_title,
    legend=radar_legend,
)

fig.show()

In [102]:
# Simple figure: annotation error as a function of the number of marker genes supplied

# Results (error counts -> % of 50)
# NOTE(review): the CP bar charts earlier in this notebook divide by 54, not 50 —
# confirm the cluster count that applies to cp_error.
marker_genes = [10, 20, 30, 40, 50, 60]
sbm_error = np.array([6, 4, 4, 3, 2, 3]) / 50 * 100
cp_error = np.array([16, 13, 10, 11, 10, 12]) / 50 * 100

# (legend name, error % per marker-gene setting, line colour), in plotting order
error_series = [
    ('Skull Bone Marrow', sbm_error, '#B9314F'),
    ('Choroid Plexus', cp_error, '#3A4F41'),
]

fig = go.Figure()
for tissue, error_pct, colour in error_series:
    fig.add_trace(go.Scatter(
        x=marker_genes,
        y=error_pct,
        mode='lines+markers',
        name=tissue,
        line=dict(color=colour),
        marker=dict(size=8, symbol='x'),
    ))

# Square figure, horizontal legend above the plot area, no title
legend_style = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=0.8,
)
fig.update_layout(
    title='',
    xaxis_title='Number of Marker Genes',
    yaxis_title='Annotation Error (%)',
    template='plotly_white',
    showlegend=True,
    legend=legend_style,
    width=500,
    height=500,
)

fig.show()

In [195]:
# Show the lack of stability of GPTCelltype and why it is useful to average multiple rounds

# Tissue of each run: 20 SBM runs followed by 20 CP runs
x = ['SBM']*20 + ['CP']*20

# Incorrect clusters per run (first 20 values SBM, last 20 CP)
incorrect = np.array([4, 5, 4, 3, 3, 2, 3, 3, 1, 3,
                      3, 3, 3, 2, 4, 3, 3, 2, 3, 2,
                      12, 10, 9, 13, 12, 8, 14, 12, 14, 12,
                      7, 15, 8, 10, 6, 9, 15, 8, 10, 11])

# Correct clusters per run (same ordering)
correct = np.array([20, 18, 20, 17, 21, 20, 20, 17, 20, 20,
                    19, 18, 17, 23, 23, 17, 19, 21, 22, 21,
                    32, 34, 35, 31, 32, 36, 30, 32, 30, 32,
                    37, 29, 36, 34, 38, 35, 29, 36, 34, 33])

# (legend name, counts per run, point colour), in plotting order
outcome_series = [
    ('Incorrect', incorrect, '#F45B69'),
    ('Correct', correct, '#4C9F70'),
]

fig = go.Figure()

# All individual points drawn over a very thin, unfilled black box; hidden from the legend
for outcome, counts, colour in outcome_series:
    fig.add_trace(go.Box(
        y=counts,
        x=x,
        name=outcome,
        marker=dict(color=colour),
        boxpoints='all',
        jitter=0.3,
        pointpos=0,
        fillcolor='rgba(0,0,0,0)',
        line=dict(color='black', width=0.2),
        showlegend=False,
    ))

# Empty scatter traces that only provide coloured legend entries
for outcome, _counts, colour in outcome_series:
    fig.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        name=outcome,
        marker=dict(color=colour, size=10),
        showlegend=True,
    ))

# Vertical separator between the two tissues (x in axis units, y in paper units)
fig.add_shape(
    type='line',
    x0=0.5,
    x1=0.5,
    y0=0.024,
    y1=1,
    xref='x',
    yref='paper',
    line=dict(color='black', width=1),
)

# Tick labels are coloured with the tissue colours used in the other figures
tissue_axis = dict(
    title=dict(text='Tissue', font=dict(size=14)),
    tickfont=dict(size=14, color='black'),
    tickvals=['SBM', 'CP'],
    ticktext=[
        '<span style="color:#B9314F">SBM</span>',
        '<span style="color:#3A4F41">CP</span>'
    ],
)
fig.update_layout(
    template='none',
    width=600,
    height=500,
    yaxis=dict(title=dict(text='Number of clusters')),
    xaxis=tissue_axis,
    boxmode='group',
)

fig.show()
