In [60]:
import pandas as pd
import plotly.graph_objs as go
import ipywidgets as widgets
from ipywidgets import HBox
import numpy as np
import html

# Load data
df = pd.read_csv("FGF14_RO.csv")  # or ATXN8OS.csv if running locally

# Parameters
bin_width = 3  # width of each bin applied to `repeatLength` below
top_n = 10     # number of most frequent haplotypes kept for plotting

# Label rows without a haplotype as 'Other'. With its default settings
# read_csv parses empty fields as NaN, so comparing against '' alone never
# matches; handle both the NaN and the literal empty-string case.
df['assigned_haplotype'] = (
    df['assigned_haplotype'].fillna('Other').replace('', 'Other')
)

# Keep only the `top_n` most frequent haplotypes.
top_haps = df['assigned_haplotype'].value_counts().head(top_n).index.tolist()
# .copy() gives an independent frame, so the column assignments that follow
# do not raise SettingWithCopyWarning.
df = df[df['assigned_haplotype'].isin(top_haps)].copy()

# Bin repeat lengths: `repeat_bin` is the lower edge of each bin (a multiple
# of bin_width), `repeat_bin_center` is that edge shifted by half a bin.
bin_start = df['repeatLength'].floordiv(bin_width).mul(bin_width)
df['repeat_bin'] = bin_start
df['repeat_bin_center'] = bin_start + bin_width / 2

# Count how many rows share each (bin, haplotype, encodedSeq) combination
seq_group_keys = ['repeat_bin_center', 'assigned_haplotype', 'encodedSeq']
seq_counts = df.groupby(seq_group_keys).size().reset_index(name='encodedSeq_count')

def format_tooltip(group):
    """Build the hover text for one group of sequence counts.

    Reads the 'encodedSeq' and 'encodedSeq_count' columns of ``group`` and
    returns one "<escaped sequence> (n=<count>)" entry per row, joined with
    "<br>". Sequences are HTML-escaped so characters such as "<" and ">"
    are shown literally rather than interpreted as markup.
    """
    entries = []
    for sequence, n_rows in zip(group['encodedSeq'], group['encodedSeq_count']):
        entries.append(f"{html.escape(sequence)} (n={int(n_rows)})")
    return "<br>".join(entries)

# Both summaries below are keyed on the same (bin, haplotype) pair.
bin_hap_keys = ['repeat_bin_center', 'assigned_haplotype']

# One tooltip string per (bin, haplotype), listing its sequences and counts
tooltip_series = seq_counts.groupby(bin_hap_keys).apply(
    format_tooltip, include_groups=False
)
tooltip_df = tooltip_series.reset_index(name='encodedSeq_summary')

# Bar height: number of rows per (bin, haplotype)
bar_counts = df.groupby(bin_hap_keys).size().reset_index(name='count')

# Left join keeps every bar even if it has no tooltip entry
merged = bar_counts.merge(tooltip_df, on=bin_hap_keys, how='left')

def shorten_label(haplotype, max_len=50):
    """Return ``haplotype`` shortened for display.

    Values of at most ``max_len`` characters are returned unchanged; longer
    ones are cut to their first ``max_len`` characters with "…" appended.
    """
    if len(haplotype) > max_len:
        return haplotype[:max_len] + "…"
    return haplotype

# Shortened haplotype names used as legend entries
merged['legend_label'] = merged['assigned_haplotype'].map(shorten_label)

# === FigureWidget setup ===
fig = go.FigureWidget()
haplotypes = merged['assigned_haplotype'].unique()

# Shared hover text; <extra></extra> suppresses the secondary hover box.
hover_template = (
    "Repeat Length Bin Center: %{x}<br>"
    "Count: %{y}<br>"
    "EncodedSeqs:<br>%{customdata}<extra></extra>"
)

# One bar trace per haplotype
for hap in haplotypes:
    hap_rows = merged.loc[merged['assigned_haplotype'] == hap]
    trace = go.Bar(
        x=hap_rows['repeat_bin_center'],
        y=hap_rows['count'],
        name=hap_rows['legend_label'].iloc[0],
        customdata=hap_rows['encodedSeq_summary'],
        hovertemplate=hover_template,
    )
    fig.add_trace(trace)

# Axis and legend settings, named separately to keep the layout call short.
x_axis_opts = {
    'title': 'Repeat Length (binned, bp)',
    'rangeslider': {'visible': True},
    'type': 'linear',
}
y_axis_opts = {'title': 'Allele Count'}
# Vertical legend anchored just to the right of the plot area (x=1.02).
legend_opts = {
    'orientation': "v",
    'x': 1.02,
    'y': 1,
    'title': "Haplotype",
    'font': {'size': 10},
}

fig.update_layout(
    barmode='stack',
    height=600,
    width=1000,
    xaxis=x_axis_opts,
    yaxis=y_axis_opts,
    legend=legend_opts,
)

# === Slider ===
# Upper slider bound: tallest stacked bar (counts summed per bin) times 1.5.
stack_heights = merged.groupby('repeat_bin_center')['count'].sum()
max_y = int(stack_heights.max() * 1.5)

# Vertical range slider; its height matches the 600px figure height.
slider_layout = widgets.Layout(width='70px', height='600px')
y_slider = widgets.IntRangeSlider(
    value=[0, int(max_y * 0.9)],
    min=0,
    max=max_y,
    step=10,
    orientation='vertical',
    description='Y-axis',
    style={'description_width': '0px'},
    layout=slider_layout,
    continuous_update=False,
)

def update_y_range(change):
    """Set the figure's y-axis range from a slider change notification.

    Only the 'new' entry of ``change`` is read; it is passed unchanged as
    the y-axis ``range`` of the module-level ``fig``.
    """
    selected_range = change['new']
    fig.update_yaxes(range=selected_range)

# Re-range the y-axis whenever the slider's value changes.
y_slider.observe(update_y_range, names='value')

# The observer only fires on changes, so apply the slider's initial selection
# once here; otherwise the plot starts autoranged while the slider already
# displays a different range.
fig.update_yaxes(range=y_slider.value)

# === Voilà output ===
# Figure and slider side by side; this must stay the cell's last expression
# so that the widget is rendered.
HBox([fig, y_slider])


HBox(children=(FigureWidget({
    'data': [{'customdata': array(['&lt;AAG&gt;2&lt;AG&gt;1&lt;AAG&gt;6 (n=1)<br…