In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import html

# Load the two input tables.
# topic_df: one row per segment; the callbacks below read its 'Topic',
#   'Video Id', 'Video Title' and 'Segment' columns.
# category_df: one row per topic; read via its 'Category', 'Topic #',
#   'Label' and 'Subtopics' columns.
topic_df = pd.read_csv("[T3 UPDATED] BERTopic-topics per segment.csv")
category_df = pd.read_csv("BERTopic w categories.csv")

# Number of distinct 'Topic #' values per category, and the category names
# in sorted order for the dropdown.
category_counts = category_df.groupby('Category')['Topic #'].nunique().to_dict()
categories = sorted(category_counts.keys())

# Mapping: category -> list of (topic number, label, subtopics) tuples,
# one tuple per row of category_df in that category.
category_to_topics = {
    cat: list(zip(df['Topic #'], df['Label'], df['Subtopics']))
    for cat, df in category_df.groupby('Category')
}

# Widgets
# Category selector: each option is a (display label, value) pair whose label
# shows the topic count and whose value is the bare category name.
category_dropdown = widgets.Dropdown(
    options=[(f"{cat} ({category_counts[cat]} topics)", cat) for cat in categories],
    description='Category:',
    layout=widgets.Layout(width='80%')
)

# Topic selector: starts empty and is filled by update_topics().
topic_dropdown = widgets.Dropdown(
    options=[],
    description='Topic:',
    layout=widgets.Layout(width='80%')
)

# Outputs
# main_output holds the topic header and the pagination widgets;
# pagination_output holds the video list for the current page.
main_output = widgets.Output()
pagination_output = widgets.Output()

# Pagination state shared (via `global`) by the callbacks below.
VIDEOS_PER_PAGE = 10      # videos rendered per page
current_page = 0          # zero-based index of the page being shown
grouped_videos = None     # per-video DataFrame built in show_topic_info()
total_pages = 0           # page count for the selected topic
selected_topic_id = None  # topic value last handled by show_topic_info()

# Pagination controls: Previous / Next buttons plus a "Page X of Y" label.
prev_button = widgets.Button(description="Previous", layout=widgets.Layout(width='100px'))
next_button = widgets.Button(description="Next", layout=widgets.Layout(width='100px'))
page_label = widgets.Label()
pagination_controls = widgets.HBox([prev_button, next_button, page_label])


def update_topics(change):
    """Refill the topic dropdown with the topics of the chosen category.

    Args:
        change: Trait-change notification from ``category_dropdown``; only
            ``change['new']`` (the selected category) is read. A category
            with no entry in ``category_to_topics`` yields an empty dropdown.
    """
    dropdown_entries = []
    for topic_number, label, subtopics in category_to_topics.get(change['new'], []):
        entry_text = f"Topic {topic_number}: {subtopics} ({label})"
        dropdown_entries.append((entry_text, topic_number))

    topic_dropdown.options = dropdown_entries
    # Select the first topic, or clear the selection when there is none.
    if dropdown_entries:
        topic_dropdown.value = dropdown_entries[0][1]
    else:
        topic_dropdown.value = None

def render_video_page(page_idx):
    """Render one page of the selected topic's videos into ``pagination_output``.

    Every video on the page becomes a collapsible ``<details>`` element whose
    summary shows the title, id and segment count, and whose body lists the
    segments. ``page_label`` is updated to show the page that was rendered.

    Args:
        page_idx: Zero-based page index. Rows ``page_idx * VIDEOS_PER_PAGE``
            up to (not including) ``(page_idx + 1) * VIDEOS_PER_PAGE`` of the
            module-level ``grouped_videos`` frame are shown.
    """
    pagination_output.clear_output()
    start_idx = page_idx * VIDEOS_PER_PAGE
    end_idx = start_idx + VIDEOS_PER_PAGE

    with pagination_output:
        video_blocks = []
        for _, row in grouped_videos.iloc[start_idx:end_idx].iterrows():
            # CSV cells are not guaranteed to be strings (numeric ids, NaN for
            # blank cells) and html.escape() only accepts str, so coerce first.
            video_id = html.escape(str(row['Video Id']))
            video_title = html.escape(str(row['Video Title']))
            segments = row['Segment']
            segment_html = "".join(
                f"<div style='margin-bottom: 10px; padding-left: 10px;'>• {html.escape(str(seg))}</div>" for seg in segments
            )

            video_blocks.append(f"""
            <details>
                <summary><b>{video_title}</b> ({video_id}) - {len(segments)} segments</summary>
                <div style="margin-left: 20px; margin-top: 5px;">{segment_html}</div>
            </details>
            <br>
            """)
        display(HTML("".join(video_blocks)))
        # Label the page that was actually rendered rather than relying on
        # the global counter being in sync with the argument.
        page_label.value = f"Page {page_idx + 1} of {total_pages}"

def on_prev_clicked(_):
    """Step back one page and re-render; do nothing on the first page.

    Args:
        _: The clicked button (unused).
    """
    global current_page
    if current_page <= 0:
        return
    current_page = current_page - 1
    render_video_page(current_page)

def on_next_clicked(_):
    """Advance one page and re-render; do nothing on the last page.

    Args:
        _: The clicked button (unused).
    """
    global current_page
    if current_page + 1 >= total_pages:
        return
    current_page = current_page + 1
    render_video_page(current_page)

# Register the pagination handlers so each button click moves one page.
prev_button.on_click(on_prev_clicked)
next_button.on_click(on_next_clicked)

def show_topic_info(change):
    """Show the summary header and first page of videos for the chosen topic.

    Clears both output areas, rebuilds the per-video grouping for the selected
    topic, resets pagination to the first page, and renders the header, the
    pagination controls and the first page of videos inside ``main_output``.

    Args:
        change: Trait-change notification from ``topic_dropdown``; only
            ``change['new']`` (the selected topic number) is read. ``None``
            (no topic available) leaves the outputs cleared.

    Raises:
        IndexError: If the selected topic has no row in ``category_df``.
            When raised inside the ``main_output`` context the traceback is
            captured by that widget.
    """
    global grouped_videos, current_page, total_pages, selected_topic_id
    main_output.clear_output()
    pagination_output.clear_output()
    selected_topic = change['new']
    if selected_topic is None:
        return

    selected_topic_id = selected_topic
    current_page = 0

    with main_output:
        # Collect the segments of the selected topic, one row per video.
        filtered_df = topic_df[topic_df['Topic'] == selected_topic]
        grouped_videos = filtered_df.groupby(['Video Id', 'Video Title'])['Segment'].apply(list).reset_index()
        # Ceiling division, with at least one (possibly empty) page.
        total_pages = max(1, (len(grouped_videos) - 1) // VIDEOS_PER_PAGE + 1)

        # Get topic info
        row = category_df[category_df['Topic #'] == selected_topic].iloc[0]
        # A blank CSV cell is read as NaN (a float) and html.escape() only
        # accepts str, so coerce before escaping instead of crashing.
        original_name = html.escape(str(row['Subtopics']))
        custom_label = html.escape(str(row['Label']))

        # Summary header
        display(HTML(f"""
        <h3>Topic {selected_topic}: {original_name} (<i>{custom_label}</i>)</h3>
        <b>Total Videos:</b> {len(grouped_videos)}<br>
        <b>Total Segments:</b> {len(filtered_df)}<br><br>
        """))

        # First render
        render_video_page(current_page)
        display(pagination_controls, pagination_output)

# Connect widgets: a category change refills the topic list, and a topic
# change redraws the topic summary and its paginated video list.
category_dropdown.observe(update_topics, names='value')
topic_dropdown.observe(show_topic_info, names='value')

# Show widgets
display(category_dropdown, topic_dropdown, main_output)

# Trigger initial population
# The Dropdown constructor already selected the first category, so assigning
# that same value again would not emit a change event and the topic list
# would stay empty. Invoke the handler directly with the current selection
# instead; this also copes with an empty category list (value is None).
update_topics({'new': category_dropdown.value})


Dropdown(description='Category:', layout=Layout(width='80%'), options=(('Culture (26 topics)', 'Culture'), ('E…

Dropdown(description='Topic:', layout=Layout(width='80%'), options=(), value=None)

Output()