In [3]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML

# Input tables.
#   topic_df    - topic assignments; the viewer below reads its 'Topic',
#                 'Video Id' and 'Video Title' columns.
#   category_df - topic metadata; its 'Category', 'Topic #', 'Label' and
#                 'Subtopics' columns are used here and by the viewer.
topic_df = pd.read_csv("[T3 UPDATED] BERTopic-topics per segment.csv")
category_df = pd.read_csv("BERTopic w categories.csv")

# Number of distinct topic ids per category, and the category names sorted
# for a stable dropdown order.
category_counts = (
    category_df.groupby('Category')['Topic #']
    .nunique()
    .to_dict()
)
categories = sorted(category_counts)

# category -> [(topic number, label, original topic words), ...]; the tuples
# keep the row order of category_df within each category.
category_to_topics = {
    name: list(zip(rows['Topic #'], rows['Label'], rows['Subtopics']))
    for name, rows in category_df.groupby('Category')
}

# Category selector: every option shows the category name together with its
# topic count, while the option's value is the bare category name.
category_dropdown = widgets.Dropdown(
    description='Category:',
    layout=widgets.Layout(width='80%'),
    options=[
        (f"{name} ({category_counts[name]} topics)", name)
        for name in categories
    ],
)

# Topic selector: created without options; update_topics fills it whenever a
# category is chosen.
topic_dropdown = widgets.Dropdown(
    description='Topic:',
    layout=widgets.Layout(width='80%'),
    options=[],
)

# Area that show_topic_info clears and renders into.
output = widgets.Output()

def update_topics(change):
    """Refill the topic dropdown for the category that was just selected.

    Args:
        change: Change notification dict; only ``change['new']`` (the
            selected category) is read. Categories missing from
            ``category_to_topics`` yield an empty topic list.
    """
    entries = category_to_topics.get(change['new'], [])
    # Each entry is (topic number, label, original topic words); the option
    # text shows all three and the option value is the topic number.
    choices = [
        (f"Topic {number}: {words} ({label})", number)
        for number, label, words in entries
    ]
    topic_dropdown.options = choices
    # Default to the first topic, or clear the selection when there is none.
    topic_dropdown.value = choices[0][1] if choices else None

def show_topic_info(change):
    """Render the summary and video table for the topic that was just selected.

    Clears ``output`` and, unless the selection is ``None``, shows a header
    with the topic number, its original topic words and custom label, the
    number of distinct videos and of matching rows in ``topic_df``, and a
    table of the distinct (Video Id, Video Title) pairs.

    Args:
        change: Change notification dict; only ``change['new']`` (the
            selected topic number, or ``None``) is read.

    Raises:
        IndexError: If ``category_df`` has no row whose 'Topic #' equals the
            selected topic.
    """
    # Local import so this block does not depend on the cell's import list.
    from html import escape

    output.clear_output()
    selected_topic = change['new']
    if selected_topic is None:
        return
    with output:
        # Rows of topic_df assigned to the selected topic.
        filtered_df = topic_df[topic_df['Topic'] == selected_topic]
        video_counts = filtered_df[['Video Id', 'Video Title']].drop_duplicates()
        n_segments = len(filtered_df)
        n_videos = len(video_counts)

        # First metadata row for this topic supplies the label and topic words.
        row = category_df[category_df['Topic #'] == selected_topic].iloc[0]
        # These values come straight from the CSV; escape them so characters
        # such as '&' or '<' are shown literally instead of being parsed as
        # markup.
        topic_id = escape(str(selected_topic))
        original_name = escape(str(row['Subtopics']))
        custom_label = escape(str(row['Label']))

        # Summary header with original and custom label.
        display(HTML(f"<h3>Topic {topic_id}: {original_name} ({custom_label})</h3>"))
        display(HTML(f"<b>Total Videos:</b> {n_videos}"))
        display(HTML(f"<b>Total Segments:</b> {n_segments}<br><br>"))

        # Show the full, untruncated table. option_context restores the
        # previous pandas display settings on exit, so the rest of the
        # session is not affected by this callback.
        with pd.option_context(
            'display.max_rows', None,
            'display.max_columns', None,
            'display.width', None,
            'display.max_colwidth', None,
        ):
            display(video_counts.reset_index(drop=True))

# Wire up the event handlers: a category change refills the topic dropdown,
# and a topic change re-renders the output area.
category_dropdown.observe(update_topics, names='value')
topic_dropdown.observe(show_topic_info, names='value')

# Display widgets
display(category_dropdown, topic_dropdown, output)

# Populate the topic dropdown for the initial category.
# A Dropdown built with options already has its first option selected, and
# observers are only notified when the value actually changes. Re-assigning
# categories[0] would therefore fire nothing and leave the topic list empty.
# Call the handler directly in that case; assign only when the value differs.
# An empty category list is handled too, instead of failing on categories[0].
initial_category = categories[0] if categories else None
if category_dropdown.value != initial_category:
    category_dropdown.value = initial_category
else:
    update_topics({'new': initial_category})


Dropdown(description='Category:', layout=Layout(width='80%'), options=(('Culture (26 topics)', 'Culture'), ('E…

Dropdown(description='Topic:', layout=Layout(width='80%'), options=(), value=None)

Output()