# Simple example of using BERTopic for topic modeling

In [None]:
from sentence_transformers import SentenceTransformer
from bertopic import BERTopic
import pandas as pd

# Sample stress/anxiety texts (replace with your filtered data).
# Ten short first-person statements touching on work, relationships,
# study, family and money. Order matters: the "Doc i" numbers printed in
# the document-topic listing are positions in this list.
texts = [
    "I'm so stressed about my work deadline, can't focus",
    "Relationship problems keeping me awake at night",
    "Exam anxiety is overwhelming, can't study properly",
    "Family issues causing me constant worry",
    "Financial stress from mounting bills and debt",
    "My boss is demanding too much, work is killing me",
    "Partner and I keep fighting, relationship falling apart",
    "University pressure and grades stressing me out",
    "Can't handle family drama and expectations anymore",
    "Money problems affecting my sleep and mental health"
]

# Step 1: Generate embeddings with Sentence-BERT
# The texts are encoded once up front; the resulting vectors are handed to
# the topic model together with the raw documents in step 2.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = sentence_model.encode(texts)

# Step 2: Perform topic modeling with BERTopic
# min_topic_size=2 is set because the sample corpus holds only ten texts.
topic_model = BERTopic(verbose=True, min_topic_size=2)
topics, probabilities = topic_model.fit_transform(
    documents=texts,
    embeddings=embeddings,
)

# Step 3: Get topic information
# Print the per-topic summary table returned by the fitted model.
print("Discovered Topics:")
topic_info = topic_model.get_topic_info()
print(topic_info)

# List the leading keywords of every assigned topic. The ids are sorted so
# the listing order is stable; iterating a bare set gives no such guarantee.
print("\nTopic Keywords:")
for topic_id in sorted(set(topics)):
    if topic_id == -1:  # Skip outliers
        continue
    words = topic_model.get_topic(topic_id)[:5]  # Top 5 (word, score) pairs
    print(f"Topic {topic_id}: {[word for word, _score in words]}")

# Show which topic each input text was assigned to, one line per document.
print("\nDocument-Topic Assignment:")
for i, (text, topic) in enumerate(zip(texts, topics)):
    # Append the ellipsis only when the text was actually cut at 50
    # characters; shorter texts are printed whole without a trailing "...".
    preview = text if len(text) <= 50 else f"{text[:50]}..."
    print(f"Doc {i}: Topic {topic} - {preview}")

# Step 4: Visualize topics
# Optional interactive plots; uncomment to display them.
# topic_model.visualize_topics().show()  # Interactive plot
# topic_model.visualize_documents(texts, embeddings).show()  # Document clusters