# Installation

In [None]:
!pip install groq
!pip install bertopic
!pip install sentence-transformers faiss-cpu torch transformers



# Imports

In [None]:
import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

import faiss
import nltk
import numpy as np
from gensim.corpora.dictionary import Dictionary
from gensim.models.coherencemodel import CoherenceModel
from gensim.utils import simple_preprocess
from groq import Groq
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer
from sklearn.datasets import fetch_20newsgroups
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.preprocessing import LabelEncoder
from transformers import pipeline


In [None]:
# Download required NLTK data
# The preprocessing functions below call stopwords.words('english') and
# word_tokenize(); the resources fetched here are what those calls load.
nltk.download('stopwords')
nltk.download('punkt')
# NOTE(review): WordNetLemmatizer is imported but not used in the visible
# cells - confirm whether the 'wordnet' download is still needed.
nltk.download('wordnet')
nltk.download('punkt_tab')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

# Data class to store LLM responses with enhanced fields

In [None]:
@dataclass
class LLMResponse:
    """Result of one LLM call made through the processor.

    Attributes:
        content: Raw text returned by the model (empty string on failure).
        score: Numeric grade extracted from a "grade" response, if any.
        topic: Primary topic extracted from a "topic" response, if any.
        confidence: Confidence extracted from a "topic" response, if any.
        error: Error message when the call failed, otherwise None.
        metadata: Free-form extra information; always a dict after construction.
    """
    content: str
    score: Optional[float] = None
    topic: Optional[str] = None
    confidence: Optional[float] = None
    error: Optional[str] = None
    # default_factory gives every instance its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Callers may still pass metadata=None explicitly; normalise it so
        # downstream code can always treat metadata as a dict.
        if self.metadata is None:
            self.metadata = {}


# Data class to store document information

In [None]:
@dataclass
class Document:
    """A stored text together with its embedding vector.

    Attributes:
        content: The document text.
        embedding: Embedding vector for the text, or None if not yet computed.
        metadata: Free-form extra information; always a dict after construction.
    """
    content: str
    embedding: Optional[np.ndarray] = None
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        # Mirror LLMResponse: never leave metadata as None so callers can
        # index into it without a guard.
        if self.metadata is None:
            self.metadata = {}

# Retrieval-Augmented Generation (RAG) system, combining a neural embedding model with a vector similarity search engine to manage and retrieve relevant documents.

In [None]:
class RAGSystem:
    """Small retrieval store: sentence embeddings searched with a flat L2 FAISS index.

    Attributes:
        embedding_model: SentenceTransformer used to embed documents and queries.
        document_store: List of Document objects in insertion order; a
            document's position in this list is its row id in the index.
        index: FAISS index over all stored embeddings, or None while the
            store is empty.
    """

    def __init__(self, embedding_model="all-MiniLM-L6-v2"):
        """Load the embedding model and start with an empty store and no index.

        Args:
            embedding_model: Model name passed to SentenceTransformer.
        """
        self.embedding_model = SentenceTransformer(embedding_model)
        self.document_store = []
        self.index = None

    def add_documents(self, documents: List[str]):
        """Embed the given texts, append them to the store and rebuild the index.

        Args:
            documents: Texts to add. An empty list is a no-op.
        """
        if len(documents) == 0:
            return

        # Request NumPy output directly: calling .numpy() on a tensor fails
        # when the model runs on a GPU.
        embeddings = self.embedding_model.encode(documents, convert_to_numpy=True)
        for doc, emb in zip(documents, embeddings):
            self.document_store.append(Document(
                content=doc,
                embedding=np.asarray(emb, dtype='float32')
            ))
        self._update_index()

    def _update_index(self):
        """Rebuild the FAISS index from every embedding currently in the store."""
        if not self.document_store:
            self.index = None
            return

        embeddings = np.vstack([doc.embedding for doc in self.document_store])
        dimension = embeddings.shape[1]
        self.index = faiss.IndexFlatL2(dimension)
        self.index.add(embeddings.astype('float32'))

    def retrieve_relevant_docs(self, query: str, k: int = 3):
        """Return up to k stored documents closest to the query, nearest first.

        Args:
            query: Query text to embed and search with.
            k: Maximum number of documents to return.

        Returns:
            A list of stored documents; empty when nothing has been indexed
            yet or k is not positive.
        """
        if self.index is None or not self.document_store or k <= 0:
            return []

        # Never ask for more neighbours than exist: FAISS pads missing hits
        # with -1, which would silently index the last stored document.
        k = min(k, len(self.document_store))
        query_embedding = np.asarray(
            self.embedding_model.encode([query])[0], dtype='float32'
        )
        _, indices = self.index.search(query_embedding.reshape(1, -1), k)
        return [self.document_store[i] for i in indices[0] if i >= 0]


# Extract pattern from text with fallback

In [None]:
def find_pattern_safely(pattern, text, default=None):
    """Return the stripped first capture group of `pattern` in `text`, else `default`.

    The search is case-insensitive and multi-line. Any failure (empty text,
    no match, invalid pattern, missing capture group) yields `default`
    instead of raising.
    """
    if text:
        search_flags = re.IGNORECASE | re.MULTILINE
        try:
            found = re.search(pattern, text, search_flags)
            if found is not None:
                return found.group(1).strip()
        except Exception:
            # Best-effort helper: a bad pattern or group falls back to the default.
            return default
    return default


# Clean and standardize topic string

In [None]:
def clean_topic(topic):
    """Normalise a topic label extracted from an LLM response.

    Removes list numbering ("1. "), a leading dash, one leading "(" and one
    trailing ")", rewrites " and " as " & ", and collapses whitespace.

    Args:
        topic: Raw topic string (may carry surrounding whitespace).

    Returns:
        The cleaned topic, or None when the input is empty or the result is
        shorter than three characters.
    """
    if not topic:
        return None

    # Strip first: the removals below are anchored with ^ / $, and items
    # produced by splitting on commas usually start with a space.
    topic = topic.strip()
    topic = re.sub(r'^\d+\.\s*', '', topic)
    topic = re.sub(r'^-\s*', '', topic)
    topic = re.sub(r'^\(|\)$', '', topic)
    # NOTE(review): drops tokens such as "12millisecond"; the reason for this
    # special case is not visible here - confirm it is still needed.
    topic = re.sub(r'\b\d+millisecond\b', '', topic)
    topic = re.sub(r'\s+and\s+', ' & ', topic)
    topic = ' '.join(topic.split())

    if len(topic) < 3:
        return None

    return topic

# Preprocessing with cleaning and standardization

In [None]:
def preprocess_text(text):
    """Lower-case, strip punctuation and filter a raw text into a cleaned string.

    Tokens are dropped when they are English stop words (plus a few custom
    ones), shorter than three characters, or purely numeric.

    Args:
        text: Raw document text.

    Returns:
        The surviving tokens joined by single spaces, or the literal
        "placeholder" when nothing survives.
    """
    text = text.lower()
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^\w\s]', '', text)

    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    stop_words.update({'would', 'could', 'should', 'said', 'like', 'also'})

    # The text is already lower-cased, so the former "keep short upper-case
    # tokens" branch could never trigger; tokens are kept as they are.
    filtered_tokens = [
        token for token in tokens
        if token not in stop_words
        and len(token) > 2
        and not token.isnumeric()
        and not all(c in '0123456789.-' for c in token)
    ]

    cleaned_text = " ".join(filtered_tokens)
    return cleaned_text if cleaned_text.strip() else "placeholder"

# Document preprocessing for topic modeling

In [None]:
def preprocess_documents(documents):
    """Tokenise documents for topic modelling.

    Each string is lower-cased, stripped of punctuation (hyphens are kept),
    tokenised, and filtered of stop words, short tokens and numeric tokens.

    Args:
        documents: Iterable of documents; non-string entries are tolerated.

    Returns:
        One token list per input document, in order. Documents that are not
        strings, yield no tokens, or fail to process become ['placeholder'],
        so the output always lines up with the input.
    """
    processed_docs = []
    # Built on first use and then reused; it is created inside the try so a
    # missing NLTK corpus still degrades to placeholders instead of raising.
    stop_words = None

    for doc in documents:
        try:
            if not isinstance(doc, str):
                processed_docs.append(['placeholder'])
                continue

            # Basic cleaning
            doc = doc.lower()
            doc = re.sub(r'\s+', ' ', doc)
            doc = re.sub(r'[^\w\s-]', '', doc)

            tokens = word_tokenize(doc)

            if stop_words is None:
                built = set(stopwords.words('english'))
                built.update({'would', 'could', 'should', 'said', 'like', 'also'})
                stop_words = built

            filtered_tokens = [
                token for token in tokens
                if token not in stop_words
                and len(token) > 2
                and not token.isnumeric()
                and not all(c in '0123456789.-' for c in token)
            ]

            # Keep a placeholder so the document is never an empty list.
            processed_docs.append(filtered_tokens or ['placeholder'])

        except Exception as e:
            print(f"Error preprocessing document: {e}")
            processed_docs.append(['placeholder'])  # Add placeholder on error

    return processed_docs


# Class for advanced processing of technical text using a large language model.

It supports three tasks:

- Summarizing technical content.
- Classifying topics and extracting technical features.
- Grading groups of documents for coherence and distinctiveness.

In [None]:
class EnhancedLLMProcessor:
    """Runs summarize / topic / grade prompts against a Groq chat model.

    Attributes:
        client: Groq client used for every request.
        model: Name of the chat model to call.
        prompts: Prompt template per task name ("summarize", "topic", "grade").
    """

    def __init__(self, api_key: str, model: str = "llama-3.2-3b-preview"):
        """Create the Groq client and register the prompt templates.

        Args:
            api_key: Groq API key.
            model: Chat model name to use for all tasks.
        """
        self.client = Groq(api_key=api_key)
        self.model = model

        self.prompts = {
            "summarize": """You are a technical document analyzer specializing in extracting key information from texts.

TEXT TO ANALYZE:
{text}

RELEVANT CONTEXT:
{context}

Provide your analysis in this EXACT format:
MAIN_TOPIC: [primary technical/scientific field]
KEY_TERMS: [list only the most relevant technical terms, comma-separated]
SUMMARY: [2-3 concise, technical sentences capturing the essence]""",

            "topic": """You are an expert topic classifier focusing on technical and academic content.

Document for classification:
{text}

Analyze this document following these steps:
1. Identify primary technical domain
2. Extract key technical terminology
3. Recognize methodological approaches
4. Note any cross-domain elements

Provide classification in this EXACT format:
PRIMARY_TOPIC: [single specific technical field]
SUBTOPICS: [3-4 related technical areas]
TECHNICAL_INDICATORS: [key technical terms that influenced classification]
CROSS_DOMAIN_ELEMENTS: [any interdisciplinary aspects]
CONFIDENCE: [0-1 score with brief justification]""",

            "grade": """You are a specialized content coherence evaluator.

TARGET GROUP:
{documents}

COMPARISON GROUPS:
{other_groups}

Evaluation Criteria:
1. INTERNAL COHERENCE (50%)
- How consistently do the documents align in topic and terminology?
- Do they share a common technical vocabulary?
- Is there thematic continuity?

2. EXTERNAL DISTINCTIVENESS (50%)
- How clearly separated is this group from others?
- Are there unique technical markers?
- Is there minimal topic overlap with other groups?

Provide your evaluation in this EXACT format:
COHERENCE_SCORE: [1-10]
DISTINCTIVENESS_SCORE: [1-10]
FINAL_SCORE: [average of above, rounded to nearest whole number]
STRONG_POINTS: [bullet list of group's strongest cohesion markers]
DISTINGUISHING_FEATURES: [key elements that separate this group]"""
        }

    def process_text(self, text: str, task: str, additional_context: Dict = None) -> "LLMResponse":
        """Run one task prompt and wrap the model output in an LLMResponse.

        Args:
            text: Text substituted for {text} in the prompt.
            task: One of the keys of self.prompts.
            additional_context: Optional values for the other placeholders
                ("context", "documents", "other_groups").

        Returns:
            An LLMResponse. For "grade" it carries a score and a coherence
            value in metadata; for "topic" it carries topic, confidence and
            the parsed fields in metadata. Any exception, including an
            unknown task, is reported through the `error` field with empty
            content rather than raised.
        """
        try:
            prompt_template = self.prompts.get(task)
            if not prompt_template:
                raise ValueError(f"Unknown task: {task}")

            # Every placeholder gets a default so any template can be formatted.
            context = {
                "text": text,
                "context": "",
                "documents": "",
                "other_groups": ""
            }
            if additional_context:
                context.update(additional_context)

            prompt = prompt_template.format(**context)

            if task == "summarize":
                response = self._complete(prompt, temperature=0.3, stream=True)

                # If the model ignored the requested format, fall back to a
                # truncated copy of the input text.
                if not all(section in response for section in ["MAIN_TOPIC:", "KEY_TERMS:", "SUMMARY:"]):
                    return LLMResponse(content=text[:200] + "...")
            else:  # topic and grade tasks
                response = self._complete(prompt, temperature=0.5, stream=False).strip()

            if task == "grade":
                score = self._extract_score_from_response(response)
                # Grade requests pass the graded documents through the
                # context and usually leave `text` empty, so measure
                # coherence on the documents when they are present.
                graded_content = context.get("documents") or text
                graded_docs = graded_content if isinstance(graded_content, list) else [graded_content]
                coherence = self._calculate_group_coherence(graded_docs)
                return LLMResponse(
                    content=response,
                    score=score,
                    metadata={"coherence": coherence}
                )

            elif task == "topic":
                topic_info = self._extract_topic_info(response)
                return LLMResponse(
                    content=response,
                    topic=topic_info["primary_topic"],
                    confidence=topic_info["confidence"],
                    metadata=topic_info
                )

            return LLMResponse(content=response)

        except Exception as e:
            print(f"Error in processing {task}: {str(e)}")
            return LLMResponse(content="", error=str(e))

    def _complete(self, prompt, temperature, stream):
        """Send one chat completion request and return the generated text.

        Args:
            prompt: User message content.
            temperature: Sampling temperature for the request.
            stream: When True the reply is consumed chunk by chunk.

        Returns:
            The full reply text.
        """
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": "You are a specialized technical content analysis system."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=temperature,
            max_tokens=500,
            stream=stream
        )

        if stream:
            # Skip chunks without choices and treat a missing delta as empty.
            return "".join(
                chunk.choices[0].delta.content or ""
                for chunk in completion
                if chunk.choices
            )

        return completion.choices[0].message.content

    def _extract_score_from_response(self, response_text):
        """Extract a score from a grading response.

        Tried in order: the FINAL_SCORE field, the average of COHERENCE_SCORE
        and DISTINCTIVENESS_SCORE, looser numeric phrases, quality keywords,
        and finally a count of positive versus negative indicator words.

        Args:
            response_text: Raw model reply.

        Returns:
            An integer score; 5 when nothing usable is found or on error.
        """
        try:
            if not response_text:
                return 5

            def labelled(name):
                # Accept "NAME: 7", "NAME: [7]" and decimals such as 7.5.
                match = re.search(
                    rf'{name}:\s*\[?\s*(\d+(?:\.\d+)?)', response_text, re.IGNORECASE
                )
                return float(match.group(1)) if match else None

            def clamp(value):
                return int(min(10, max(1, round(value))))

            final = labelled('FINAL_SCORE')
            if final is not None:
                return clamp(final)

            coherence = labelled('COHERENCE_SCORE')
            distinctiveness = labelled('DISTINCTIVENESS_SCORE')
            if coherence is not None and distinctiveness is not None:
                return clamp((coherence + distinctiveness) / 2)

            response_lower = response_text.lower()

            # Looser numeric phrasing ("rated 8", "7 out of 10", ...).
            score_patterns = [
                r'score[^0-9]*(\d+)',
                r'rated[^0-9]*(\d+)',
                r'(\d+)[^0-9]*out of[^0-9]*10',
                r'(\d+)[^0-9]*points'
            ]

            for pattern in score_patterns:
                match = re.search(pattern, response_lower)
                if match:
                    score = int(match.group(1))
                    if 0 <= score <= 10:
                        return score
                    elif 0 <= score <= 100:
                        return round(score / 10)

            # If no numerical score found, use keyword analysis
            keywords = {
                10: ['exceptional', 'perfect', 'outstanding', 'excellent'],
                9: ['very strong', 'highly coherent', 'nearly perfect'],
                8: ['strong', 'very good', 'highly relevant'],
                7: ['good', 'quite coherent', 'mostly relevant'],
                6: ['above average', 'moderately good', 'fairly coherent'],
                5: ['average', 'moderate', 'mixed'],
                4: ['below average', 'somewhat weak', 'partially relevant'],
                3: ['weak', 'poor coherence', 'limited relevance'],
                2: ['very weak', 'poor', 'minimal coherence'],
                1: ['incoherent', 'irrelevant', 'completely disconnected']
            }

            # Match longer phrases first and blank them out, so "very weak"
            # is not also counted as "weak". The highest matched score wins.
            ranked_terms = sorted(
                ((term, value) for value, terms in keywords.items() for term in terms),
                key=lambda pair: len(pair[0]),
                reverse=True
            )
            remaining = response_lower
            matched_scores = []
            for term, value in ranked_terms:
                term_pattern = r'\b' + re.escape(term)
                if re.search(term_pattern, remaining):
                    matched_scores.append(value)
                    remaining = re.sub(term_pattern, ' ', remaining)
            if matched_scores:
                return max(matched_scores)

            # The leading word boundary keeps "consistent" from being counted
            # inside "inconsistent".
            def count_present(terms):
                return sum(
                    bool(re.search(r'\b' + re.escape(term), response_lower))
                    for term in terms
                )

            positive_indicators = count_present(
                ['consistent', 'coherent', 'well', 'clear', 'strong', 'good'])
            negative_indicators = count_present(
                ['inconsistent', 'weak', 'poor', 'lacks', 'missing', 'limited'])

            if positive_indicators or negative_indicators:
                base_score = 5
                score_modifier = positive_indicators - negative_indicators
                return min(10, max(1, base_score + score_modifier))

            return 5  # Default middle score if no other indicators found

        except Exception as e:
            print(f"Error extracting score: {e}")
            return 5

    def _extract_topic_info(self, response_text):
        """Parse the labelled fields of a topic-classification response.

        Args:
            response_text: Raw model reply.

        Returns:
            A dict with "primary_topic" (str), "subtopics",
            "technical_indicators" and "cross_domain" (lists of stripped,
            non-empty strings) and "confidence" (float in [0, 1], 0.5 when
            absent). A fixed fallback dict is returned on error.
        """
        try:
            def field_text(label, default=""):
                return find_pattern_safely(rf'{label}:\s*([^\n]+)', response_text, default)

            def as_list(raw):
                # The prompt shows values in [brackets]; models often echo them.
                items = (item.strip(" []") for item in raw.split(','))
                return [item for item in items if item]

            # Accept "0.8", "1", "1.0" and "[0.8]"; the lookahead rejects
            # values such as "10".
            confidence_raw = find_pattern_safely(
                r'CONFIDENCE:\s*\[?\s*([01](?:\.\d+)?)(?!\d)', response_text, "0.5"
            )

            return {
                "primary_topic": field_text('PRIMARY_TOPIC', "unknown").strip(" []") or "unknown",
                "subtopics": as_list(field_text('SUBTOPICS')),
                "technical_indicators": as_list(field_text('TECHNICAL_INDICATORS')),
                "cross_domain": as_list(field_text('CROSS_DOMAIN_ELEMENTS')),
                "confidence": min(1.0, max(0.0, float(confidence_raw)))
            }
        except Exception as e:
            print(f"Error extracting topic info: {e}")
            return {
                "primary_topic": "unknown",
                "subtopics": [],
                "technical_indicators": [],
                "cross_domain": [],
                "confidence": 0.5
            }

    def _calculate_group_coherence(self, documents):
        """Return a coherence score for a list of raw documents (0.0 on failure)."""
        try:
            processed_docs = preprocess_documents(documents)
            dictionary = Dictionary(processed_docs)
            coherence_scores = calculate_coherence_scores([processed_docs], dictionary)
            return coherence_scores[0] if coherence_scores else 0.0
        except Exception as e:
            print(f"Error calculating group coherence: {e}")
            return 0.0


# Coherence score calculation with error handling and normalization

In [None]:
def calculate_coherence_scores(groups, dictionary, measure="c_v"):
    """Compute one coherence score per group of tokenised documents.

    Every document in a group is turned into a pseudo-topic made of the
    dictionary ids of the tokens it contains, and the group is scored
    against its own documents.

    Args:
        groups: List of groups; each group is a list of token lists.
        dictionary: Dictionary providing doc2bow() for the tokens.
        measure: Coherence measure name passed to CoherenceModel.

    Returns:
        A list with one float per group. A group scores 0.0 when none of its
        documents has a token in the dictionary, when the measure is
        undefined (NaN), or when the computation fails.
    """
    scores = []
    for group in groups:
        try:
            # doc2bow yields (token_id, count) pairs; only the ids are used.
            topics = [
                [token_id for token_id, _count in dictionary.doc2bow(doc)]
                for doc in group
            ]
            # Drop documents with no in-dictionary tokens so one empty
            # document does not invalidate the whole group.
            topics = [topic for topic in topics if topic]
            if not topics:
                scores.append(0.0)
                continue

            coherence_model = CoherenceModel(
                topics=topics,
                texts=group,
                dictionary=dictionary,
                coherence=measure
            )

            score = coherence_model.get_coherence()
            # Keep NaN out of the averages computed downstream.
            if score is None or np.isnan(score):
                scores.append(0.0)
            else:
                scores.append(float(score))
        except Exception as e:
            print(f"Error calculating coherence for group: {e}")
            scores.append(0.0)

    return scores


# Perform LDA analysis with preprocessing

In [None]:
def perform_lda_analysis(documents, n_topics=5):
    """Fit an LDA model on the documents and report topics and assignments.

    Args:
        documents: Non-empty list of document strings.
        n_topics: Number of LDA components to fit.

    Returns:
        A dict with "assigned_topics" (index of the highest-weighted topic
        per document) and "topics_keywords" (topic index -> up to ten top
        terms). Both are empty when the input is not a non-empty list or
        when fitting fails.
    """
    if not (documents and isinstance(documents, list)):
        return {"assigned_topics": [], "topics_keywords": {}}

    try:
        count_vectorizer = CountVectorizer(
            stop_words='english',
            max_df=0.95,
            min_df=2,
            token_pattern=r'(?u)\b\w+\b'
        )
        doc_term_matrix = count_vectorizer.fit_transform(documents)

        lda = LatentDirichletAllocation(
            n_components=n_topics,
            random_state=42,
            max_iter=20,
            learning_method='batch'
        )
        lda.fit(doc_term_matrix)

        vocabulary = count_vectorizer.get_feature_names_out()

        # For each topic keep the ten highest-weighted terms, strongest first.
        keywords_by_topic = {
            topic_id: [vocabulary[term_id] for term_id in weights.argsort()[::-1][:10]]
            for topic_id, weights in enumerate(lda.components_)
        }

        dominant_topics = lda.transform(doc_term_matrix).argmax(axis=1)

        return {
            "assigned_topics": dominant_topics.tolist(),
            "topics_keywords": keywords_by_topic
        }

    except Exception as e:
        print(f"Error in LDA analysis: {str(e)}")
        return {"assigned_topics": [], "topics_keywords": {}}

# Topic assignment with error handling


In [None]:
def _collect_llm_topics(documents, processor):
    """Classify each document with the LLM and gather the cleaned topic labels."""
    labels = set()  # set gives automatic de-duplication

    # (response field, whether it is a comma-separated list, multi-word only)
    field_rules = [
        ('PRIMARY_TOPIC', False, False),
        ('SUBTOPICS', True, False),
        ('TECHNICAL_INDICATORS', True, True),  # single words are too generic
        ('CROSS_DOMAIN_ELEMENTS', True, False),
    ]

    for doc in documents:
        # Handled per document so one failure does not discard the others.
        try:
            response = processor.process_text(doc, "topic")
            if not (response and not response.error and response.content):
                continue

            for label, is_list, multiword_only in field_rules:
                raw = find_pattern_safely(rf'{label}:\s*([^\n]+)', response.content)
                if not raw:
                    continue
                for candidate in (raw.split(',') if is_list else [raw]):
                    cleaned = clean_topic(candidate)
                    if not cleaned:
                        continue
                    if multiword_only and len(cleaned.split()) <= 1:
                        continue
                    labels.add(cleaned)
        except Exception as e:
            print(f"Error in LLM topic analysis: {str(e)}")

    return labels


def _group_similar_topics(sorted_topics):
    """Collapse near-duplicate labels, keeping the shortest label of each cluster."""
    representatives = []
    seen = set()

    for topic in sorted_topics:
        if topic in seen:
            continue

        cluster = [topic]
        seen.add(topic)
        topic_lower = topic.lower()
        topic_words = set(topic_lower.split())

        for other in sorted_topics:
            if other in seen:
                continue
            other_lower = other.lower()
            # Similar when one label contains the other or they share two words.
            if (topic_lower in other_lower or
                    other_lower in topic_lower or
                    len(topic_words & set(other_lower.split())) >= 2):
                cluster.append(other)
                seen.add(other)

        representatives.append(min(cluster, key=len))

    return representatives


def assign_topic_to_group(documents, n_topics=5, processor=None):
    """Describe a group of documents with LDA topics and LLM-suggested labels.

    Args:
        documents: List of document strings.
        n_topics: Number of LDA topics to fit.
        processor: Object exposing process_text(text, task). Defaults to the
            notebook-level `llm_processor`.

    Returns:
        A dict with "lda_results", "llm_topics" (de-duplicated labels) and
        "combined_analysis" merging both. LLM failures are printed and leave
        the label list partial or empty rather than raising.
    """
    lda_results = perform_lda_analysis(documents, n_topics)

    llm_topics = set()
    try:
        # Resolved inside the try so a missing global degrades to "no labels".
        active_processor = processor if processor is not None else llm_processor
        llm_topics = _collect_llm_topics(documents, active_processor)
    except Exception as e:
        print(f"Error in LLM topic analysis: {str(e)}")

    # Sort for deterministic grouping and output.
    grouped_topics = _group_similar_topics(sorted(llm_topics))

    return {
        "lda_results": lda_results,
        "llm_topics": grouped_topics,
        "combined_analysis": {
            "assigned_topics": lda_results["assigned_topics"],
            "topics_keywords": lda_results["topics_keywords"],
            "llm_suggested_topics": grouped_topics
        }
    }

# Get a balanced dataset with a specified number of documents per category.

In [None]:
def get_balanced_dataset(newsgroups, category_groups, docs_per_category=3):
    """Pick the first N preprocessed documents of each category, group by group.

    Args:
        newsgroups: Dataset object exposing `data`, `target` and `target_names`.
        category_groups: List of groups, each a list of category names.
        docs_per_category: Maximum number of documents taken per category.

    Returns:
        A tuple (group_docs, category_counts): one list of preprocessed
        documents per group, and a dict mapping each category name to the
        number of documents taken for it. A summary is printed per group.
    """
    grouped_documents = []
    documents_per_category = {}

    for categories in category_groups:
        documents_in_group = []

        for name in categories:
            # Positions of every document labelled with this category.
            matching = [
                position
                for position, label in enumerate(newsgroups.target)
                if newsgroups.target_names[label] == name
            ]
            cleaned = [
                preprocess_text(newsgroups.data[position])
                for position in matching[:docs_per_category]
            ]
            documents_in_group += cleaned
            documents_per_category[name] = len(cleaned)

        grouped_documents.append(documents_in_group)

        print(f"\nGroup with categories {categories}:")
        print(f"Total documents: {len(documents_in_group)}")
        for name in categories:
            print(f"  - {name}: {documents_per_category[name]} documents")

    return grouped_documents, documents_per_category



# Evaluate groups multiple times

In [None]:
def _mean_and_std(values):
    """Return (mean, population standard deviation); (nan, nan) for no values."""
    if not values:
        return float('nan'), float('nan')
    mean = sum(values) / len(values)
    variance = sum((value - mean) ** 2 for value in values) / len(values)
    return mean, variance ** 0.5


def evaluate_multiple_times(group1, group2, group3, topics=None, num_iterations=3, processor=None):
    """Grade three document groups repeatedly with the LLM and score their coherence.

    Each group is summarised once, then graded `num_iterations` times against
    the other two groups. One coherence score per group is computed from the
    original (unsummarised) documents.

    Args:
        group1, group2, group3: Lists of document strings.
        topics: Label for each group, in order. Defaults to
            ['Technology', 'Scientific', 'Social/Political'].
        num_iterations: Number of LLM grading rounds.
        processor: Object exposing process_text(text, task, context).
            Defaults to the notebook-level `llm_processor`.

    Returns:
        A dict keyed by 'llm_scores' and 'coherence_scores'; each holds
        'scores' (label -> list of values), 'avg' and 'std' (label -> float).
        A label without any score gets NaN for both statistics.
    """
    if topics is None:
        topics = ['Technology', 'Scientific', 'Social/Political']
    if processor is None:
        processor = llm_processor

    raw_groups = [group1, group2, group3]

    scores = {
        'llm_scores': {topic: [] for topic in topics},
        'coherence_scores': {topic: [] for topic in topics}
    }

    print("Summarizing documents...")
    summarized_groups = [
        [processor.process_text(doc, "summarize").content for doc in group]
        for group in raw_groups
    ]

    # Tokenise every group once and build a shared dictionary.
    tokenized_groups = [preprocess_documents(group) for group in raw_groups]
    all_docs_tokenized = [doc_tokens for group in tokenized_groups for doc_tokens in group]
    dictionary = Dictionary(all_docs_tokenized)
    dictionary.filter_extremes(no_below=2, no_above=0.95)  # Filter extreme terms

    for i in range(num_iterations):
        print(f"\nIteration {i + 1}/{num_iterations}")
        print("Performing LLM evaluation...")

        for j, (group, topic) in enumerate(zip(summarized_groups, topics)):
            other_groups = [g for k, g in enumerate(summarized_groups) if k != j]
            response = processor.process_text("", "grade", {
                "documents": "\n".join(group),
                "other_groups": "\n".join(["\n".join(g) for g in other_groups])
            })
            # A failed call carries no score; skipping it keeps the
            # statistics computable instead of crashing on None.
            if response.score is None:
                print(f"  Skipping failed grading for {topic}: {response.error}")
                continue
            scores['llm_scores'][topic].append(response.score)

    print("Calculating coherence scores...")
    coherence_scores = calculate_coherence_scores(tokenized_groups, dictionary)
    for topic, score in zip(topics, coherence_scores):
        scores['coherence_scores'][topic] = [score]  # Single coherence score per group

    results = {}
    for score_type in ['llm_scores', 'coherence_scores']:
        stats = {
            topic: _mean_and_std(topic_scores)
            for topic, topic_scores in scores[score_type].items()
        }
        results[score_type] = {
            'scores': scores[score_type],
            'avg': {topic: mean for topic, (mean, _std) in stats.items()},
            'std': {topic: std for topic, (_mean, std) in stats.items()}
        }

    return results

# Run experiment

In [None]:
if __name__ == "__main__":
    import os

    # Read the Groq API key from the environment; credentials must never be
    # stored in the notebook. The key that used to be hard-coded here should
    # be treated as leaked and revoked.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the GROQ_API_KEY environment variable before running this cell."
        )
    client = Groq(api_key=api_key)

    # Initialize processors
    llm_processor = EnhancedLLMProcessor(api_key)
    rag_system = RAGSystem()

    # Load the 20 newsgroups dataset
    print("Loading dataset...")
    newsgroups = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))

    # Labels for the three groups, in order.
    # NOTE(review): the third group mixes five unrelated categories, so the
    # 'Political' label looks inaccurate - confirm the intended name.
    topics = ['graphics', 'space', 'Political']

    # Two single-category groups with several documents each.
    group_categories = [
        ['comp.graphics'],
        ['sci.space']
    ]

    # One document from each of five categories, merged below into one mixed group.
    group2_categories = [
        ['sci.space'],
        ['sci.electronics'],
        ['rec.autos'],
        ['soc.religion.christian'],
        ['talk.politics.mideast']
    ]

    # Get balanced dataset
    print("\nGetting balanced dataset...")
    balanced_groups, category_counts = get_balanced_dataset(newsgroups, group_categories, docs_per_category=5)
    group1, group2 = balanced_groups

    # Get unbalanced group
    print("\nGetting unbalanced dataset...")
    unbalanced_group = get_balanced_dataset(newsgroups, group2_categories, docs_per_category=1)
    # Element 0 is the list of per-category document lists; flatten it.
    group3 = unbalanced_group[0]
    group3 = [string for sublist in group3 for string in sublist]

    # Initialize RAG system
    print("\nInitializing RAG system...")
    all_docs = group1 + group2 + group3
    rag_system.add_documents(all_docs)

    # Perform topic analysis
    print("\nAnalyzing topics for each group...")
    print("\nGroup 1 (Graphics) Topics:")
    print(assign_topic_to_group(group1))
    print("\nGroup 2 (Space) Topics:")
    print(assign_topic_to_group(group2))
    print("\nGroup 3 (Mixed) Topics:")
    print(assign_topic_to_group(group3))

    # Perform multiple evaluations
    print("\nPerforming multiple evaluations...")
    results = evaluate_multiple_times(group1, group2, group3, topics=topics, num_iterations=5)

    # Print results
    print("\nEvaluation Results:")
    print("-" * 80)

    print("\nLLM Scores (1-10):")
    for topic in topics:
        print(f"\n{topic}:")
        print(f"  Average Score: {results['llm_scores']['avg'][topic]:.2f}")
        print(f"  Standard Deviation: {results['llm_scores']['std'][topic]:.2f}")
        print(f"  All Scores: {results['llm_scores']['scores'][topic]}")

    print("\nCoherence Scores:")
    for topic in topics:
        print(f"\n{topic}:")
        print(f"  Score: {results['coherence_scores']['avg'][topic]:.2f}")
        print(f"  Standard Deviation: {results['coherence_scores']['std'][topic]:.2f}")
        if topic in results['coherence_scores']['scores']:
            print(f"  All Scores: {results['coherence_scores']['scores'][topic]}")

    print("\nAnalysis complete!")

Loading dataset...

Getting balanced dataset...

Group with categories ['comp.graphics']:
Total documents: 5
  - comp.graphics: 5 documents

Group with categories ['sci.space']:
Total documents: 5
  - sci.space: 5 documents

Getting unbalanced dataset...

Group with categories ['sci.space']:
Total documents: 1
  - sci.space: 1 documents

Group with categories ['sci.electronics']:
Total documents: 1
  - sci.electronics: 1 documents

Group with categories ['rec.autos']:
Total documents: 1
  - rec.autos: 1 documents

Group with categories ['soc.religion.christian']:
Total documents: 1
  - soc.religion.christian: 1 documents

Group with categories ['talk.politics.mideast']:
Total documents: 1
  - talk.politics.mideast: 1 documents

Initializing RAG system...

Analyzing topics for each group...

Group 1 (Graphics) Topics:
{'lda_results': {'assigned_topics': [0, 4, 2, 4, 0], 'topics_keywords': {0: ['international', 'features', 'geometric', 'description', 'using', 'form'], 1: ['using', 'form'