## Importing Libraries

In [1]:
from typing import List
import os
import pdb
import re
import ast
import json
import random
import pandas as pd
import numpy as np
from openai import OpenAI
from importlib import resources
from IPython.display import display, clear_output
from chunking_evaluation.utils import rigorous_document_search
from chunking_evaluation.evaluation_framework.base_evaluation import BaseEvaluation

class SyntheticEvaluation(BaseEvaluation):
    def __init__(self, corpora_paths: List[str], queries_csv_path: str, chroma_db_path: str = None,
                 openai_api_key=None):
        super().__init__(questions_csv_path=queries_csv_path, chroma_db_path=chroma_db_path)
        self.corpora_paths = corpora_paths
        self.questions_csv_path = queries_csv_path
        self.client = OpenAI(base_url="Your-URL-HERE", api_key="XXXX-Your-API-Key-Here-XXX")
        self.synth_questions_df = None

        with resources.as_file(resources.files('chunking_evaluation.evaluation_framework') / 'prompts') as prompt_path:
            with open(os.path.join(prompt_path, 'question_maker_system.txt'), 'r') as f:
                self.question_maker_system_prompt = f.read()

            with open(os.path.join(prompt_path, 'question_maker_approx_system.txt'), 'r') as f:
                self.question_maker_approx_system_prompt = f.read()

            with open(os.path.join(prompt_path, 'question_maker_user.txt'), 'r') as f:
                self.question_maker_user_prompt = f.read()

            with open(os.path.join(prompt_path, 'question_maker_approx_user.txt'), 'r') as f:
                self.question_maker_approx_user_prompt = f.read()

    def _save_questions_df(self):
        self.synth_questions_df.to_csv(self.questions_csv_path, index=False)

    def _tag_text(self, text):
        chunk_length = 100
        chunks = []
        tag_indexes = [0]
        start = 0
        while start < len(text):
            end = start + chunk_length
            chunk = text[start:end]
            if end < len(text):
                # Find the last space within the chunk to avoid splitting a word
                space_index = chunk.rfind(' ')
                if space_index != -1:
                    end = start + space_index + 1  # Include the space in the chunk
                    chunk = text[start:end]
            chunks.append(chunk)
            tag_indexes.append(end)
            start = end  # Move start to end to continue splitting

        tagged_text = ""
        for i, chunk in enumerate(chunks):
            tagged_text += f"<start_chunk_{i}>" + chunk + f"<end_chunk_{i}>"

        return tagged_text, tag_indexes

    def convert_text_to_json(self,text: str):
        """
        Removes the text between <think> and </think> tags and converts
        the remaining text (assumed to be a Python dictionary literal)
        into a JSON string.
    
        Args:
            text (str): The original text containing a <think>...</think> block
                        and the dictionary to convert.
    
        Returns:
            str: A JSON-formatted string representing the dictionary.
        """
        # Remove everything between <think> and </think> (including the tags)
        cleaned_text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    
        # Strip any leading/trailing whitespace
        cleaned_text = cleaned_text.strip()
    
        # Parse the remaining text as a Python literal.
        # (Using ast.literal_eval since the text is written as a Python dictionary.)
        try:
            data = ast.literal_eval(cleaned_text)
        except Exception as e:
            raise ValueError("Failed to parse the remaining text as a dictionary: " + str(e))
    
        # Convert the Python dictionary to a JSON-formatted string
        json_response = json.dumps(data, indent=2)
        
        return json_response

    def _extract_question_and_approx_references(self, corpus, document_length=4000, prev_questions=[]):
        
        if len(corpus) > document_length:
            start_index = random.randint(0, len(corpus) - document_length)
            document = corpus[start_index: start_index + document_length]
        else:
            start_index = 0
            document = corpus

        if prev_questions is not None:
            if len(prev_questions) > 20:
                questions_sample = random.sample(prev_questions, 20)
                prev_questions_str = '\n'.join(questions_sample)
            else:
                prev_questions_str = '\n'.join(prev_questions)
        else:
            prev_questions_str = ""

        tagged_text, tag_indexes = self._tag_text(document)

        completion = self.client.chat.completions.create(
            model="deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
            messages=[
                {"role": "system", "content": self.question_maker_approx_system_prompt},
                {"role": "user",
                 "content": self.question_maker_approx_user_prompt.replace("{document}", tagged_text).replace(
                     "{prev_questions_str}", prev_questions_str)}
            ]
        )

        response = completion.choices[0].message.content
        json_response = json.loads(self.convert_text_to_json(response))
        
        print(json_response)
        
        try:
            text_references = json_response['references']
        except KeyError:
            raise ValueError("The response does not contain a 'references' field.")

        try:
            question = json_response['question']
        except KeyError:
            raise ValueError("The response does not contain a 'question' field.")

        
        references = []
    
        for reference in text_references:
            try:
                reference_keys = list(reference.keys())
    
                if len(reference_keys) != 3:
                    raise ValueError(
                        f"Each reference must have exactly 3 keys: 'content', 'start_chunk', and 'end_chunk'. Got keys: {reference_keys}")
    
                if 'start_chunk' not in reference_keys or 'end_chunk' not in reference_keys:
                    raise ValueError("Each reference must contain 'start_chunk' and 'end_chunk' keys.")
    
                if 'end_chunk' not in reference_keys:
                    reference_keys.remove('content')
                    reference_keys.remove('start_chunk')
                    end_chunk_key = reference_keys[0]
                    end_index = start_index + tag_indexes[reference[end_chunk_key] + 1]
                else:
                    end_index = start_index + tag_indexes[reference['end_chunk'] + 1]
    
                start_index = start_index + tag_indexes[reference['start_chunk']]
                references.append((corpus[start_index:end_index], start_index, end_index))
                
            except Exception as e:
                print(e)
                
        return question, references

    def _extract_question_and_references(self, corpus, document_length=4000, prev_questions=[]):
        if len(corpus) > document_length:
            start_index = random.randint(0, len(corpus) - document_length)
            document = corpus[start_index: start_index + document_length]
        else:
            document = corpus

        if prev_questions is not None:
            if len(prev_questions) > 20:
                questions_sample = random.sample(prev_questions, 20)
                prev_questions_str = '\n'.join(questions_sample)
            else:
                prev_questions_str = '\n'.join(prev_questions)
        else:
            prev_questions_str = ""

        completion = self.client.chat.completions.create(
            model="deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
            messages=[
                {"role": "system", "content": self.question_maker_system_prompt},
                {"role": "user", "content": self.question_maker_user_prompt.replace("{document}", document).replace(
                    "{prev_questions_str}", prev_questions_str)}
            ]
        )

        response = completion.choices[0].message
        json_response = json.loads(response.json())
        
        input_str = json_response['content']
        # new_string = self.remove_think_tags(input_str)
        new_string = input_str.replace("```json", "```")
        cleaned_string = new_string.strip().strip('```').strip()
            
        parsed_dict = json.loads(input_str)
        print(parsed_dict)
        
        json_response = parsed_dict


        try:
            text_references = json_response['references']
        except KeyError:
            raise ValueError("The response does not contain a 'references' field.")

        try:
            question = json_response['question']
        except KeyError:
            raise ValueError("The response does not contain a 'question' field.")

        references = []
        for reference in text_references:
            if not isinstance(reference, str):
                raise ValueError(f"Expected reference to be of type str, but got {type(reference).__name__}")
            target = rigorous_document_search(corpus, reference)
            if target is not None:
                reference, start_index, end_index = target
                references.append((reference, start_index, end_index))
            else:
                raise ValueError(f"No match found in the document for the given reference.\nReference: {reference}")

        return question, references

    def _generate_corpus_questions(self, corpus_id, approx=False, n=5):
        with open(corpus_id, 'r') as file:
            corpus = file.read()

        i = 0
        while i < n:
            while True:
                try:
                    print(f"Trying Query {i}")
                    questions_list = self.synth_questions_df[self.synth_questions_df['corpus_id'] == corpus_id][
                        'question'].tolist()
                    if approx:
                        question, references = self._extract_question_and_approx_references(corpus, 4000,
                                                                                            questions_list)
                    else:
                        question, references = self._extract_question_and_references(corpus, 4000, questions_list)
                    if len(references) > 5:
                        raise ValueError("The number of references exceeds 5.")

                    references = [{'content': ref[0], 'start_index': ref[1], 'end_index': ref[2]} for ref in references]
                    new_question = {
                        'question': question,
                        'references': json.dumps(references),
                        'corpus_id': corpus_id
                    }

                    new_df = pd.DataFrame([new_question])
                    self.synth_questions_df = pd.concat([self.synth_questions_df, new_df], ignore_index=True)
                    self._save_questions_df()

                    break
                except (ValueError, json.JSONDecodeError) as e:
                    print(f"Error occurred: {e}")
                    continue
            i += 1

    def _get_synth_questions_df(self):
        if os.path.exists(self.questions_csv_path):
            synth_questions_df = pd.read_csv(self.questions_csv_path)
        else:
            synth_questions_df = pd.DataFrame(columns=['question', 'references', 'corpus_id'])
        return synth_questions_df

    def generate_queries_and_excerpts(self, approximate_excerpts=False, num_rounds=-1, queries_per_corpus=5):
        self.synth_questions_df = self._get_synth_questions_df()

        rounds = 0
        while num_rounds == -1 or rounds < num_rounds:
            for corpus_id in self.corpora_paths:
                self._generate_corpus_questions(corpus_id, approx=approximate_excerpts, n=queries_per_corpus)
            rounds += 1

    def _get_sim(self, target, references):
        response = self.client.embeddings.create(
            input=[target] + references,
            model="text-embedding-bge-m3"
        )
        nparray1 = np.array(response.data[0].embedding)

        full_sim = []
        for i in range(1, len(response.data)):
            nparray2 = np.array(response.data[i].embedding)
            cosine_similarity = np.dot(nparray1, nparray2) / (np.linalg.norm(nparray1) * np.linalg.norm(nparray2))
            full_sim.append(cosine_similarity)

        return full_sim

    def _corpus_filter_poor_highlights(self, corpus_id, synth_questions_df, threshold):
        corpus_questions_df = synth_questions_df[synth_questions_df['corpus_id'] == corpus_id]

        def edit_row(row):
            question = row['question']
            references = [ref['content'] for ref in row['references']]
            similarity_scores = self._get_sim(question, references)
            worst_ref_score = min(similarity_scores)
            row['worst_ref_score'] = worst_ref_score
            return row

        # Apply the function to each row
        corpus_questions_df = corpus_questions_df.apply(edit_row, axis=1)

        count_before = len(corpus_questions_df)

        corpus_questions_df = corpus_questions_df[corpus_questions_df['worst_ref_score'] >= threshold]
        corpus_questions_df = corpus_questions_df.drop(columns=['worst_ref_score'])

        count_after = len(corpus_questions_df)

        print(f"Corpus: {corpus_id} - Removed {count_before - count_after} .")

        corpus_questions_df['references'] = corpus_questions_df['references'].apply(json.dumps)

        full_questions_df = pd.read_csv(self.questions_csv_path)
        full_questions_df = full_questions_df[full_questions_df['corpus_id'] != corpus_id]

        full_questions_df = pd.concat([full_questions_df, corpus_questions_df], ignore_index=True)
        # Drop the columns 'fixed', 'worst_ref_score' and 'diff_score' if they exist
        for col in ['fixed', 'worst_ref_score', 'diff_score']:
            if col in full_questions_df.columns:
                full_questions_df = full_questions_df.drop(columns=col)

        full_questions_df.to_csv(self.questions_csv_path, index=False)

    def filter_poor_excerpts(self, threshold=0.36, corpora_subset=[]):
        if os.path.exists(self.questions_csv_path):
            synth_questions_df = pd.read_csv(self.questions_csv_path)
            if len(synth_questions_df) > 0:
                synth_questions_df['references'] = synth_questions_df['references'].apply(json.loads)
                corpus_list = synth_questions_df['corpus_id'].unique().tolist()
                if corpora_subset:
                    corpus_list = [c for c in corpus_list if c in corpora_subset]
                for corpus_id in corpus_list:
                    self._corpus_filter_poor_highlights(corpus_id, synth_questions_df, threshold)

    def _corpus_filter_duplicates(self, corpus_id, synth_questions_df, threshold):
        corpus_questions_df = synth_questions_df[synth_questions_df['corpus_id'] == corpus_id].copy()

        count_before = len(corpus_questions_df)

        corpus_questions_df.drop_duplicates(subset='question', keep='first', inplace=True)

        questions = corpus_questions_df['question'].tolist()

        response = self.client.embeddings.create(
            input=questions,
            model="text-embedding-bge-m3"
        )

        embeddings_matrix = np.array([data.embedding for data in response.data])

        dot_product_matrix = np.dot(embeddings_matrix, embeddings_matrix.T)

        # Create a list of tuples containing the index pairs and their similarity
        similarity_pairs = [(i, j, dot_product_matrix[i][j]) for i in range(len(dot_product_matrix)) for j in
                            range(i + 1, len(dot_product_matrix))]

        # Sort the list of tuples based on the similarity in descending order
        similarity_pairs.sort(key=lambda x: x[2], reverse=True)

        similarity_scores = np.array([x[2] for x in similarity_pairs])

        most_similars = (dot_product_matrix - np.eye(dot_product_matrix.shape[0])).max(axis=1)

        def filter_vectors(sim_matrix, threshold):
            n = sim_matrix.shape[0]  # Number of vectors
            remaining = np.ones(n, dtype=bool)  # Initialize all vectors as remaining

            for i in range(n):
                if remaining[i] == 1:  # Only check for vectors that are still remaining
                    for j in range(i + 1, n):
                        if remaining[j] == 1 and sim_matrix[i, j] > threshold:
                            remaining[j] = 0  # Remove vector j because it's too similar to vector i

            return remaining

        rows_to_keep = filter_vectors(dot_product_matrix, threshold)

        corpus_questions_df = corpus_questions_df[rows_to_keep]

        count_after = len(corpus_questions_df)

        print(f"Corpus: {corpus_id} - Removed {count_before - count_after} .")

        corpus_questions_df['references'] = corpus_questions_df['references'].apply(json.dumps)

        full_questions_df = pd.read_csv(self.questions_csv_path)
        full_questions_df = full_questions_df[full_questions_df['corpus_id'] != corpus_id]

        full_questions_df = pd.concat([full_questions_df, corpus_questions_df], ignore_index=True)
        # Drop the columns 'fixed', 'worst_ref_score' and 'diff_score' if they exist
        for col in ['fixed', 'worst_ref_score', 'diff_score']:
            if col in full_questions_df.columns:
                full_questions_df = full_questions_df.drop(columns=col)

        full_questions_df.to_csv(self.questions_csv_path, index=False)

    def filter_duplicates(self, threshold=0.78, corpora_subset=[]):
        if os.path.exists(self.questions_csv_path):
            synth_questions_df = pd.read_csv(self.questions_csv_path)
            if len(synth_questions_df) > 0:
                synth_questions_df['references'] = synth_questions_df['references'].apply(json.loads)
                corpus_list = synth_questions_df['corpus_id'].unique().tolist()
                if corpora_subset:
                    corpus_list = [c for c in corpus_list if c in corpora_subset]
                for corpus_id in corpus_list:
                    self._corpus_filter_duplicates(corpus_id, synth_questions_df, threshold)

    def question_ref_filter(self):
        self.synth_questions_df = self._get_synth_questions_df()





In [3]:
# Specify the corpora paths, you can have multiple but we'll just use our one file
corpora_paths = [
    'chunking_evaluation/cybersecurity/cyberclean.txt',
    'chunking_evaluation/cybersecurity/reportsclean.txt'
]

csv_path = 'chunking_evaluation/cybersecurity_results_DeepSeek-R1-Distill-Llama-70B.csv'

# Initialize the evaluation
synthetic_pipeline = SyntheticEvaluation(corpora_paths, csv_path)
synthetic_pipeline.generate_queries_and_excerpts(approximate_excerpts=True,
                                         num_rounds=1,
                                         queries_per_corpus=50)


Trying Query 0
{'oath': "I will not use the word 'and' in the question unless it is part of a proper noun. I will also make sure the question is concise.", 'question': 'What supplementary NSA guidance is available for ensuring a secure network environment?', 'references': [{'content': 'Supplementary NSA guidance on ensuring a secure and defensible network environment is available at www.nsa.gov/cybersecurity-guidance. Of particular relevance are:', 'start_chunk': 13, 'end_chunk': 14}, {'content': 's Top Ten Cybersecurity Mitigation Strategies', 'start_chunk': 15, 'end_chunk': 15}, {'content': 'Defend Privileges and Accounts', 'start_chunk': 15, 'end_chunk': 15}, {'content': 'Continuously Hunt for Network Intrusions', 'start_chunk': 16, 'end_chunk': 16}, {'content': 'Segment Networks and Deploy Application-aware Defenses', 'start_chunk': 16, 'end_chunk': 16}]}
Trying Query 1
Error occurred: Failed to parse the remaining text as a dictionary: unterminated string literal (detected at line

In [4]:
synthetic_df = pd.read_csv("chunking_evaluation/cybersecurity_results_DeepSeek-R1-Distill-Llama-70B.csv")
synthetic_df.head()

Unnamed: 0,question,references,corpus_id
0,What supplementary NSA guidance is available f...,"[{""content"": ""(CI/CD) Environments\nFurther gu...",chunking_evaluation/cybersecurity/cyberclean.txt
1,What are the key steps to mitigate the modific...,"[{""content"": ""to hide malicious code,\nreduce ...",chunking_evaluation/cybersecurity/cyberclean.txt
2,What Execution techniques does an MCA employ t...,"[{""content"": ""consists of techniques that resu...",chunking_evaluation/cybersecurity/cyberclean.txt
3,How do you create a text hexadecimal hash for ...,"[{""content"": ""files can be signed to become AU...",chunking_evaluation/cybersecurity/cyberclean.txt
4,What key tasks are outlined in the FPGA config...,"[{""content"": ""........................ 54\nVer...",chunking_evaluation/cybersecurity/cyberclean.txt


In [6]:
from chunking_evaluation import GeneralEvaluation, SyntheticEvaluation, BaseChunker
from chromadb.utils import embedding_functions

class SentenceChunker(BaseChunker):
    def __init__(self, sentences_per_chunk: int = 3):
        # Initialize the chunker with the number of sentences per chunk
        self.sentences_per_chunk = sentences_per_chunk

    def split_text(self, text: str) -> List[str]:
        # Handle the case where the input text is empty
        if not text:
            return []

        # Split the input text into sentences using regular expression
        # Regex looks for white space following . ! or ? and makes a split
        sentences = re.split(r'(?<=[.!?])\s+', text)
        chunks = []

        # Group sentences into chunks based on the specified number
        for i in range(0, len(sentences), self.sentences_per_chunk):
            # Combine sentences into a single chunk
            chunk = ' '.join(sentences[i:i + self.sentences_per_chunk])
            chunks.append(chunk)
        
        # Return the list of chunks
        return chunks

In [7]:
# Helper Function
def print_metrics(results):
    
    # Grab Summary Metrics    
    metrics = {
        'Recall': (results['recall_mean'], results['recall_std']),
        'Precision': (results['precision_mean'], results['precision_std']),
        'Precision Ω': (results['precision_omega_mean'], results['precision_omega_std']),
        'IoU': (results['iou_mean'], results['iou_std'])
    }
    
    # Print each metric with mean ± std
    for metric, (mean, std) in metrics.items():
        print(f"{metric}: {mean:.4f} ± {std:.4f}")
        

In [9]:
# Defining our Configurations
chunkers = [
    SentenceChunker(sentences_per_chunk = 5),
    SentenceChunker(sentences_per_chunk = 10),
    SentenceChunker(sentences_per_chunk = 15),
    SentenceChunker(sentences_per_chunk = 20),
]

# Defining our Embedding Functions
embedders = [
    embedding_functions.OpenAIEmbeddingFunction(
    api_base="http://127.0.0.1:1234/v1",
    api_key="lm-studio",
    model_name="text-embedding-bge-m3"
)

]

# Initialize Results Storage
synth_results = []

# Helper Function
def get_config_name(chunker, ef):
    chunk_size = chunker.sentences_per_chunk if hasattr(chunker, 'sentences_per_chunk') else 0
    ef_name = ef.model_name if hasattr(ef, 'model_name') else ef.__class__.__name__
    return f"{chunker.__class__.__name__}_{chunk_size}_{ef_name}"

# Progress tracking
total_combinations = len(chunkers) * len(embedders)
current_combination = 0

# Run evaluation sweep
for chunker in chunkers:
    for ef in embedders:
        current_combination += 1
        try:
            print(f"Evaluating combination {current_combination}/{total_combinations}:")
            print(f"  Chunker: {chunker.__class__.__name__} (size: {chunker.sentences_per_chunk})")
            print(f"  Embedding: {ef.model_name if hasattr(ef, 'model_name') else ef.__class__.__name__}")
            
            # Run evaluation
            result = synthetic_pipeline.run(chunker, ef, retrieve=5)
            
            # Clean up and store results
            if 'corpora_scores' in result:
                del result['corpora_scores']
            
            # Add configuration identifiers
            result['chunker'] = chunker.__class__.__name__
            result['chunk_size'] = chunker.sentences_per_chunk
            result['embedding_function'] = ef.model_name if hasattr(ef, 'model_name') else ef.__class__.__name__
            result['config'] = get_config_name(chunker, ef)
            
            synth_results.append(result)
            clear_output(wait=True)

        except Exception as e:
            # Error Handling Just in Case
            print(f"Error in combination {current_combination}: {str(e)}")
            continue

# Create final DataFrame and display
synth_df = pd.DataFrame(synth_results)
print("\nFinal Results:")
display(synth_df)
file_name = "cybersecurity-DeepSeek-R1-Distill-Llama-70B-BGE-M3.xlsx"
synth_df.to_excel(file_name,index=False)


Final Results:


Unnamed: 0,iou_mean,iou_std,recall_mean,recall_std,precision_omega_mean,precision_omega_std,precision_mean,precision_std,chunker,chunk_size,embedding_function,config
0,0.042323,0.044814,0.491729,0.409595,0.195943,0.122531,0.044452,0.046763,SentenceChunker,5,OpenAIEmbeddingFunction,SentenceChunker_5_OpenAIEmbeddingFunction
1,0.030474,0.028673,0.61552,0.404676,0.124257,0.08278,0.031091,0.029549,SentenceChunker,10,OpenAIEmbeddingFunction,SentenceChunker_10_OpenAIEmbeddingFunction
2,0.021067,0.021181,0.593829,0.42013,0.09718,0.069179,0.021297,0.021525,SentenceChunker,15,OpenAIEmbeddingFunction,SentenceChunker_15_OpenAIEmbeddingFunction
3,0.019268,0.01905,0.651514,0.410001,0.082155,0.056704,0.019386,0.019183,SentenceChunker,20,OpenAIEmbeddingFunction,SentenceChunker_20_OpenAIEmbeddingFunction


In [8]:
# Defining our Configurations
chunkers = [
    SentenceChunker(sentences_per_chunk = 5),
    SentenceChunker(sentences_per_chunk = 10),
    SentenceChunker(sentences_per_chunk = 15),
    SentenceChunker(sentences_per_chunk = 20),
]

# Defining our Embedding Functions
embedders = [
    embedding_functions.OpenAIEmbeddingFunction(
    api_base="http://localhost:11434/v1",
    api_key="lm-studio",
    model_name="nomic-embed-text"
)

]

# Initialize Results Storage
synth_results = []

# Helper Function
def get_config_name(chunker, ef):
    chunk_size = chunker.sentences_per_chunk if hasattr(chunker, 'sentences_per_chunk') else 0
    ef_name = ef.model_name if hasattr(ef, 'model_name') else ef.__class__.__name__
    return f"{chunker.__class__.__name__}_{chunk_size}_{ef_name}"

# Progress tracking
total_combinations = len(chunkers) * len(embedders)
current_combination = 0

# Run evaluation sweep
for chunker in chunkers:
    for ef in embedders:
        current_combination += 1
        try:
            print(f"Evaluating combination {current_combination}/{total_combinations}:")
            print(f"  Chunker: {chunker.__class__.__name__} (size: {chunker.sentences_per_chunk})")
            print(f"  Embedding: {ef.model_name if hasattr(ef, 'model_name') else ef.__class__.__name__}")
            
            # Run evaluation
            result = synthetic_pipeline.run(chunker, ef, retrieve=5)
            
            # Clean up and store results
            if 'corpora_scores' in result:
                del result['corpora_scores']
            
            # Add configuration identifiers
            result['chunker'] = chunker.__class__.__name__
            result['chunk_size'] = chunker.sentences_per_chunk
            result['embedding_function'] = ef.model_name if hasattr(ef, 'model_name') else ef.__class__.__name__
            result['config'] = get_config_name(chunker, ef)
            
            synth_results.append(result)
            clear_output(wait=True)

        except Exception as e:
            # Error Handling Just in Case
            print(f"Error in combination {current_combination}: {str(e)}")
            continue

# Create final DataFrame and display
synth_df = pd.DataFrame(synth_results)
print("\nFinal Results:")
display(synth_df)
file_name = "cybersecurity-DeepSeek-R1-Distill-Llama-70B-Nomic.xlsx"
synth_df.to_excel(file_name,index=False)


Final Results:


Unnamed: 0,iou_mean,iou_std,recall_mean,recall_std,precision_omega_mean,precision_omega_std,precision_mean,precision_std,chunker,chunk_size,embedding_function,config
0,0.042222,0.048184,0.382884,0.392884,0.195943,0.122531,0.04458,0.049918,SentenceChunker,5,OpenAIEmbeddingFunction,SentenceChunker_5_OpenAIEmbeddingFunction
1,0.022414,0.030424,0.358783,0.416037,0.124257,0.08278,0.02293,0.030939,SentenceChunker,10,OpenAIEmbeddingFunction,SentenceChunker_10_OpenAIEmbeddingFunction
2,0.015498,0.023358,0.338482,0.421248,0.09718,0.069179,0.015734,0.023587,SentenceChunker,15,OpenAIEmbeddingFunction,SentenceChunker_15_OpenAIEmbeddingFunction
3,0.01047,0.016681,0.279655,0.388131,0.082155,0.056704,0.010635,0.01685,SentenceChunker,20,OpenAIEmbeddingFunction,SentenceChunker_20_OpenAIEmbeddingFunction
