In [1]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import re
import mlflow
import time
import numpy as np
from sentence_transformers import util as st_util
import torch





In [2]:
mlflow.set_tracking_uri("file:./mlruns")  # local directory
mlflow.set_experiment("AUIChat-Embedding-Experiments")


<Experiment: artifact_location='file:///home/barneh/Rag-Based-LLM_AUIChat/rag_based_llm_auichat/notebooks/mlruns/180508001129329925', creation_time=1742769210427, experiment_id='180508001129329925', last_update_time=1742769210427, lifecycle_stage='active', name='AUIChat-Embedding-Experiments', tags={}>

2️⃣ Utility Functions 🧹
Clean Text

In [3]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'\s+', ' ', text).strip()
    text = re.sub(r'[^a-zA-Z0-9,.!?;:\'\"()\[\]\s]', '', text)
    return text


3️⃣ Define Embedding Models to Compare 📐

In [4]:
embedding_models = {
    "msmarco": "sentence-transformers/msmarco-distilbert-base-v4",
    "miniLM": "sentence-transformers/all-MiniLM-L6-v2",
    "bge": "BAAI/bge-small-en-v1.5"
}

4️⃣ Define Chunking Strategies ✂️

In [5]:
chunk_configs = [
    {"chunk_size": 400, "overlap": 50},
    {"chunk_size": 250, "overlap": 25},
    {"chunk_size": 100, "overlap": 0}
]

5️⃣ Load & Preprocess Documents 📄

In [6]:
documents = SimpleDirectoryReader("/home/barneh/Rag-Based-LLM_AUIChat/raw").load_data()

6️⃣ Helper Function for Running Experiments 🧪

In [7]:
def calculate_relevance_score(query, source_nodes):
    """Calculate relevance score between query and retrieved chunks"""
    if not source_nodes:
        return 0.0
    
    # Get the query embedding using the current embed model
    query_embedding = Settings.embed_model.get_query_embedding(query)
    
    # Get embeddings for all source nodes
    relevance_scores = []
    for node in source_nodes:
        if hasattr(node, 'embedding') and node.embedding is not None:
            node_embedding = node.embedding
        else:
            # If node doesn't have embedding, compute it
            node_embedding = Settings.embed_model.get_text_embedding(node.get_text())
            
        # Calculate cosine similarity
        if isinstance(query_embedding, list) and isinstance(node_embedding, list):
            query_tensor = torch.tensor([query_embedding])
            node_tensor = torch.tensor([node_embedding])
            score = st_util.pytorch_cos_sim(query_tensor, node_tensor).item()
        else:
            score = np.dot(query_embedding, node_embedding) / (np.linalg.norm(query_embedding) * np.linalg.norm(node_embedding))
            
        relevance_scores.append(score)
    
    # Return average relevance score
    return sum(relevance_scores) / len(relevance_scores) if relevance_scores else 0.0

def count_tokens(text):
    """Approximate token count using a simple whitespace-based approach"""
    return len(text.split())

def run_experiment(model_name, model_path, chunk_size, overlap, documents):
    """Run a single experiment with specific model and chunk configuration"""
    print(f"\n🔍 Testing with model: {model_name}")
    print(f"📏 Chunk size: {chunk_size}, Overlap: {overlap}")
    
    # Set up embedding model
    embed_model = HuggingFaceEmbedding(model_name=model_path)
    Settings.embed_model = embed_model
    
    # Start MLflow run
    with mlflow.start_run(run_name=f"{model_name}_{chunk_size}"):
        # Log parameters
        mlflow.log_param("embedding_model", model_name)
        mlflow.log_param("chunk_size", chunk_size)
        mlflow.log_param("overlap", overlap)
        
        # Process documents
        start_time = time.time()
        
        splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
        nodes = splitter.get_nodes_from_documents(documents)
        
        for node in nodes:
            node.text = clean_text(node.text)
            
        # Create index
        storage_context = StorageContext.from_defaults()
        index = VectorStoreIndex(nodes, storage_context=storage_context)
        
        # Log metrics about the index
        index_build_time = time.time() - start_time
        mlflow.log_metric("index_build_time_seconds", index_build_time)
        mlflow.log_metric("total_chunks", len(nodes))
        
        # Set up query engine
        query_engine = index.as_query_engine(
            llm=HuggingFaceInferenceAPI(
                model_name="mistralai/Mistral-7B-Instruct-v0.3",
                token="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"
            )
        )
        
        # Run query with timing
        query = "What are the requirements for the PiP program?"
        query_start_time = time.time()
        response = query_engine.query(query)
        query_time = time.time() - query_start_time
        
        # Extract metrics from response
        source_nodes = getattr(response, 'source_nodes', [])
        retrieved_chunks = len(source_nodes) if source_nodes else 0
        response_text = str(response)
        response_tokens = count_tokens(response_text)
        
        # Calculate relevance score
        relevance_score = calculate_relevance_score(query, source_nodes)
        
        # Log response metrics
        mlflow.log_metric("response_time_seconds", query_time)
        mlflow.log_metric("retrieved_chunks", retrieved_chunks)
        mlflow.log_metric("token_count", response_tokens)
        mlflow.log_metric("relevance_score", relevance_score)
        mlflow.log_metric("tokens_per_second", response_tokens / query_time if query_time > 0 else 0)
        
        # Log response text
        mlflow.log_text(str(response), "response.txt")
        
        # Log source nodes for analysis
        if source_nodes:
            source_texts = "\n\n---\n\n".join([node.get_text() for node in source_nodes])
            mlflow.log_text(source_texts, "source_chunks.txt")
        
        print("🧠 Response:")
        print(response)
        print(f"⏱️ Response Time: {query_time:.2f} seconds")
        print(f"📊 Relevance Score: {relevance_score:.4f}")
        print(f"🧩 Chunks Retrieved: {retrieved_chunks}")
        print(f"🔤 Token Count: {response_tokens}")
        
        return response

7️⃣ Run Individual Experiments 🧪

Each experiment is separated into its own cell to prevent VS Code from crashing.

### Model: msmarco with chunk size: 400

In [8]:
# Run msmarco with chunk size 400
model_name = "msmarco"
model_path = embedding_models[model_name]
cfg = chunk_configs[0]  # {"chunk_size": 400, "overlap": 50}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: msmarco
📏 Chunk size: 400, Overlap: 50
🧠 Response:
The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must speak English fluently and have recently graduated with an undergraduate degree within the last two academic years. Proficiency in French or Arabic is encouraged but not necessary. Applicants should submit an application form along with a short essay, limited to 750 words, discussing their interest in the program, special skills, talents, or experiences, and any special interest or experience they have in Morocco and the broader MENA region. The application deadline is May 5th, 2024, at 23:59 GMT+1. Prioritization is given to non-Moroccan nationals, but candidates from all nationalities and citizenship statuses are welcome.
⏱️ Response Time: 2.77 seconds
📊 Relevance Score: 0.2529
🧩 Chunks Retrieved: 2
🔤 Token Count: 111
🧠 Response:
The PiP program requires applicants to be recent graduates of an American-

Response(response='The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must speak English fluently and have recently graduated with an undergraduate degree within the last two academic years. Proficiency in French or Arabic is encouraged but not necessary. Applicants should submit an application form along with a short essay, limited to 750 words, discussing their interest in the program, special skills, talents, or experiences, and any special interest or experience they have in Morocco and the broader MENA region. The application deadline is May 5th, 2024, at 23:59 GMT+1. Prioritization is given to non-Moroccan nationals, but candidates from all nationalities and citizenship statuses are welcome.', source_nodes=[NodeWithScore(node=TextNode(id_='54bf9235-1d64-486e-9323-e4c4919c4cf2', embedding=None, metadata={'page_label': '1', 'file_name': 'PiP 24-25 Program Requirements.pdf', 'file_path': '/home/barneh/Rag-Based-LLM_AUIChat/raw/Pi

### Model: msmarco with chunk size: 250

In [9]:
# Run msmarco with chunk size 250
model_name = "msmarco"
model_path = embedding_models[model_name]
cfg = chunk_configs[1]  # {"chunk_size": 250, "overlap": 25}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: msmarco
📏 Chunk size: 250, Overlap: 25
🧠 Response:
The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must speak English fluently, and proficiency in French or Arabic is encouraged but not necessary. Applicants must have recently graduated with an undergraduate degree within the last two academic years (fall-winter 2022-23 or later). The application process involves filling out an application form, submitting a short essay, an updated CV, and a letter addressed to the president of AUI, Dr. Amine Bensaid, specifying the candidate's top three internship choices ranked in order of preference.
⏱️ Response Time: 2.09 seconds
📊 Relevance Score: 0.2888
🧩 Chunks Retrieved: 2
🔤 Token Count: 90
🧠 Response:
The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must speak English fluently, and proficiency in French or Arabic is encouraged but not necessary. Applicants mus

Response(response="The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must speak English fluently, and proficiency in French or Arabic is encouraged but not necessary. Applicants must have recently graduated with an undergraduate degree within the last two academic years (fall-winter 2022-23 or later). The application process involves filling out an application form, submitting a short essay, an updated CV, and a letter addressed to the president of AUI, Dr. Amine Bensaid, specifying the candidate's top three internship choices ranked in order of preference.", source_nodes=[NodeWithScore(node=TextNode(id_='22c10500-cbe4-48d3-b5ff-6a38b024ec4e', embedding=None, metadata={'page_label': '1', 'file_name': 'PiP 24-25 Program Requirements.pdf', 'file_path': '/home/barneh/Rag-Based-LLM_AUIChat/raw/PiP 24-25 Program Requirements.pdf', 'file_type': 'application/pdf', 'file_size': 166707, 'creation_date': '2025-03-23', 'last_modified_date': '

### Model: msmarco with chunk size: 100

In [10]:
# Run msmarco with chunk size 100
model_name = "msmarco"
model_path = embedding_models[model_name]
cfg = chunk_configs[2]  # {"chunk_size": 100, "overlap": 0}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: msmarco
📏 Chunk size: 100, Overlap: 0
🧠 Response:
The PiP program requires applicants to submit a cover letter, resume, and two letters of recommendation from faculty members, administrators, or work supervisors. The letters of recommendation should identify the student's general qualifications, middle eastern or African experience or interest, and ability to serve in the specific internships. The committee will use these materials to match qualified applicants with the office best suited for their interests, experiences, and skills. Shortlisted candidates will then be interviewed via Microsoft Teams, and the final decision will be made after reconvening.
⏱️ Response Time: 2.02 seconds
📊 Relevance Score: 0.1334
🧩 Chunks Retrieved: 2
🔤 Token Count: 87
🧠 Response:
The PiP program requires applicants to submit a cover letter, resume, and two letters of recommendation from faculty members, administrators, or work supervisors. The letters of recommendation should iden

Response(response="The PiP program requires applicants to submit a cover letter, resume, and two letters of recommendation from faculty members, administrators, or work supervisors. The letters of recommendation should identify the student's general qualifications, middle eastern or African experience or interest, and ability to serve in the specific internships. The committee will use these materials to match qualified applicants with the office best suited for their interests, experiences, and skills. Shortlisted candidates will then be interviewed via Microsoft Teams, and the final decision will be made after reconvening.", source_nodes=[NodeWithScore(node=TextNode(id_='b29c51ba-8aa2-4876-97ad-305f524d6bc8', embedding=None, metadata={'page_label': '1', 'file_name': 'PiP 24-25 Program Requirements.pdf', 'file_path': '/home/barneh/Rag-Based-LLM_AUIChat/raw/PiP 24-25 Program Requirements.pdf', 'file_type': 'application/pdf', 'file_size': 166707, 'creation_date': '2025-03-23', 'last_mod

### Model: miniLM with chunk size: 400

In [11]:
# Run miniLM with chunk size 400
model_name = "miniLM"
model_path = embedding_models[model_name]
cfg = chunk_configs[0]  # {"chunk_size": 400, "overlap": 50}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: miniLM
📏 Chunk size: 400, Overlap: 50
🧠 Response:
The requirements for the PiP program include being a recent graduate, familiar with the American style liberal arts model, fluent in English, and having recently graduated with an undergraduate degree within the last two academic years. Proficiency in French or Arabic is encouraged but not necessary. The application process involves filling out an application form and submitting a short essay, limited to 750 words, discussing the candidate's interest in the AUI internship program, special skills, talents, or experiences, and any special interest or experience in Morocco and the broader MENA region. For applicants whose first language is not English, taking the TOEFL exam for English competency is required.
⏱️ Response Time: 2.11 seconds
📊 Relevance Score: 0.4539
🧩 Chunks Retrieved: 2
🔤 Token Count: 108
🧠 Response:
The requirements for the PiP program include being a recent graduate, familiar with the American styl

Response(response="The requirements for the PiP program include being a recent graduate, familiar with the American style liberal arts model, fluent in English, and having recently graduated with an undergraduate degree within the last two academic years. Proficiency in French or Arabic is encouraged but not necessary. The application process involves filling out an application form and submitting a short essay, limited to 750 words, discussing the candidate's interest in the AUI internship program, special skills, talents, or experiences, and any special interest or experience in Morocco and the broader MENA region. For applicants whose first language is not English, taking the TOEFL exam for English competency is required.", source_nodes=[NodeWithScore(node=TextNode(id_='b9e6d9f7-b869-4c33-a879-94a7989de2c1', embedding=None, metadata={'page_label': '1', 'file_name': 'PiP 24-25 Program Requirements.pdf', 'file_path': '/home/barneh/Rag-Based-LLM_AUIChat/raw/PiP 24-25 Program Requiremen

### Model: miniLM with chunk size: 250

In [12]:
# Run miniLM with chunk size 250
model_name = "miniLM"
model_path = embedding_models[model_name]
cfg = chunk_configs[1]  # {"chunk_size": 250, "overlap": 25}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: miniLM
📏 Chunk size: 250, Overlap: 25
🧠 Response:
The PiP program requires applicants to be recent graduates of the American style liberal arts model. They must speak English fluently, and proficiency in French or Arabic is encouraged but not necessary. Applicants must have recently graduated with an undergraduate degree within the last two academic years (fall-winter 2022-23 or later). The program welcomes applications from candidates of all nationalities and citizenship statuses who have US-style, liberal arts undergraduate degrees. However, priority is given to non-Moroccan nationals. It's important to note that students who have completed their entire undergraduate education in Morocco are not eligible to apply. Previous interns have arrived with varying levels of Arabic and French knowledge, from native level fluency to no experience at all. For more information, you can contact Dr. Cherif Bel Fekih or Mrs. Sanaa Mokaddem via their provided contact details.


Response(response="The PiP program requires applicants to be recent graduates of the American style liberal arts model. They must speak English fluently, and proficiency in French or Arabic is encouraged but not necessary. Applicants must have recently graduated with an undergraduate degree within the last two academic years (fall-winter 2022-23 or later). The program welcomes applications from candidates of all nationalities and citizenship statuses who have US-style, liberal arts undergraduate degrees. However, priority is given to non-Moroccan nationals. It's important to note that students who have completed their entire undergraduate education in Morocco are not eligible to apply. Previous interns have arrived with varying levels of Arabic and French knowledge, from native level fluency to no experience at all. For more information, you can contact Dr. Cherif Bel Fekih or Mrs. Sanaa Mokaddem via their provided contact details.", source_nodes=[NodeWithScore(node=TextNode(id_='79f63

### Model: miniLM with chunk size: 100

In [13]:
# Run miniLM with chunk size 100
model_name = "miniLM"
model_path = embedding_models[model_name]
cfg = chunk_configs[2]  # {"chunk_size": 100, "overlap": 0}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: miniLM
📏 Chunk size: 100, Overlap: 0
🧠 Response:
The requirements for the PiP program include being a recent graduate of an American style liberal arts model, fluency in English, and eligibility may be prioritized for non-Moroccan nationals.
⏱️ Response Time: 0.90 seconds
📊 Relevance Score: 0.5172
🧩 Chunks Retrieved: 2
🔤 Token Count: 29
🧠 Response:
The requirements for the PiP program include being a recent graduate of an American style liberal arts model, fluency in English, and eligibility may be prioritized for non-Moroccan nationals.
⏱️ Response Time: 0.90 seconds
📊 Relevance Score: 0.5172
🧩 Chunks Retrieved: 2
🔤 Token Count: 29


Response(response='The requirements for the PiP program include being a recent graduate of an American style liberal arts model, fluency in English, and eligibility may be prioritized for non-Moroccan nationals.', source_nodes=[NodeWithScore(node=TextNode(id_='229ef1f7-dfbe-4d1e-88e3-d9eaaae803f7', embedding=None, metadata={'page_label': '1', 'file_name': 'PiP 24-25 Program Requirements.pdf', 'file_path': '/home/barneh/Rag-Based-LLM_AUIChat/raw/PiP 24-25 Program Requirements.pdf', 'file_type': 'application/pdf', 'file_size': 166707, 'creation_date': '2025-03-23', 'last_modified_date': '2025-03-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='d8c85cd1-be14-41d3-af19-13c493b11d4b', node_type='4', metadat

### Model: bge with chunk size: 400

In [14]:
# Run bge with chunk size 400
model_name = "bge"
model_path = embedding_models[model_name]
cfg = chunk_configs[0]  # {"chunk_size": 400, "overlap": 50}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: bge
📏 Chunk size: 400, Overlap: 50
🧠 Response:
The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must speak English fluently and have recently graduated with an undergraduate degree within the last two academic years. Proficiency in French or Arabic is encouraged but not necessary. Applicants should submit an application form along with a short essay, limited to 750 words, discussing their interest in the program, special skills, talents, or experiences, and any special interest or experience they have in Morocco and the broader MENA region. The application deadline is May 5th, 2024, at 23:59 GMT+1. Prioritization is given to non-Moroccan nationals, but candidates from all nationalities and citizenship statuses are welcome.
⏱️ Response Time: 0.30 seconds
📊 Relevance Score: 0.6481
🧩 Chunks Retrieved: 2
🔤 Token Count: 111
🧠 Response:
The PiP program requires applicants to be recent graduates of an American-styl

Response(response='The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must speak English fluently and have recently graduated with an undergraduate degree within the last two academic years. Proficiency in French or Arabic is encouraged but not necessary. Applicants should submit an application form along with a short essay, limited to 750 words, discussing their interest in the program, special skills, talents, or experiences, and any special interest or experience they have in Morocco and the broader MENA region. The application deadline is May 5th, 2024, at 23:59 GMT+1. Prioritization is given to non-Moroccan nationals, but candidates from all nationalities and citizenship statuses are welcome.', source_nodes=[NodeWithScore(node=TextNode(id_='cd805241-b74f-444d-8cb6-05a7fd5d6c07', embedding=None, metadata={'page_label': '1', 'file_name': 'PiP 24-25 Program Requirements.pdf', 'file_path': '/home/barneh/Rag-Based-LLM_AUIChat/raw/Pi

### Model: bge with chunk size: 250

In [15]:
# Run bge with chunk size 250
model_name = "bge"
model_path = embedding_models[model_name]
cfg = chunk_configs[1]  # {"chunk_size": 250, "overlap": 25}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: bge
📏 Chunk size: 250, Overlap: 25
🧠 Response:
The PiP program requires applicants to be recent graduates of the American style liberal arts model. They must speak English fluently, and proficiency in French or Arabic is encouraged but not necessary. Applicants must have recently graduated with an undergraduate degree within the last two academic years (fall-winter 2022-23 or later). The program welcomes applications from candidates of all nationalities and citizenship statuses who have US-style, liberal arts undergraduate degrees. However, priority is given to non-Moroccan nationals. It's important to note that students who have completed their entire undergraduate education in Morocco are not eligible to apply. Previous interns have arrived with varying levels of Arabic and French knowledge, from native level fluency to no experience at all. For more information, you can contact Dr. Cherif Bel Fekih or Mrs. Sanaa Mokaddem via their provided contact details.
⏱️ 

Response(response="The PiP program requires applicants to be recent graduates of the American style liberal arts model. They must speak English fluently, and proficiency in French or Arabic is encouraged but not necessary. Applicants must have recently graduated with an undergraduate degree within the last two academic years (fall-winter 2022-23 or later). The program welcomes applications from candidates of all nationalities and citizenship statuses who have US-style, liberal arts undergraduate degrees. However, priority is given to non-Moroccan nationals. It's important to note that students who have completed their entire undergraduate education in Morocco are not eligible to apply. Previous interns have arrived with varying levels of Arabic and French knowledge, from native level fluency to no experience at all. For more information, you can contact Dr. Cherif Bel Fekih or Mrs. Sanaa Mokaddem via their provided contact details.", source_nodes=[NodeWithScore(node=TextNode(id_='205d7

### Model: bge with chunk size: 100

In [16]:
# Run bge with chunk size 100
model_name = "bge"
model_path = embedding_models[model_name]
cfg = chunk_configs[2]  # {"chunk_size": 100, "overlap": 0}
run_experiment(model_name, model_path, cfg["chunk_size"], cfg["overlap"], documents)


🔍 Testing with model: bge
📏 Chunk size: 100, Overlap: 0
🧠 Response:
The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must also speak English fluently. Additionally, while the program welcomes applications from candidates of all nationalities and citizenship statuses with US-style, liberal arts undergraduate degrees, priority is given to non-Moroccan nationals.
⏱️ Response Time: 1.28 seconds
📊 Relevance Score: 0.6708
🧩 Chunks Retrieved: 2
🔤 Token Count: 47
🧠 Response:
The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must also speak English fluently. Additionally, while the program welcomes applications from candidates of all nationalities and citizenship statuses with US-style, liberal arts undergraduate degrees, priority is given to non-Moroccan nationals.
⏱️ Response Time: 1.28 seconds
📊 Relevance Score: 0.6708
🧩 Chunks Retrieved: 2
🔤 Token Count: 47


Response(response='The PiP program requires applicants to be recent graduates of an American-style liberal arts model. They must also speak English fluently. Additionally, while the program welcomes applications from candidates of all nationalities and citizenship statuses with US-style, liberal arts undergraduate degrees, priority is given to non-Moroccan nationals.', source_nodes=[NodeWithScore(node=TextNode(id_='1981cda2-988e-4763-9da6-3579a09b33a9', embedding=None, metadata={'page_label': '1', 'file_name': 'PiP 24-25 Program Requirements.pdf', 'file_path': '/home/barneh/Rag-Based-LLM_AUIChat/raw/PiP 24-25 Program Requirements.pdf', 'file_type': 'application/pdf', 'file_size': 166707, 'creation_date': '2025-03-23', 'last_modified_date': '2025-03-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date'

### 8️⃣ Result Analysis

Run this cell to check all experiments in MLflow UI

In [17]:
# for model_name, model_path in embedding_models.items():
#     print(f"\n🔍 Testing with model: {model_name}")
#     embed_model = HuggingFaceEmbedding(model_name=model_path)
#     Settings.embed_model = embed_model
    
#     for cfg in chunk_configs:
#          with mlflow.start_run(run_name=f"{model_name}_{cfg['chunk_size']}"):

#             # log parameters
#             mlflow.log_param("embedding_model", model_name)
#             mlflow.log_param("chunk_size", cfg["chunk_size"])
#             mlflow.log_param("overlap", cfg["overlap"])

#             print(f"\n📏 Chunk size: {cfg['chunk_size']}, Overlap: {cfg['overlap']}")
#             splitter = SentenceSplitter(chunk_size=cfg["chunk_size"], chunk_overlap=cfg["overlap"])
#             nodes = splitter.get_nodes_from_documents(documents)
        
#             for node in nodes:
#                 node.text = clean_text(node.text)

#             storage_context = StorageContext.from_defaults()
#             index = VectorStoreIndex(nodes, storage_context=storage_context)
        
#             query_engine = index.as_query_engine(
#                 llm=HuggingFaceInferenceAPI(
#                     model_name="mistralai/Mistral-7B-Instruct-v0.3",
#                     token="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"  # your token
#             )
#         )

#             query = "What are the requirements for the PiP program?"
#             response = query_engine.query(query)
#              # log response as artifact or metric
#             mlflow.log_text(str(response), "response.txt")

#             print("🧠 Response:")
#             print(response)


### 9️⃣ Viewing Results in MLflow Dashboard

To visualize all experiment results in the MLflow dashboard, run the command below. This will start the MLflow UI server which you can access in your browser.

In [None]:
# Run this cell to start the MLflow dashboard server
# The dashboard will be available at http://127.0.0.1:5000
!mlflow ui --backend-store-uri file:./mlruns

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[2025-04-12 11:17:25 -0400] [13220] [INFO] Starting gunicorn 23.0.0
[2025-04-12 11:17:25 -0400] [13220] [INFO] Listening at: http://127.0.0.1:5000 (13220)
[2025-04-12 11:17:25 -0400] [13220] [INFO] Using worker: sync
[2025-04-12 11:17:25 -0400] [13221] [INFO] Booting worker with pid: 13221
[2025-04-12 11:17:25 -0400] [13222] [INFO] Booting worker with pid: 13222
[2025-04-12 11:17:25 -0400] [13223] [INFO] Booting worker with pid: 13223
[2025-04-12 11:17:25 -0400] [13222] [INFO] Booting worker with pid: 13222
[2025-04-12 11:17:25 -0400] [13223] [INFO] Booting worker with pid: 13223
[2025-04-12 11:17:25 -0400] [13224] [INFO] Booting worker with pid: 13224
[2025-04-12 11:17:25 -0400] [13224] [INFO] Booting worker with pid: 13224


### MLflow Dashboard Guide

After starting the MLflow server with the command above, you can access the dashboard by:

1. Opening your browser and navigating to http://127.0.0.1:5000
2. Clicking on the "AUIChat-Embedding-Experiments" experiment
3. Viewing all runs comparing different embedding models and chunk sizes
4. For each run, you can:
   - See parameters (embedding model, chunk size, overlap)
   - View the responses in the "Artifacts" section
   - Compare runs side-by-side by selecting multiple runs

This will help you analyze which combination of embedding model and chunking strategy produces the best results for your specific queries.