# Finetune embeddings

# Set up

In [1]:
import os
from loguru import logger
from tqdm.notebook import tqdm

In [2]:
import pandas as pd

In [3]:
from pydantic import BaseModel

class Config(BaseModel):
    """Notebook-wide settings: input data location, output directories and run switches."""

    # When True, question generation only uses the first 10 train / val nodes.
    testing: bool = False

    # Parquet file with the review rows that is loaded via pd.read_parquet.
    data_fp: str = "../data/yelp_dataset/sample/sample_100_biz/denom_review.parquet"
    # Root directory of the persisted docstores; "train" and "val" sub-directories are used.
    storage_context_persist_dp: str = "./data/finetune_embedding/storage_context"
    # Root directory of the generated question/context datasets (JSON, one sub-directory per split).
    gen_qa_embedding_pairs_dp: str = "./data/finetune_embedding/gen_qa_embedding_pairs"
    # Output path handed to the fine-tuning engine; later also used as the model id for evaluation.
    ft_model_dp: str = "./data/finetune_embedding/finetuned_model"
    # Root directory of the evaluation results; one sub-directory per evaluated model.
    eval_dp: str = "./data/finetune_embedding/eval"

    # NOTE(review): the question-generation cell passes a hard-coded 1 instead of
    # this value — confirm which of the two is intended.
    num_questions_per_chunk: int = 2

# Single shared instance read by the cells below.
cfg = Config()

In [4]:
# Load the reviews and normalise two columns in a single chain:
#   - biz_categories: comma-separated string -> list of category names
#   - date: datetime -> ISO-like string
data = (
    pd.read_parquet(cfg.data_fp)
    .assign(
        biz_categories=lambda frame: frame['biz_categories'].str.split(', '),
        date=lambda frame: frame['date'].dt.strftime('%Y-%m-%dT%H:%M:%S'),
    )
)
logger.info(f"[COLLECT] {len(data)=}")
data.iloc[0]

[32m2024-08-10 08:17:37.950[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1m[COLLECT] len(data)=5240[0m


review_id                                      L0jv8c2FbpWSlfNC6bbUEA
user_id                                        bFPdtzu11Oi0f92EAcjqmg
business_id                                    IDtLPgUrqorrpqSLdfMhZQ
review_stars                                                        5
useful                                                              0
funny                                                               0
cool                                                                0
text                What a great addition to the Funk Zone!  Grab ...
date                                              2016-10-13T22:50:47
biz_name                                         Helena Avenue Bakery
biz_address                                     131 Anacapa St, Ste C
biz_city                                                Santa Barbara
biz_state                                                          CA
biz_postal_code                                                 93101
biz_latitude        

In [5]:
data.shape

(5240, 22)

In [6]:
business_ids = data['business_id'].unique()
len(business_ids)

100

# Split train test

In [7]:
import numpy as np

In [8]:
TRAIN_RATIO = 0.7

# The split is done on businesses, not on individual reviews, so all reviews of
# one business land on the same side.
np.random.seed(41)  # fixed seed: the same businesses are drawn on every run
train_business_ids = np.random.choice(
    business_ids,
    size=int(TRAIN_RATIO * len(business_ids)),
    replace=False,
)

# Validation ids are whatever was not drawn for training, kept in their original order.
_train_id_lookup = set(train_business_ids)
val_business_ids = [biz_id for biz_id in business_ids if biz_id not in _train_id_lookup]
logger.info(f"{len(train_business_ids)=}, {len(val_business_ids)=}")

[32m2024-08-10 08:17:37.989[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mlen(train_business_ids)=70, len(val_business_ids)=30[0m


In [9]:
# Route every review to the split that owns its business.
train_df = data[data['business_id'].isin(train_business_ids)]
val_df = data[data['business_id'].isin(val_business_ids)]

# Convert to Documents

In [10]:
from llama_index.core import Document

def prep_documents(input_data: pd.DataFrame) -> "list[Document]":
    """Convert every row of ``input_data`` into a llama-index ``Document``.

    The ``text`` column becomes the document body; all remaining columns are
    attached as metadata. Only the keys listed in ``embedding_visible_metadata``
    stay visible to the embedding model and to the LLM — every other column name
    is passed as an excluded metadata key for both.

    Args:
        input_data: Frame with one review per row. Must contain a ``text`` column.

    Returns:
        One ``Document`` per row, in row order.
    """
    embedding_visible_metadata = ["review_stars", "biz_name", "biz_address", "biz_city", "biz_state", "biz_categories"]
    excluded_embed_metadata_keys = [k for k in input_data.columns if k not in embedding_visible_metadata]

    documents = []
    for _, row in tqdm(input_data.iterrows(), total=len(input_data)):
        record = row.to_dict()
        text = record['text']
        # Exact comparison on purpose: the former `k not in ('text')` tested
        # against the *string* 'text' (the parentheses do not make a tuple), i.e.
        # a substring check that would also drop columns named 't', 'ext', ...
        metadata = {k: v for k, v in record.items() if k != 'text'}

        doc = Document(
            text=text,
            metadata=metadata,
            excluded_embed_metadata_keys=excluded_embed_metadata_keys,
            excluded_llm_metadata_keys=excluded_embed_metadata_keys
        )
        documents.append(doc)

    return documents

In [11]:
train_docs = prep_documents(train_df)
val_docs = prep_documents(val_df)

  0%|          | 0/3176 [00:00<?, ?it/s]

  0%|          | 0/2064 [00:00<?, ?it/s]

In [12]:
train_docs[0]

Document(id_='566d233c-c727-4485-9b0f-e8f53452b013', embedding=None, metadata={'review_id': 'L0jv8c2FbpWSlfNC6bbUEA', 'user_id': 'bFPdtzu11Oi0f92EAcjqmg', 'business_id': 'IDtLPgUrqorrpqSLdfMhZQ', 'review_stars': 5, 'useful': 0, 'funny': 0, 'cool': 0, 'date': '2016-10-13T22:50:47', 'biz_name': 'Helena Avenue Bakery', 'biz_address': '131 Anacapa St, Ste C', 'biz_city': 'Santa Barbara', 'biz_state': 'CA', 'biz_postal_code': '93101', 'biz_latitude': 34.4144445, 'biz_longitude': -119.6906718, 'biz_stars': 4.0, 'biz_review_count': 389, 'biz_is_open': 1, 'biz_attributes': {'AcceptsInsurance': None, 'Alcohol': "u'none'", 'Ambience': "{'touristy': False, 'hipster': True, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': True, 'upscale': False, 'classy': False, 'casual': True}", 'BYOB': None, 'BestNights': None, 'BikeParking': 'True', 'BusinessAcceptsBitcoin': None, 'BusinessAcceptsCreditCards': 'True', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot'

# Chunking

In [13]:
import time
import pickle
from multiprocessing import set_start_method
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.node_parser import SemanticSplitterNodeParser

from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline, IngestionCache

In [14]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embedding_model_name = "Snowflake/snowflake-arctic-embed-m-v1.5"
embed_model = HuggingFaceEmbedding(model_name=embedding_model_name)

You try to use a model that was created with version 2.7.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.



Some weights of BertModel were not initialized from the model checkpoint at Snowflake/snowflake-arctic-embed-m-v1.5 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
# The node-parser class and its keyword arguments are kept apart; the parser is
# only instantiated (chunker(**chunker_cfg)) when the ingestion pipeline is built.
chunker = SemanticSplitterNodeParser
chunker_cfg = {
    "buffer_size": 1,
    "breakpoint_percentile_threshold": 95,
    # The splitter uses the base embedding model loaded above.
    "embed_model": embed_model
}

In [16]:
RECREATE_INDEX = False

In [17]:
from llama_index.core import StorageContext

if RECREATE_INDEX or not os.path.exists(f"{cfg.storage_context_persist_dp}/train"):
    t0 = time.perf_counter()
    # create the pipeline with transformations
    pipeline = IngestionPipeline(
        transformations=[
            chunker(**chunker_cfg),
            embed_model,
        ]
    )
    
    num_workers = None
    # TODO: I can get the preprocessing completed much quicker if I push the initial steps through a multiprocessing loop and then separately create the embeddings using the built-in batching already provided in the SentenceTransformer encode method.
    # Ref: https://github.com/run-llama/llama_index/issues/10104#issuecomment-1899401584
    # Currently setting num_workers leads to code simple hang
    # Ref: https://github.com/run-llama/llama_index/issues/10104
    # num_workers = os.cpu_count() - 1
    # os.environ['TOKENIZERS_PARALLELISM'] = 'true'
    # set_start_method("spawn", force=True)  # it hangs without this line
    logger.info(f"Running Ingestion Pipeline with {num_workers=}...")
    train_nodes = await pipeline.arun(documents=train_docs, num_workers=num_workers, show_progress=True)
    val_nodes = await pipeline.arun(documents=val_docs, num_workers=num_workers, show_progress=True)
    t1 = time.perf_counter()
    logger.info(f"Building nodes for train and val took {t1 - t0:,.0f}s")
    
    t0 = time.perf_counter()
    # Define Docstore as an abstraction on top of nodes to easily manage (e.g. get node by id)
    train_docstore = SimpleDocumentStore()
    await train_docstore.async_add_documents(train_nodes)
    train_storage_context = StorageContext.from_defaults(
        docstore=train_docstore
    )
    train_storage_context.persist(f"{cfg.storage_context_persist_dp}/train")
    
    val_docstore = SimpleDocumentStore()
    await val_docstore.async_add_documents(val_nodes)
    val_storage_context = StorageContext.from_defaults(
        docstore=val_docstore
    )
    val_storage_context.persist(f"{cfg.storage_context_persist_dp}/val")
    
    t1 = time.perf_counter()
    logger.info(f"Persisting storage_context for train and val took {t1 - t0:,.0f}s")
else:
    logger.info(f"Loading Storage Context from {cfg.storage_context_persist_dp}...")
    train_docstore = SimpleDocumentStore.from_persist_dir(persist_dir=f"{cfg.storage_context_persist_dp}/train")
    train_storage_context = StorageContext.from_defaults(
        docstore=train_docstore
    )
    train_nodes = list(train_docstore.docs.values())

    val_docstore = SimpleDocumentStore.from_persist_dir(persist_dir=f"{cfg.storage_context_persist_dp}/val")
    val_storage_context = StorageContext.from_defaults(
        docstore=val_docstore
    )
    val_nodes = list(val_docstore.docs.values())

[32m2024-08-10 08:17:46.553[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m46[0m - [1mLoading Storage Context from ./data/finetune_embedding/storage_context...[0m


# Generate synthetic queries

In [18]:
REGEN_DATASET = False

In [19]:
from llama_index.finetuning import generate_qa_embedding_pairs
from llama_index.core.evaluation import generate_question_context_pairs
from llama_index.core.evaluation import EmbeddingQAFinetuneDataset
from llama_index.core.schema import MetadataMode

In [20]:
# In testing mode only the first 10 nodes of each split go through question generation.
gen_train_nodes = train_nodes[:10] if cfg.testing else train_nodes
gen_val_nodes = val_nodes[:10] if cfg.testing else val_nodes

In [21]:
logger.info(f"{len(gen_train_nodes)=}, {len(gen_val_nodes)=}")

[32m2024-08-10 08:17:54.381[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mlen(gen_train_nodes)=6218, len(gen_val_nodes)=4001[0m


In [22]:
from llama_index.llms.openai import OpenAI

llm_model_name = 'gpt-4o-mini'
llm = OpenAI(model=llm_model_name)

In [23]:
print(gen_train_nodes[0].get_content(metadata_mode=MetadataMode.LLM))

review_stars: 5
biz_name: Helena Avenue Bakery
biz_address: 131 Anacapa St, Ste C
biz_city: Santa Barbara
biz_state: CA
biz_categories: ['Food', 'Restaurants', 'Salad', 'Coffee & Tea', 'Breakfast & Brunch', 'Sandwiches', 'Bakeries']

What a great addition to the Funk Zone!  Grab a bite, grab some tastings, life is good.


In [24]:
# Where the generated question/context datasets are stored, one JSON file per split.
train_output_path = f"{cfg.gen_qa_embedding_pairs_dp}/train/qa_finetuned_dataset.json"
val_output_path = f"{cfg.gen_qa_embedding_pairs_dp}/val/qa_finetuned_dataset.json"

if REGEN_DATASET:
    # Generate 10K pairs of questions and answers costs about 0.7 USD using GPT-4o-mini
    gen_query_prompt_tmpl = """
    You are a helpful assistant.
    
    Your task is to generate {num_questions_per_chunk} questions based on only the given context, not prior information.
    The questions are aim to find businesses/locations to go to, for example: restaurants, shopping mall, parking lots, ...
    
    <EXAMPLE>
    Input context: Biz_name: Clara's Kitchen. What a great addition to the Funk Zone!  Grab a bite, grab some tastings, life is good. Right next door to the Santa Barbara Wine Collective, in fact it actually shares the same tables.  We had a fabulous savory croissant.
    Output questions: What are some recommended restaurants in Funk Zone?
    
    Some example of good generated questions:
    - What are some reliable shipping or delivery services in Affton?
    - What are some clothing stores with good quality customer service or support?
    
    </EXAMPLE>
    
    IMPORTANT RULES:
    - The generated questions must be specific about the categories of businesses it's looking for. A good generated question would have its typical answer being: Here are some options for you: Place A because..., Place B because...
    - Restrict the generated question to the context information provided
    - Pay attention to the sentiment of the context review. If the review is bad then never return a question that ask for a good experience.
    - Do not mention anything about the context in the generated queries
    - The generated questions must be complete on its own. Do not assume the person receiving the question know anything about the person asking the question. for example never use "in my area" or "near me".
    
    Input context is below.
    
    ---------------------
    {context_str}
    ---------------------
    
    Output questions:
    """
    t0 = time.perf_counter()

    # NOTE(review): one question per chunk is hard-coded here although
    # cfg.num_questions_per_chunk defaults to 2 — confirm which is intended.
    train_dataset = generate_question_context_pairs(
        gen_train_nodes,
        llm=llm,
        num_questions_per_chunk=1,
        qa_generate_prompt_tmpl=gen_query_prompt_tmpl,
    )

    t1 = time.perf_counter()
    # Log the number of questions actually produced; the former
    # len(nodes) * cfg.num_questions_per_chunk did not match the call above.
    logger.info(f"Generate {len(train_dataset.queries)} questions for train set took {t1 - t0:,.0f}s")

    val_dataset = generate_question_context_pairs(
        gen_val_nodes,
        llm=llm,
        num_questions_per_chunk=1,
        qa_generate_prompt_tmpl=gen_query_prompt_tmpl,
    )

    t2 = time.perf_counter()
    # The val duration is t2 - t1; t1 - t0 would repeat the train duration.
    logger.info(f"Generate {len(val_dataset.queries)} questions for val set took {t2 - t1:,.0f}s")

    os.makedirs(os.path.dirname(train_output_path), exist_ok=True)
    logger.info(f"Persisting train dataset to {train_output_path}...")
    train_dataset.save_json(train_output_path)

    os.makedirs(os.path.dirname(val_output_path), exist_ok=True)
    logger.info(f"Persisting val dataset to {val_output_path}...")
    val_dataset.save_json(val_output_path)
else:
    # Reuse the datasets written by an earlier run instead of calling the LLM again.
    logger.info(f"Loading train and val dataset from disk...")
    train_dataset = EmbeddingQAFinetuneDataset.from_json(train_output_path)
    val_dataset = EmbeddingQAFinetuneDataset.from_json(val_output_path)

[32m2024-08-10 08:17:54.415[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m66[0m - [1mLoading train and val dataset from disk...[0m


In [26]:
# Dataset size is taken as the number of entries in relevant_docs.
logger.info(
    f"len train_dataset: {len(train_dataset.relevant_docs)}, "
    f"len val_dataset: {len(val_dataset.relevant_docs)}"
)

[32m2024-08-10 08:17:54.478[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mlen train_dataset: 6218, len val_dataset: 4001[0m


In [25]:
# Take the first validation entry of each mapping for a manual spot check.
# NOTE(review): assumes relevant_docs and queries list their entries in the same order — confirm.
sanity_node = next(iter(val_dataset.relevant_docs.values()))[0]
sanity_question = next(iter(val_dataset.queries.values()))

In [27]:
print(f"""
Generated Question:
{sanity_question}

From Context:
{val_docstore.get_node(sanity_node).get_content(metadata_mode=MetadataMode.LLM)}
"""
)


Generated Question:
What are some restaurants known for large portion sizes?

From Context:
review_stars: 5
biz_name: Mike's Ice Cream
biz_address: 129 2nd Ave N
biz_city: Nashville
biz_state: TN
biz_categories: ['Ice Cream & Frozen Yogurt', 'Coffee & Tea', 'Restaurants', 'Sandwiches', 'Food']

Super tasty and enormous portions.



# Run Embedding Finetuning

In [28]:
from llama_index.finetuning import SentenceTransformersFinetuneEngine

<p style="background-color:#fff6e4; padding:15px; border-width:3px; border-color:#f5ecda; border-style:solid; border-radius:6px"> ⏳ <b>Note <code>(Training Duration)</code>:</b>
The below fine-tuning takes about 2 hours to complete.
</p>

In [29]:
# A device with 4 GB of VRAM cannot take a larger batch; each +1 in batch size
# appeared to cost roughly 500 MB of VRAM.
BATCH_SIZE = 4 # For 4 GB vRAM device can not take more. Looks like 1 unit increase in batch size takes up 500 MB vRAM

# Fine-tune the same base model that was used for chunking (embedding_model_name)
# and write the result to cfg.ft_model_dp.
finetune_engine = SentenceTransformersFinetuneEngine(
    train_dataset,
    model_id=embedding_model_name,
    model_output_path=cfg.ft_model_dp,
    # The validation pairs are handed over as well — presumably for the periodic
    # retrieval evaluation seen in the training log; verify against the engine docs.
    val_dataset=val_dataset,
    batch_size=BATCH_SIZE
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: Snowflake/snowflake-arctic-embed-m-v1.5
Load pretrained SentenceTransformer: Snowflake/snowflake-arctic-embed-m-v1.5



You try to use a model that was created with version 2.7.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





Some weights of BertModel were not initialized from the model checkpoint at Snowflake/snowflake-arctic-embed-m-v1.5 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:1 prompts are loaded, with the keys: ['query']
1 prompts are loaded, with the keys: ['query']


In [30]:
%%time
finetune_engine.finetune()

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1555 [00:00<?, ?it/s]

INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Information Retrieval Evaluation of the model on the  dataset in epoch 0 after 50 steps:
Information Retrieval Evaluation of the model on the  dataset in epoch 0 after 50 steps:
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Queries: 4001
Queries: 4001
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Corpus: 4001

Corpus: 4001

INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Score-Function: cos_sim
Score-Function: cos_sim
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@1: 14.90%
Accuracy@1: 14.90%
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@3: 22.32%
Accuracy@3: 22.32%
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@5: 26.47%
Accuracy@5: 26.47%
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@10: 32.59%
Accuracy@10: 32.59%
INFO:sentence_transf

Iteration:   0%|          | 0/1555 [00:00<?, ?it/s]

INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Information Retrieval Evaluation of the model on the  dataset in epoch 1 after 50 steps:
Information Retrieval Evaluation of the model on the  dataset in epoch 1 after 50 steps:
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Queries: 4001
Queries: 4001
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Corpus: 4001

Corpus: 4001

INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Score-Function: cos_sim
Score-Function: cos_sim
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@1: 39.84%
Accuracy@1: 39.84%
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@3: 54.44%
Accuracy@3: 54.44%
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@5: 61.23%
Accuracy@5: 61.23%
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Accuracy@10: 69.33%
Accuracy@10: 69.33%
INFO:sentence_transf

In [31]:
# NOTE: this rebinds `embed_model`. Until here the name referred to the base
# model created for chunking; from this cell on it refers to the fine-tuned
# model, so re-running earlier cells afterwards would pick up a different model.
embed_model = finetune_engine.get_finetuned_model()
embed_model

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: ./data/finetune_embedding/finetuned_model
Load pretrained SentenceTransformer: ./data/finetune_embedding/finetuned_model



You try to use a model that was created with version 2.7.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.



INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']


HuggingFaceEmbedding(model_name='./data/finetune_embedding/finetuned_model', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7b4a21638690>, num_workers=None, max_length=512, normalize=True, query_instruction=None, text_instruction=None, cache_folder=None)

# Evaluate

In [32]:
from sentence_transformers.evaluation import InformationRetrievalEvaluator
from sentence_transformers import SentenceTransformer
from pathlib import Path
import shutil

def evaluate_st(
    dataset,
    model_id,
    name,
    output_path: str = None,
    batch_size: int = 32,
):
    """Run an information-retrieval evaluation of a SentenceTransformer model.

    Args:
        dataset: Object exposing ``corpus``, ``queries`` and ``relevant_docs``
            (in this notebook an ``EmbeddingQAFinetuneDataset``).
        model_id: Model name or local path handed to ``SentenceTransformer``.
        name: Label given to the evaluator; the callers also use it to build the
            name of the results CSV they read back.
        output_path: Directory for the evaluator output. An existing directory at
            this path is DELETED and recreated before the run. With ``None`` the
            directory handling is skipped and ``None`` is forwarded to the
            evaluator (previously ``None`` raised a ``TypeError``).
        batch_size: Encoding batch size forwarded to the evaluator. The default
            of 32 is meant to match the evaluator's own default; lower it on
            GPUs with little memory.

    Returns:
        Whatever the evaluator call returns.
    """
    # Local import: torch is only needed for the GPU clean-up at the end.
    import torch

    corpus = dataset.corpus
    queries = dataset.queries
    relevant_docs = dataset.relevant_docs

    evaluator = InformationRetrievalEvaluator(
        queries, corpus, relevant_docs, name=name, batch_size=batch_size
    )

    model = SentenceTransformer(model_id)

    if output_path is not None:
        # Start from an empty directory so results of an earlier run cannot be read back.
        if os.path.exists(output_path):
            shutil.rmtree(output_path)
        Path(output_path).mkdir(exist_ok=True, parents=True)

    try:
        return evaluator(model, output_path=output_path)
    finally:
        # Evaluations run back to back on a small GPU. Moving the weights to the
        # CPU and emptying the CUDA cache is intended to keep a finished (or
        # failed) evaluation from holding GPU memory the next one needs.
        model.to("cpu")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

In [33]:
# Baseline: off-the-shelf BGE-large scored on the validation pairs.
name, model_id = 'bge', "BAAI/bge-large-en"
output_path = f"{cfg.eval_dp}/{name}"
output_csv_filename = f"Information-Retrieval_evaluation_{name}_results.csv"
evaluate_st(val_dataset, model_id, name=name, output_path=output_path)
# Read back the CSV that the evaluation wrote into output_path.
bge_eval_results = pd.read_csv(f"{output_path}/{output_csv_filename}")

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-large-en
Load pretrained SentenceTransformer: BAAI/bge-large-en
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
Use pytorch device_name: cuda
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Information Retrieval Evaluation of the model on the bge dataset:
Information Retrieval Evaluation of the model on the bge dataset:


OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacity of 3.72 GiB of which 152.00 MiB is free. Including non-PyTorch memory, this process has 3.49 GiB memory in use. Of the allocated memory 2.98 GiB is allocated by PyTorch, and 413.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
bge_eval_results

In [34]:
# The fine-tuned model, loaded from the directory the fine-tuning engine wrote to.
name, model_id = 'finetuned', cfg.ft_model_dp
output_path = f"{cfg.eval_dp}/{name}"
output_csv_filename = f"Information-Retrieval_evaluation_{name}_results.csv"
evaluate_st(val_dataset, model_id, name=name, output_path=output_path)
# Read back the CSV that the evaluation wrote into output_path.
ft_eval_results = pd.read_csv(f"{output_path}/{output_csv_filename}")

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: ./data/finetune_embedding/finetuned_model
Load pretrained SentenceTransformer: ./data/finetune_embedding/finetuned_model



You try to use a model that was created with version 2.7.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.



INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:1 prompts are loaded, with the keys: ['query']
1 prompts are loaded, with the keys: ['query']
INFO:sentence_transformers.evaluation.InformationRetrievalEvaluator:Information Retrieval Evaluation of the model on the finetuned dataset:
Information Retrieval Evaluation of the model on the finetuned dataset:


OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 3.72 GiB of which 12.00 MiB is free. Including non-PyTorch memory, this process has 3.63 GiB memory in use. Of the allocated memory 3.41 GiB is allocated by PyTorch, and 116.72 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
ft_eval_results

In [None]:
# Baseline: the base Arctic model before fine-tuning.
name, model_id = 'arctic', "Snowflake/snowflake-arctic-embed-m-v1.5"
output_path = f"{cfg.eval_dp}/{name}"
output_csv_filename = f"Information-Retrieval_evaluation_{name}_results.csv"
evaluate_st(val_dataset, model_id, name=name, output_path=output_path)
# Read back the CSV that the evaluation wrote into output_path.
arctic_eval_results = pd.read_csv(f"{output_path}/{output_csv_filename}")

In [None]:
arctic_eval_results

In [None]:
# Row 0 of each results table becomes one column, so the models sit side by side.
eval_df = pd.concat(
    [
        arctic_eval_results.loc[0].rename('arctic'),
        bge_eval_results.loc[0].rename('bge'),
        ft_eval_results.loc[0].rename('ft'),
    ],
    axis=1,
)

# Colour each metric row across the three model columns.
eval_df.style.background_gradient(axis=1, low=0, high=1)

# Archive