In [1]:
import financerag.tasks as tasks_module

import importlib
import inspect
import os
import json
import pandas as pd

import numpy as np
import json
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
# Tasks iterated by the alpha sweep below, which evaluates both the "default"
# and the "convert" result files for each of them.
tabular_retrieval = ['TATQA', 'FinQA', 'ConvFinQA', 'MultiHiertt']

# Every task name: the three remaining tasks followed by the tabular ones.
# Built from `tabular_retrieval` so the two lists cannot drift apart.
retrieval = ['FinDER', 'FinQABench', 'FinanceBench', *tabular_retrieval]

In [3]:
def tmm_normalize(scores, min_value, max_value):
    """Theoretical min-max normalization using caller-supplied bounds.

    Each score is mapped to ``(score - min_value) / (max_value - min_value)``.
    The bounds are passed in by the caller rather than derived from the data,
    so a score outside ``[min_value, max_value]`` maps outside ``[0, 1]``.

    Args:
        scores: Mapping of doc_id -> raw score.
        min_value: Lower bound used for scaling.
        max_value: Upper bound used for scaling.

    Returns:
        A new dict mapping each doc_id to its normalized score. The input
        mapping is not modified.
    """
    if max_value == min_value:
        # A zero-width range would divide by zero. Mirror mm_normalize and
        # give every document the same (zero) score instead of crashing.
        return {doc_id: 0.0 for doc_id in scores}

    return {
        doc_id: (score - min_value) / (max_value - min_value)
        for doc_id, score in scores.items()
    }

def mm_normalize(scores):
    """Min-max normalization with min/max calculated from the data itself.

    The smallest score maps to 0.0 and the largest to 1.0.

    Args:
        scores: Mapping of doc_id -> raw score. May be empty.

    Returns:
        A new dict mapping each doc_id to its normalized score. An empty
        input yields an empty dict; if all scores are equal, every document
        gets 0.0.
    """
    if not scores:
        # min()/max() raise ValueError on an empty sequence; an empty result
        # list simply has nothing to normalize.
        return {}

    min_value = min(scores.values())
    max_value = max(scores.values())

    if max_value == min_value:
        # All scores identical: avoid dividing by zero.
        return {doc_id: 0.0 for doc_id in scores}

    return {
        doc_id: (score - min_value) / (max_value - min_value)
        for doc_id, score in scores.items()
    }

def comb_sum_fusion_alpha(vector_result, lexical_result, alpha, normalize_type):
    """Fuse vector and lexical retrieval scores with a weighted sum.

    For every query in ``vector_result`` both score lists are normalized and
    combined per document as
    ``alpha * vector_score + (1 - alpha) * lexical_score``. A document that
    appears in only one list contributes 0 from the other side.

    Args:
        vector_result: Mapping query_id -> {doc_id: score} from the vector
            (dense) retriever. Only queries present here are fused.
        lexical_result: Mapping query_id -> {doc_id: score} from the lexical
            retriever. Queries missing here are treated as having no
            lexical hits.
        alpha: Weight on the vector scores; ``1 - alpha`` is applied to the
            lexical scores.
        normalize_type: ``"tmm"`` scales with fixed bounds ([-1, 1] for
            vector scores, [0, 1] for lexical scores); any other value uses
            data-driven min-max scaling.

    Returns:
        Mapping query_id -> {doc_id: fused_score}, each inner dict ordered by
        descending fused score.
    """
    fused_results = {}

    for query_id, vector_scores in vector_result.items():
        # A query absent from the lexical run has no lexical evidence; treat
        # it as an empty hit list instead of raising KeyError.
        lexical_scores = lexical_result.get(query_id, {})

        if normalize_type == "tmm":
            # NOTE(review): the [-1, 1] bounds presumably assume cosine
            # similarity, and [0, 1] assumes lexical scores are already
            # bounded - raw BM25 scores can exceed 1. Confirm with the data.
            normalized_vector_scores = tmm_normalize(vector_scores, min_value=-1, max_value=1)
            normalized_lexical_scores = tmm_normalize(lexical_scores, min_value=0, max_value=1)
        else:
            # Skip normalization for empty hit lists so the helper is never
            # asked to take min()/max() of nothing.
            normalized_vector_scores = mm_normalize(vector_scores) if vector_scores else {}
            normalized_lexical_scores = mm_normalize(lexical_scores) if lexical_scores else {}

        # Accumulate the weighted contributions of both retrievers.
        fused_query_results = {}

        for doc_id, score in normalized_vector_scores.items():
            fused_query_results[doc_id] = fused_query_results.get(doc_id, 0) + score * alpha

        for doc_id, score in normalized_lexical_scores.items():
            fused_query_results[doc_id] = fused_query_results.get(doc_id, 0) + score * (1 - alpha)

        # Highest fused score first; ties keep insertion order (stable sort).
        fused_results[query_id] = dict(
            sorted(fused_query_results.items(), key=lambda item: item[1], reverse=True)
        )

    return fused_results

In [4]:
# Grid-search the fusion weight alpha for each task/version and keep the alpha
# with the best NDCG@10, writing the corresponding fused ranking to disk.

# 101 evenly spaced weights 0.00, 0.01, ..., 1.00. linspace fixes the number of
# points explicitly, which arange with a float step does not guarantee.
alpha_values = np.linspace(0, 1, 101)
results_list = []
normalize_type = "mm"

# Save Directory. Created once, before the loops, so it is always defined for
# the final CSV write even if the task list is empty.
output_dir = './hybrid_search'
os.makedirs(output_dir, exist_ok=True)

lexical = "BM25"

# Dense model whose result files are fused with the lexical run, per task.
# A lookup table raises KeyError for an unmapped task instead of silently
# reusing the previous iteration's value.
dense_by_task = {
    "MultiHiertt": "voyage-finance-2",
    "FinQABench": "financial-rag-matryoshka",
    "FinanceBench": "financial-rag-matryoshka",
    "FinDER": "stella_en_1.5B_v5",
    "TATQA": "voyage-3",
    "FinQA": "voyage-3",
    "ConvFinQA": "voyage-3",
}

for task_class in tabular_retrieval:
    task_class_obj = getattr(tasks_module, task_class)
    finder_task = task_class_obj()

    dense = dense_by_task[task_class]

    # Load and prepare qrels for evaluation. They depend only on the task, so
    # read them once here rather than once per alpha.
    # NOTE(review): assumes finder_task.evaluate() does not mutate the qrels
    # mapping between calls - confirm against the task implementation.
    df = pd.read_csv(f'./eval/{task_class}_qrels.tsv', sep='\t')
    qrels_dict = {
        query_id: dict(zip(group['corpus_id'], group['score']))
        for query_id, group in df.groupby('query_id')
    }

    versions = ["default", "convert"] if task_class in tabular_retrieval else ["default"]

    for version in versions:
        # "default" files carry no version suffix; every other version does.
        suffix = f'_{version}' if version != "default" else ''
        vector_result_path = f'./{dense}/{task_class}{suffix}.json'
        lexical_result_path = f'./{lexical}/{task_class}{suffix}.json'

        # Load results from both dense and lexical models once per version;
        # the fusion function builds new dicts and leaves these untouched.
        with open(vector_result_path, 'r', encoding='utf-8') as f:
            vector_result = json.load(f)

        with open(lexical_result_path, 'r', encoding='utf-8') as f:
            lexical_result = json.load(f)

        # Track the best NDCG@10 score, its alpha and the matching ranking
        best_ndcg_10 = -1
        best_alpha = None
        best_fused_results = None

        for alpha in alpha_values:
            # Perform CombSUM fusion with the current alpha
            fused_results = comb_sum_fusion_alpha(vector_result, lexical_result, alpha, normalize_type)

            eval_result = finder_task.evaluate(qrels_dict, fused_results, [1, 5, 10])
            ndcg_10 = eval_result[0]['NDCG@10']

            # Strict '>' keeps the smallest alpha among ties.
            if ndcg_10 > best_ndcg_10:
                best_ndcg_10 = ndcg_10
                # Plain float rounded to the grid step, so the CSV shows 0.29
                # rather than a NumPy scalar such as 0.29000000000000004.
                best_alpha = round(float(alpha), 2)
                best_fused_results = fused_results

        # Save the best fused results for this version, once, after the sweep.
        if best_fused_results is not None:
            output_file = f'{output_dir}/{task_class}{suffix}_{normalize_type}_best_cc.json'
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(best_fused_results, f, ensure_ascii=False, indent=4)

        # Append results to the list
        results_list.append({
            'Task': f"{task_class}{suffix}",
            'Best Alpha': best_alpha,
            'Best NDCG@10': best_ndcg_10
        })


# Convert the results list to a DataFrame and save as a CSV file
results_df = pd.DataFrame(results_list)
results_df.to_csv(f'{output_dir}/{normalize_type}_best_results.csv', index=False)

A Hugging Face repository is provided. This will override the data_folder, prefix, and *_file arguments.
A Hugging Face repository is provided. This will override the data_folder, prefix, and *_file arguments.
A Hugging Face repository is provided. This will override the data_folder, prefix, and *_file arguments.
A Hugging Face repository is provided. This will override the data_folder, prefix, and *_file arguments.
