In [1]:
import sys
import os
import warnings
# Install an "ignore" filter that applies to every warning category.
# NOTE(review): this also hides any warnings emitted by the analysis code
# itself — confirm that suppressing all of them is intended.
warnings.filterwarnings('ignore')

# Append the absolute path of the current working directory's parent to the
# import search path (presumably the project root, so that the `locobench`
# package imported below can be resolved — verify against the repo layout).
sys.path.append(os.path.abspath('../'))

# NOTE(review): the star import hides where names come from. The cells visible
# here call categorize_paths_by_root,
# collect_multi_model_position_analysis_results and
# compute_position_statistical_metrics, which presumably come from this module.
from locobench.analysis.numerical_analysis import *

In [2]:
# Move the working directory one level up so that it points to the project
# root (LoCoBench); the relative "results/..." paths used further down are
# resolved against it.
#
# The starting directory is recorded the first time this cell runs and reused
# afterwards, so re-running the cell within the same kernel session always
# lands in the same directory instead of climbing one more level per run.
NOTEBOOK_DIR = globals().get("NOTEBOOK_DIR", os.getcwd())
os.chdir(os.path.dirname(NOTEBOOK_DIR))
#os.getcwd() # must point to project root (LoCoBench)

In [3]:
# Language codes forwarded to categorize_paths_by_root as `language_order`.
language_order = ["en", "zh", "de", "it", "ko", "hi"]

# Each call returns a (monolingual, multilingual) pair of result-path
# collections for one embedding model.
PATHS_MGTE_MONO, PATHS_MGTE_MULTI = categorize_paths_by_root(
    "results/wiki_parallel",
    "Alibaba-NLP_gte-multilingual-base__wiki_parallel_en_de_hi_it_ko_zh__parallel__",
    language_order=language_order,
)
PATHS_JINA_MONO, PATHS_JINA_MULTI = categorize_paths_by_root(
    "results/wiki_parallel",
    "jinaai_jina-embeddings-v3__wiki_parallel_en_de_hi_it_ko_zh__parallel__",
    language_order=language_order,
)

# Per-model unions (monolingual first, then multilingual).
PATHS_MGTE = PATHS_MGTE_MONO + PATHS_MGTE_MULTI
PATHS_JINA = PATHS_JINA_MONO + PATHS_JINA_MULTI

# Cross-model unions (Jina first, then MGTE).
PATHS_MONO = PATHS_JINA_MONO + PATHS_MGTE_MONO
PATHS_MULTI = PATHS_JINA_MULTI + PATHS_MGTE_MULTI

# Quick sanity check on how many paths were found per model and setting.
print(f"MGTE monolingual paths: {len(PATHS_MGTE_MONO)}")
print(f"MGTE multilingual paths: {len(PATHS_MGTE_MULTI)}")
print(f"Jina monolingual paths: {len(PATHS_JINA_MONO)}")
print(f"Jina multilingual paths: {len(PATHS_JINA_MULTI)}")

# Pooling strategy keyed by model name; handed to the result-collection calls
# as `model_pooling_strats`.
model_pooling_strats = {
    "Alibaba-NLP/gte-multilingual-base": "cls",
    "jinaai/jina-embeddings-v3": "mean",
    "Qwen/Qwen3-Embedding-0.6B": "cls",
}

MGTE monolingual paths: 24
MGTE multilingual paths: 80
Jina monolingual paths: 24
Jina multilingual paths: 80


In [4]:
# Keyword arguments shared by the collection calls of each experiment.
# Exp2 uses the same pooling strategies plus an explicit document embedding type.
exp1_kwargs = {"model_pooling_strats": model_pooling_strats}
exp2_kwargs = {**exp1_kwargs, "document_embedding_type": "latechunk-segment"}

# Exp1: results over the combined Jina + MGTE paths.
all_results_MONO_exp1 = collect_multi_model_position_analysis_results(
    paths=PATHS_MONO, **exp1_kwargs
)
all_results_MULTI_exp1 = collect_multi_model_position_analysis_results(
    paths=PATHS_MULTI, **exp1_kwargs
)

# Exp2: results over the Jina paths only.
all_results_MONO_exp2 = collect_multi_model_position_analysis_results(
    paths=PATHS_JINA_MONO, **exp2_kwargs
)
all_results_MULTI_exp2 = collect_multi_model_position_analysis_results(
    paths=PATHS_JINA_MULTI, **exp2_kwargs
)

In [5]:
# Derive the statistical metrics from a collected result set; shown here for
# the monolingual Exp1 results.
stats_metrics_mono_exp1 = compute_position_statistical_metrics(
    all_results_MONO_exp1
)

# The remaining result sets can be processed the same way:
# stats_metrics_multi_exp1 = compute_position_statistical_metrics(all_results_MULTI_exp1)
# stats_metrics_monochunk_exp2 = compute_position_statistical_metrics(all_results_MONO_exp2)
# stats_metrics_multichunk_exp2 = compute_position_statistical_metrics(all_results_MULTI_exp2)


In [6]:
# List every available key, one per line. Each key is a nested tuple of the
# form ((number_of_segments, language_configuration), model_name).
for key in stats_metrics_mono_exp1.keys():
    print(key)

((3, 'en'), 'jinaai/jina-embeddings-v3')
((3, 'en'), 'Alibaba-NLP/gte-multilingual-base')
((4, 'en'), 'jinaai/jina-embeddings-v3')
((4, 'en'), 'Alibaba-NLP/gte-multilingual-base')
((5, 'en'), 'jinaai/jina-embeddings-v3')
((5, 'en'), 'Alibaba-NLP/gte-multilingual-base')
((6, 'en'), 'jinaai/jina-embeddings-v3')
((6, 'en'), 'Alibaba-NLP/gte-multilingual-base')
((3, 'zh'), 'jinaai/jina-embeddings-v3')
((3, 'zh'), 'Alibaba-NLP/gte-multilingual-base')
((4, 'zh'), 'jinaai/jina-embeddings-v3')
((4, 'zh'), 'Alibaba-NLP/gte-multilingual-base')
((5, 'zh'), 'jinaai/jina-embeddings-v3')
((5, 'zh'), 'Alibaba-NLP/gte-multilingual-base')
((6, 'zh'), 'jinaai/jina-embeddings-v3')
((6, 'zh'), 'Alibaba-NLP/gte-multilingual-base')
((3, 'de'), 'jinaai/jina-embeddings-v3')
((3, 'de'), 'Alibaba-NLP/gte-multilingual-base')
((4, 'de'), 'jinaai/jina-embeddings-v3')
((4, 'de'), 'Alibaba-NLP/gte-multilingual-base')
((5, 'de'), 'jinaai/jina-embeddings-v3')
((5, 'de'), 'Alibaba-NLP/gte-multilingual-base')
((6, 'de')

In [None]:
# Look up the quantitative metrics of one experiment instance and model:
# here 3 segments, German ('de'), Alibaba-NLP/gte-multilingual-base.
example_key = ((3, 'de'), 'Alibaba-NLP/gte-multilingual-base')
stats_metrics_mono_exp1[example_key]

{'anova_p_value': 1.3776337592180372e-222,
 'anova_eta_squared_generalized': 0.9399443137391916,
 'ols_betas_adjusted': {0: 0.9379488094124242,
  1: 0.431631975307193,
  2: 0.43015848186440264},
 'ols_p_values_adjusted': {0: 0.0, 1: 0.0, 2: 0.0},
 'ols_betas_raw': {'Intercept': 0.9379488094124242,
  'C(position_cat)[T.1]': -0.5063168341052312,
  'C(position_cat)[T.2]': -0.5077903275480216},
 'ols_p_values_raw': {'Intercept': 0.0,
  'C(position_cat)[T.1]': 0.0,
  'C(position_cat)[T.2]': 0.0},
 'ols_summary': {'Intercept': {'beta': 0.9379488094124242,
   'se': 0.0035906601617199173,
   'ci_low': 0.9309112448147304,
   'ci_high': 0.944986374010118,
   'pval': 0.0},
  'C(position_cat)[T.1]': {'beta': -0.5063168341052312,
   'se': 0.006598417538057491,
   'ci_low': -0.5192494948347813,
   'ci_high': -0.4933841733756811,
   'pval': 0.0},
  'C(position_cat)[T.2]': {'beta': -0.5077903275480216,
   'se': 0.008993084419490803,
   'ci_low': -0.5254164491201518,
   'ci_high': -0.4901642059758913,
