In [1]:
import sys
import os
import random
import gc
import time
import torch
import numpy as np
import pandas as pd
import ast
from tqdm import tqdm
from scipy.stats import spearmanr, pearsonr, kendalltau, rankdata
from sklearn.metrics import ndcg_score
from transformers import AutoModelForCausalLM, AutoTokenizer
from accelerate import Accelerator

current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, '..'))
sys.path.append(parent_dir)
from SHapRAG import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df= pd.read_csv("../data/synthetic_data/20_synergy_hard_negatives.csv",index_col=False, sep=";")
# df= pd.read_csv("../data/complementary.csv")

In [None]:
df.context[1]

In [None]:
docs = [
# "The weather in Chorvoq is stormy today.",
"Chorvoq is the capital of Narniya.",
# "The sun is shining in Galaba today",
"Nurik is the capital of Suvsambil.",
"Narniya borders several countries including Suvsambil.",
"The currency used in Narniya is the Euro.",
"Narniya is the biggest country in the Olam",
# "Chorvoq hosted the Summer Olympics in 1900 and 1924.",
"The capital of Narniya is Chorvoq.",
# "Suvsambil uses the Euro as well.",
"It is cloudy in Nurik today."
]
query = "What is the capital of the biggest country in the Olam?"
# Parameters
NUM_RETRIEVED_DOCS = len(docs)


In [3]:
SEED = 42
# Initialize Accelerator
accelerator_main = Accelerator(mixed_precision="fp16")

# Load Model
if accelerator_main.is_main_process:
    print("Main Script: Loading model...")
# model_path = "mistralai/Mistral-7B-Instruct-v0.3"
model_path = "meta-llama/Llama-3.1-8B-Instruct"
# model_path = "Qwen/Qwen2.5-3B-Instruct"

model_cpu = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model_cpu.config.pad_token_id = tokenizer.pad_token_id
    if hasattr(model_cpu, 'generation_config') and model_cpu.generation_config is not None:
        model_cpu.generation_config.pad_token_id = tokenizer.pad_token_id

if accelerator_main.is_main_process:
    print("Main Script: Preparing model with Accelerator...")
prepared_model = accelerator_main.prepare(model_cpu)
unwrapped_prepared_model = accelerator_main.unwrap_model(prepared_model)
unwrapped_prepared_model.eval()
if accelerator_main.is_main_process:
    print("Main Script: Model prepared and set to eval.")

# Define utility cache

accelerator_main.wait_for_everyone()

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Main Script: Loading model...


Loading checkpoint shards: 100%|██████████| 4/4 [00:01<00:00,  2.51it/s]


Main Script: Preparing model with Accelerator...
Main Script: Model prepared and set to eval.


In [8]:
num_questions_to_run=len(df.question)
# num_questions_to_run=1
all_metrics_data = []
all_results=[]
M=[]
Fs=[]
pairs=[]
mse_inters=[]
mse_lins=[]
mse_fms=[]
for i in tqdm(range(num_questions_to_run), desc="Processing Questions", disable=not accelerator_main.is_main_process):
    query = df.question[i]
    if accelerator_main.is_main_process:
        print(f"\n--- Question {i+1}/{num_questions_to_run}: {query[:60]}... ---")

    docs=ast.literal_eval(df.context[i])

    utility_cache_base_dir = "../Experiment_data/synergy"
    utility_cache_filename = f"utilities_q_idx{i}_n{len(docs)}.pkl" # More robust naming
    current_utility_path = os.path.join(utility_cache_base_dir, utility_cache_filename)
    
    if accelerator_main.is_main_process: # Only main process creates directories
        os.makedirs(os.path.dirname(current_utility_path), exist_ok=True)
        print(f"  Instantiating ShapleyExperimentHarness for Q{i} (n={len(docs)} docs)...")
    
    # Initialize Harness
    harness = ShapleyExperimentHarness(
        items=docs,
        query=query,
        prepared_model_for_harness=prepared_model,
        tokenizer_for_harness=tokenizer,
        accelerator_for_harness=accelerator_main,
        verbose=True,
        utility_path=current_utility_path
    )
    # Compute metrics
    results_for_query = {}
    M.append(harness.compute_shapley_interaction_index_pairs_matrix())
    if accelerator_main.is_main_process:

        results_for_query["ExactLinear"], mse_lin = harness.compute_exact_linear_shap()
        results_for_query["ExactInter"], pair, mse_inter = harness.compute_exact_inter_shap()
        pairs.append(pair)
        mse_lins.append(mse_lin)
        mse_inters.append(mse_inter)
        m_samples_map = {"L": 100} 
        T_iterations_map = { "L":20} 

        for size_key, num_s in m_samples_map.items():
            if 2**len(docs) < num_s and size_key != "L":
                actual_samples = max(1, 2**len(docs)-1 if 2**len(docs)>0 else 1)
            else:
                actual_samples = num_s

            if actual_samples > 0: 
                results_for_query[f"ContextCite{actual_samples}"] = harness.compute_contextcite_weights(num_samples=actual_samples, seed=SEED)
                results_for_query[f"WSS_FM{actual_samples}"], F, mse_fm = harness.compute_wss(num_samples=actual_samples, seed=SEED, distil=None, sampling="kernelshap",sur_type="fm", util='pure-surrogate', pairchecking=False)
                Fs.append(F)
                mse_fms.append(mse_fm)
                results_for_query[f"BetaShap{actual_samples}"] = harness.compute_beta_shap(num_iterations_max=T_iterations_map[size_key], beta_a=4, beta_b=4, max_unique_lookups=actual_samples, seed=SEED)
                results_for_query[f"TMC{actual_samples}"] = harness.compute_tmc_shap(num_iterations_max=T_iterations_map[size_key], performance_tolerance=0.001, max_unique_lookups=actual_samples, seed=SEED)

        results_for_query["LOO"] = harness.compute_loo()

        exact_scores = results_for_query.get("ExactInter")
        all_results.append(results_for_query)
        if exact_scores is not None:
            positive_exact_score = np.clip(exact_scores, a_min=0.0, a_max=None) # FOR NDGC SCORE COMPUTATION
            for method, approx_scores in results_for_query.items():
                if method != "Exact" and approx_scores is not None:
                    if len(approx_scores) == len(exact_scores):
                        if np.all(exact_scores == exact_scores[0]) or np.all(approx_scores == approx_scores[0]):
                            pearson_c = 1.0 if np.allclose(exact_scores, approx_scores) else 0.0
                            spearman_c = 1.0 if np.allclose(exact_scores, approx_scores) else 0.0
                        else:
                            pearson_c, _ = pearsonr(exact_scores, approx_scores)
                            exact_ranks = rankdata(-np.array(exact_scores), method="average") # rank scores with the smallest =1 and when there is a tie assign the average rank
                            approx_ranks = rankdata(-np.array(approx_scores), method = "average")
                            kendall_c, _ = kendalltau(exact_ranks, approx_ranks) # return tau and pval (if pval is < 0.005 we can say that correlation is statistically significant) 
                        
                        all_metrics_data.append({
                            "Question_Index": i, "Query": query, "Method": method,
                            "Pearson": pearson_c, "KendallTau" : kendall_c,
                        })
                    else:
                        print(f"    Score length mismatch for method {method} (Exact: {len(exact_scores)}, Approx: {len(approx_scores)}). Skipping metrics.")
        else:
            print(f"    Skipping metric calculation for Q{i} as Exact Shapley was not computed or failed.")
    
    accelerator_main.wait_for_everyone() 
   
    if torch.cuda.is_available():
        if accelerator_main.is_main_process: # Print from one process
            print(f"Attempting to empty CUDA cache on rank {accelerator_main.process_index} after Q{i}")
        torch.cuda.empty_cache()
        gc.collect()
        if accelerator_main.is_main_process:
            print(f"CUDA cache empty attempt complete on rank {accelerator_main.process_index}.")
    accelerator_main.wait_for_everyone()


if accelerator_main.is_main_process:
    if all_metrics_data:
        metrics_df_all_questions = pd.DataFrame(all_metrics_data)
        print("\n\n--- Average Correlation Metrics Across All Questions ---")
        average_metrics = metrics_df_all_questions.groupby("Method").agg(
            Avg_Pearson=("Pearson", "mean"),
            Avg_Kendall =("KendallTau", "mean"),
            Num_Valid_Queries=("Query", "nunique")
        ).sort_values(by="Avg_Pearson", ascending=False)
        
        print(average_metrics.round(4))
    else:
        print("\nNo metrics were collected. This might be due to all calculations failing or only non-main processes running sections.")

# Final synchronization before script ends
accelerator_main.wait_for_everyone()
if accelerator_main.is_main_process:
    print("Script finished.")

if torch.distributed.is_available() and torch.distributed.is_initialized():
    if accelerator_main.is_local_main_process:
        print(f"Rank {accelerator_main.process_index} (Local Main): Manually destroying process group...")
    torch.distributed.destroy_process_group()
    if accelerator_main.is_local_main_process:
        print(f"Rank {accelerator_main.process_index} (Local Main): Process group destroyed.")
else:
    if accelerator_main.is_local_main_process:
        print(f"Rank {accelerator_main.process_index} (Local Main): Distributed environment not initialized or not available, skipping destroy_process_group.")

if accelerator_main.is_main_process:
    print("Script fully exited.")




--- Question 1/20: What kind of energy does the facility hovering the planet Xy... ---
  Instantiating ShapleyExperimentHarness for Q0 (n=10 docs)...
Generating target response based on full context...
Target response: 'Electrical power.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx0_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 241.66it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q0
CUDA cache empty attempt complete on rank 0.

--- Question 2/20: What is the primary defense of the creatures populating the ... ---
  Instantiating ShapleyExperimentHarness for Q1 (n=10 docs)...
Generating target response based on full context...
Target response: 'The primary defense of the creatures populating the Subterranean Caves is their unique abilities to protect themselves from predators.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx1_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 268.90it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q1
CUDA cache empty attempt complete on rank 0.

--- Question 3/20: What is the main function of the device recently discovered ... ---
  Instantiating ShapleyExperimentHarness for Q2 (n=10 docs)...
Generating target response based on full context...
Target response: 'Stabilize localized temporal distortions.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx2_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 309.74it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q2
CUDA cache empty attempt complete on rank 0.

--- Question 4/20: What is the composition of the natural wonder of planet Ntun... ---
  Instantiating ShapleyExperimentHarness for Q3 (n=10 docs)...
Generating target response based on full context...
Target response: 'The 'Floating Islands of Aeridia' are primarily composed of Aeridium Ore.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx3_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 297.50it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q3
CUDA cache empty attempt complete on rank 0.

--- Question 5/20: What is the traditional weapon of the defenders of the Citad... ---
  Instantiating ShapleyExperimentHarness for Q4 (n=10 docs)...
Generating target response based on full context...
Target response: 'The traditional weapon of the Sky-Guardians.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx4_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 352.55it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q4
CUDA cache empty attempt complete on rank 0.

--- Question 6/20: What kind of power source is used by the ancient constructs ... ---
  Instantiating ShapleyExperimentHarness for Q5 (n=10 docs)...
Generating target response based on full context...
Target response: 'Animus Cores, Geothermal Converters, and Starlight Capacitors.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx5_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 268.29it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q5
CUDA cache empty attempt complete on rank 0.

--- Question 7/20: What is the primary ingredient of the most famous healing po... ---
  Instantiating ShapleyExperimentHarness for Q6 (n=10 docs)...
Generating target response based on full context...
Target response: 'Finely ground powder from Glow-Moss.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx6_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 328.42it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q6
CUDA cache empty attempt complete on rank 0.

--- Question 8/20: What defensive ability is possessed by the strongest creatur... ---
  Instantiating ShapleyExperimentHarness for Q7 (n=10 docs)...
Generating target response based on full context...
Target response: 'Instantaneous growth of razor-sharp crystalline thorns.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx7_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 305.32it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q7
CUDA cache empty attempt complete on rank 0.

--- Question 9/20: What is the energy source of the massive orbital platform or... ---
  Instantiating ShapleyExperimentHarness for Q8 (n=10 docs)...
Generating target response based on full context...
Target response: 'A miniature black hole.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx8_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 320.76it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q8
CUDA cache empty attempt complete on rank 0.

--- Question 10/20: What is the population of the capital of the biggest country... ---
  Instantiating ShapleyExperimentHarness for Q9 (n=10 docs)...
Generating target response based on full context...
Target response: 'Over 300 million people.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx9_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 316.43it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q9
CUDA cache empty attempt complete on rank 0.

--- Question 11/20: What's the weather like in the capital of Zhykara?... ---
  Instantiating ShapleyExperimentHarness for Q10 (n=10 docs)...
Generating target response based on full context...
Target response: 'Ktharr City is experiencing a snow storm.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx10_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 311.90it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q10
CUDA cache empty attempt complete on rank 0.

--- Question 12/20: What's the value of the element obtained by mixing Aethelite... ---
  Instantiating ShapleyExperimentHarness for Q11 (n=10 docs)...
Generating target response based on full context...
Target response: 'Nitros.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx11_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 299.07it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q11
CUDA cache empty attempt complete on rank 0.

--- Question 13/20: What's the side effect of mixing Gravitium and Solara?... ---
  Instantiating ShapleyExperimentHarness for Q12 (n=10 docs)...
Generating target response based on full context...
Target response: 'Creating Umbradium.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx12_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 317.05it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q12
CUDA cache empty attempt complete on rank 0.

--- Question 14/20: What is the radius of the largest planet in the K'tharr Syst... ---
  Instantiating ShapleyExperimentHarness for Q13 (n=10 docs)...
Generating target response based on full context...
Target response: '120,000 kilometers.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx13_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 305.37it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q13
CUDA cache empty attempt complete on rank 0.

--- Question 15/20: What is the population of the smallest planet in the Lumina ... ---
  Instantiating ShapleyExperimentHarness for Q14 (n=10 docs)...
Generating target response based on full context...
Target response: '1.5 million.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx14_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 304.89it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q14
CUDA cache empty attempt complete on rank 0.

--- Question 16/20: What is the title of the latest film starring the highest-pa... ---
  Instantiating ShapleyExperimentHarness for Q15 (n=10 docs)...
Generating target response based on full context...
Target response: ''Echoes of the Nebulae'.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx15_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 305.63it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q15
CUDA cache empty attempt complete on rank 0.

--- Question 17/20: What is the salary of the most popular actor on the planet A... ---
  Instantiating ShapleyExperimentHarness for Q16 (n=10 docs)...
Generating target response based on full context...
Target response: '50 million Credits per major project.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx16_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 285.14it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q16
CUDA cache empty attempt complete on rank 0.

--- Question 18/20: What is the title of the first album by the most popular Chr... ---
  Instantiating ShapleyExperimentHarness for Q17 (n=10 docs)...
Generating target response based on full context...
Target response: 'Temporal Drift.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx17_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 272.48it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q17
CUDA cache empty attempt complete on rank 0.

--- Question 19/20: What is the total number of plays for the most critically ap... ---
  Instantiating ShapleyExperimentHarness for Q18 (n=10 docs)...
Generating target response based on full context...
Target response: '450 million.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx18_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 286.91it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(


Attempting to empty CUDA cache on rank 0 after Q18
CUDA cache empty attempt complete on rank 0.

--- Question 20/20: What is the effect of drinking water from the sacred spring ... ---
  Instantiating ShapleyExperimentHarness for Q19 (n=10 docs)...
Generating target response based on full context...
Target response: 'The water from the Font of Resonance amplifies the mental abilities of those who drink it.'
Successfully loaded utilities from ../Experiment_data/synergy/utilities_q_idx19_n10.pkl. Found 1024 entries.
Broadcasted loaded utilities to all processes.


Pairwise Interactions (Matrix): 100%|██████████| 45/45 [00:00<00:00, 267.17it/s]
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
Processing Questions: 100%|██████████| 20/20 [00:18<00:00,  1.09it/s]

Attempting to empty CUDA cache on rank 0 after Q19
CUDA cache empty attempt complete on rank 0.


--- Average Correlation Metrics Across All Questions ---
                Avg_Pearson  Avg_Kendall  Num_Valid_Queries
Method                                                     
ExactInter           1.0000       1.0000                 20
ExactLinear          1.0000       1.0000                 20
WSS_FM100            0.9829       0.7280                 20
ContextCite100       0.9743       0.6714                 20
TMC100               0.9457       0.6372                 20
BetaShap100          0.9272       0.6480                 20
LOO                  0.8690       0.5029                 20
Script finished.
Rank 0 (Local Main): Distributed environment not initialized or not available, skipping destroy_process_group.
Script fully exited.





In [5]:
print(f"Interaction: {mse_inters},\n Linear: {mse_lins},\n FM: {mse_fms}")

Interaction: [0.09774896487687629, 0.08501734797637668, 0.019761560652927196, 0.0013347903692996028, 0.06594404028146503, 0.011158172847565583, 0.4172564780502045, 0.017130628046105052, 0.0096394961241482, 0.16791229510289846, 0.25242411255181035, 0.003831703077902289, 0.013714988396998282, 0.1776722058603319, 0.36979628095176587, 0.05542755963874956, 0.2263621876018738, 0.6102293614713568, 0.3508369116190768, 0.057492438415899946],
 Linear: [0.18623424128922017, 0.256493055526992, 0.03287140214429542, 0.003055569215165882, 0.12238553221081654, 0.023090882675048083, 2.1687463838740237, 0.06172822208809088, 0.06581568918490696, 1.15222384080821, 1.4507206752577928, 0.009518625205963398, 0.0575383762267966, 0.763112184921872, 1.606143661656533, 0.5935353140628266, 1.3195736251198666, 2.837954017985678, 1.6520974264059336, 0.2096100033402582],
 FM: [0.07447952886097514, 0.07027152408649431, 0.011676865132733605, 0.0011816209018718307, 0.04522814841564803, 0.007110059422978513, 0.279271417

In [6]:
print(f"Interaction: {sum(mse_inters)},\n Linear: {sum(mse_lins)},\n FM: {sum(mse_fms)}")


Interaction: 3.0106915239136316,
 Linear: 14.57244872920029,
 FM: 1.9842557840014978


In [None]:
harness.compute_exhaustive_top_k(2)

In [None]:
M[8]

In [None]:
# Evaluate metrics
all_metrics_data = []
exact_scores = results_for_query.get("Exact")
if exact_scores is not None:
    positive_exact_score = np.clip(exact_scores, a_min=0.0, a_max=None)
    for method, approx_scores in results_for_query.items():
        if method != "Exact" and approx_scores is not None and len(approx_scores) == len(exact_scores):
            if np.all(exact_scores == exact_scores[0]) or np.all(approx_scores == approx_scores[0]):
                pearson_c = spearman_c = 1.0 if np.allclose(exact_scores, approx_scores) else 0.0
            else:
                pearson_c, _ = pearsonr(exact_scores, approx_scores)
                spearman_c, _ = spearmanr(exact_scores, approx_scores)
                exact_ranks = rankdata(-np.array(exact_scores), method="average")
                approx_ranks = rankdata(-np.array(approx_scores), method="average")
                kendall_c, _ = kendalltau(exact_ranks, approx_ranks)
            ndgc_scoring = ndcg_score([positive_exact_score], [approx_scores], k=3)

            all_metrics_data.append({
                    "Method": method,
                "Pearson": pearson_c, "Spearman": spearman_c, "NDCG": ndgc_scoring, "KendallTau": kendall_c
            })
            all_metrics_data.sort(key=lambda x: x["Pearson"], reverse=True)


In [None]:
import matplotlib.pyplot as plt

method_scores = {}

for result in all_results:
    for method, scores in result.items():
        if scores is not None:
            method_scores[method] = np.round(scores, 4)

for method, scores in method_scores.items():
    plt.figure(figsize=(10, 4))
    plt.bar(range(len(scores)), scores, color='skyblue')
    plt.title(f"Approximate Scores: {method}")
    plt.xlabel("Index")
    plt.ylabel("Score")
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.xticks(range(len(scores)))
    plt.tight_layout()
    plt.show()


In [None]:
F[1]

In [None]:
df.context[19]