In [1]:
import pandas as pd
import sys
import pickle
import importlib
import os
import numpy as np
import scrapbook as sb
from sentence_transformers import SentenceTransformer

# Put the current working directory on sys.path so the local helper module
# imported right after this block can be found. The original author notes this
# is required when papermill runs the notebook in a fresh kernel.
# NOTE(review): os.getcwd() is only the project root if the notebook is
# launched from there — confirm the working directory used by the runner.
current_dir = os.getcwd()
if current_dir not in sys.path:
    # Insert at position 0 so this directory is searched first.
    sys.path.insert(0, current_dir)

import free_entailments_algorithm_utils as fea

In [2]:
# Default parameter values for this notebook. The "# Parameters" cell that
# follows re-assigns every one of these names for the executed run.

# Iteration index; forwarded to the fea load / run / finalize helpers.
iteration_number = 1
# CSV of LLM-labelled pairs, read with pd.read_csv further down.
input_csv_path = "labeled_pairs/Results_DS_BtoS_iteration_0.csv"
# Paths handed to fea.load_pipeline_data; None means no path is supplied.
df_clause_path = None
embedding_cache_path = None
# When True, later cells take their `if test:` branches and work from
# remaining_llm_calls instead of unlabeled_pairs.
test = True
remaining_llm_calls_path = None
unlabeled_pairs_path = None
# NOTE(review): sent_frac and budget are not referenced by any cell visible
# here — confirm they are still needed.
sent_frac = 0.5
budget = 0.00

In [3]:
# Parameters
# Values for this run; they replace the defaults assigned in the cell above.
# NOTE(review): the three pickle paths below mix "\\" and "/" separators, which
# suggests they were assembled on Windows. Presumably they will not resolve on
# a POSIX host — consider normalising them where they are built (caller side).
iteration_number = 0
input_csv_path = "labeled_pairs/Results_DS_BtoS_iteration_0.csv"
df_clause_path = "fea_iterations\\loop_data/df_clause.pkl"
embedding_cache_path = "fea_iterations\\loop_data/embedding_cache.pkl"
test = False
remaining_llm_calls_path = None
unlabeled_pairs_path = "fea_iterations\\loop_data/unlabeled_pairs.pkl"
sent_frac = 0.5
budget = 0.0


In [4]:
# Load every input this iteration needs through one helper call.
pipeline_data = fea.load_pipeline_data(
    iteration_number=iteration_number,
    test=test,
    df_clause_path=df_clause_path,
    embedding_cache_path=embedding_cache_path,
    remaining_llm_calls_path=remaining_llm_calls_path,
    unlabeled_pairs_path=unlabeled_pairs_path,
)

# Unpack the returned mapping into the names the rest of the notebook uses.
(
    df_clause,
    embedding_cache_finetuned,
    remaining_llm_calls,
    unlabeled_pairs,
) = (
    pipeline_data[key]
    for key in ('df_clause', 'embedding_cache', 'remaining_llm_calls', 'unlabeled_pairs')
)


PARAMETER VALUES AFTER PAPERMILL INJECTION:
iteration_number = 0
test = False
remaining_llm_calls_path = None
df_clause_path = fea_iterations\loop_data/df_clause.pkl

✓ Loaded df_clause: 63909 rows


✓ Loaded embedding cache: 63909 embeddings
✓ Loaded unlabeled_pairs: 1000 rows
✓ All data loaded from pickle files


# Task 1: Setting up DataFrames and Running FEA

In [5]:
# Raw LLM results for this iteration.
df_llm_original = pd.read_csv(input_csv_path)

# Keyword arguments shared by both add_verdict calls below.
verdict_kwargs = {
    'id1_col': 'sentence_id_1',
    'id2_col': 'sentence_id_2',
    'conclusion_col': 'llm_conclusion_12',
    'positive_label': 'YES',
}

# A populated 'verdict' column (e.g. written by process_llm_results_bidirectional)
# takes precedence: add_verdict only sees the one-way conclusion and would
# overwrite correct bidirectional verdicts.
has_verdict = (
    'verdict' in df_llm_original.columns
    and df_llm_original['verdict'].notna().any()
)
if not has_verdict:
    df_llm = fea.add_verdict(df_llm_original, **verdict_kwargs)
else:
    df_llm = df_llm_original
    print(f"Using existing 'verdict' column ({(df_llm['verdict']=='YES').sum()} YES, {(df_llm['verdict']=='NO').sum()} NO)")

if test:
    # Test mode only: derive verdicts for the remaining LLM calls as well.
    df_llm_remaining = fea.add_verdict(remaining_llm_calls, **verdict_kwargs)

Using existing 'verdict' column (1 YES, 5 NO)


In [6]:
# Join the clause sentences onto both ids of every labelled pair, keeping the verdict.
df_labeled = fea.merge_pairwise_texts(
    df1=df_clause,
    df2=df_llm,
    df1_cols=['sentence_id', 'sentence'],
    df2_cols=['sentence_id_1', 'sentence_id_2', 'verdict'],
)
df_labeled.head()

Unnamed: 0,id1,id2,text1,text2,verdict
0,B0674004p,S0051696006p,Maintaining order and justice is essential for...,Maintaining democratic oversight is crucial to...,NO
1,B1135002sc,S0018445003sc,Effective monarchy governance requires laws to...,The King must ensure governance,NO
2,B0448006p,B1089003p,The emphasis on a singular sovereign power in ...,The accountability of the sovereign to God emp...,NO
3,B0427001sc,B0596001sc,Popular Estates are essential for a just monar...,Lawful political authority is essential for so...,NO
4,B0083004p,B0132002p,The legitimacy of governance is rooted in the ...,The essence of legitimate governance lies in t...,YES


In [7]:
# Choose the pairs to score and the names of their id columns.
if test:
    # Test mode: score the remaining LLM calls.
    pairs_to_predict = df_llm_remaining
    pair_id_cols = ['sentence_id_1', 'sentence_id_2']
else:
    # Normal mode: score every unlabeled pair that is not already labelled.
    pairs_to_predict = fea.setminus(
        df_big=unlabeled_pairs,
        df_small=df_labeled,
        id_cols=['id1', 'id2'],
    )
    pair_id_cols = ['id1', 'id2']

# Attach the clause sentences to whichever set of pairs was selected.
df_predict = fea.merge_pairwise_texts(
    df1=df_clause,
    df2=pairs_to_predict,
    df1_cols=['sentence_id', 'sentence'],
    df2_cols=pair_id_cols,
)

df_predict.head()


Set difference: 1,000 - 6 = 1,000 rows


Unnamed: 0,id1,id2,text1,text2,verdict
0,B0628003p,B1143008p,The treasonous plot specifically involved an a...,This structure enhances the likelihood of wise...,
1,B0423006p,B0530003p,Magistrates are bound to obey the sovereign's ...,Usurpation is defined as a domestic conquest w...,
2,B0934002p,B1065002p,"Historically, the governance of the Kingdom wa...",Civil law is defined as the rules that the Com...,
3,B0716001p,B1125006p,The promise of obedience and compliance is ess...,"True peace is a harmonious society, not merely...",
4,B0560009p,B1204002p,The papal authority challenged the sanctity of...,The position of rulers is described as a deleg...,


## Embedding All Sentences

In [8]:
# Workaround: replace transformers' `list_repo_templates` with a stub so a
# later call into it cannot fail (the closing message below refers to a 404).
import transformers.utils.hub
import transformers.tokenization_utils_base


def _safe_list_templates(*args, **kwargs):
    """Stub for ``list_repo_templates``: accept any arguments, return an empty list."""
    return []


# Patch the defining module first, then tokenization_utils_base, which per the
# original author's note had already imported its own reference to the function.
for _module, _message in (
    (transformers.utils.hub, " - Patched transformers.utils.hub"),
    (transformers.tokenization_utils_base, " - Patched transformers.tokenization_utils_base"),
):
    _module.list_repo_templates = _safe_list_templates
    print(_message)

print("\nSUCCESS: The 404 error is now blocked.")

 - Patched transformers.utils.hub
 - Patched transformers.tokenization_utils_base

SUCCESS: The 404 error is now blocked.


## Test and Validation Subsamples

In [9]:
# Subset of the labelled pairs whose verdict is 'YES' (the entailed pairs).
is_entailed = df_labeled['verdict'].eq('YES')
df_obs_ent = df_labeled.loc[is_entailed]
df_obs_ent.head()

Unnamed: 0,id1,id2,text1,text2,verdict
4,B0083004p,B0132002p,The legitimacy of governance is rooted in the ...,The essence of legitimate governance lies in t...,YES


In [10]:
# Step 1: add the two equivalents list columns to the pairs to be predicted,
# derived from the entailed pairs (the id itself is not included).
candidates_with_equivs = fea.add_equivalents_from_pairs(
    df3=df_obs_ent,
    df4=df_predict,
    df3_cols=["id1", "id2"],
    df4_cols=["id1", "id2"],
    new_cols=("equivalents1", "equivalents2"),
    include_self=False,
)

# Step 2: add the "alpha" column computed from those two list columns.
df_candidates = fea.add_alpha_weight_column(
    df=candidates_with_equivs,
    list_col1='equivalents1',
    list_col2='equivalents2',
    new_col="alpha",
)

In [11]:
# Step 1: add the two equivalents list columns to the labelled pairs.
# NOTE(review): include_self=False is passed here, although an earlier inline
# comment on this argument read "keep the ID itself in the list" — confirm
# which behaviour is intended.
labeled_with_equivs = fea.add_equivalents_from_pairs(
    df3=df_obs_ent,
    df4=df_labeled,
    df3_cols=["id1", "id2"],
    df4_cols=["id1", "id2"],
    new_cols=("equivalents1", "equivalents2"),
    include_self=False,
)

# Step 2: add the "alpha" column computed from those two list columns.
df_labeled = fea.add_alpha_weight_column(
    df=labeled_with_equivs,
    list_col1='equivalents1',
    list_col2='equivalents2',
    new_col="alpha",
)

## Equivalence Classes

In [12]:
# Candidate pairs built from the equivalents columns: clause i/j paired with
# each k in the class of j/i.
crossed_pairs = fea.build_equiv_pair_candidates(
    df=df_candidates,
    id1_col="id1",
    id2_col="id2",
    equiv1_col="equivalents1",
    equiv2_col="equivalents2",
)

# Look up the clause sentence for both ids of every crossed pair.
df_crossed = fea.merge_pairwise_texts(
    df1=df_clause,
    df2=crossed_pairs,
    df1_cols=['sentence_id', 'sentence'],
    df2_cols=['id1', 'id2'],
)

df_crossed.head()

Unnamed: 0,id1,id2,text1,text2,verdict
0,S13558006p,B0132002p,The proposed Bill disregards established legal...,The essence of legitimate governance lies in t...,


In [13]:
# Crossed pairs for the labelled set, built from its equivalents columns.
labeled_crossed_pairs = fea.build_equiv_pair_candidates(
    df=df_labeled,
    id1_col="id1",
    id2_col="id2",
    equiv1_col="equivalents1",
    equiv2_col="equivalents2",
)

# Look up the clause sentence for both ids of every crossed pair.
df_labeled_crossed = fea.merge_pairwise_texts(
    df1=df_clause,
    df2=labeled_crossed_pairs,
    df1_cols=['sentence_id', 'sentence'],
    df2_cols=['id1', 'id2'],
)

df_labeled_crossed.head()

Unnamed: 0,id1,id2,text1,text2,verdict
0,B0083004p,B0083004p,The legitimacy of governance is rooted in the ...,The legitimacy of governance is rooted in the ...,
1,B0132002p,B0132002p,The essence of legitimate governance lies in t...,The essence of legitimate governance lies in t...,


## Running FEA

In [14]:
# Run the FEA step on the frames prepared above. Per this run's log output the
# helper executes FreeEntailmentAlgorithm.ipynb and hands back two results:
# df_final (scored pairs) and fig_html (an HTML plot).
df_final, fig_html = fea.run_fea_papermill(
    iteration_number=iteration_number,
    df_candidates=df_candidates,
    df_crossed=df_crossed,
    df_labeled=df_labeled,
    df_labeled_crossed=df_labeled_crossed,
    df_obs_ent=df_obs_ent,
    df_clause=df_clause,
    embedding_cache=embedding_cache_finetuned,
)

Executing FreeEntailmentAlgorithm.ipynb for iteration 0...


Executing:   0%|          | 0/33 [00:00<?, ?cell/s]

✓ Retrieved outputs:
  - df_final: 1 rows
  - fig_html: HTML plot (13949 chars)


In [15]:
# Preview the pairs returned by the FEA step.
df_final.head()

Unnamed: 0,id1,id2,text1,text2,entailment_probability
855,B0653002sc,S0053530002sc,Consent of the governed is essential for gover...,Parliament must accurately represent the will ...,0.169761


# Task 2: Cleaning LLM Calls

In [16]:
# Upper bound on how many pairs are sent to the LLM in one iteration.
MAX_LLM_PAIRS = 100_000

df_final = df_final.reset_index(drop=True)

# At or below the cap, every pair is sent. Above it, a seeded random sample of
# exactly MAX_LLM_PAIRS rows is drawn.
# NOTE(review): an earlier comment on this cell promised "no random
# subsampling", which the capped branch contradicts — confirm whether the cap
# should instead keep the highest-scoring pairs.
if len(df_final) <= MAX_LLM_PAIRS:
    df_to_llm = df_final.copy()
    print(f"Sending all {len(df_to_llm):,} pairs above threshold to LLM")
else:
    df_to_llm = df_final.sample(n=MAX_LLM_PAIRS, random_state=42)
    print(f"Capped df_to_llm at {MAX_LLM_PAIRS:,} (from {len(df_final):,} above threshold)")

Sending all 1 pairs above threshold to LLM


In [17]:
# Convert the selected pairs into the layout used for the LLM batch; the
# preview below shows the resulting columns.
df_to_llm = fea.format_df_to_llm(df_to_llm)
df_to_llm.head()

Unnamed: 0,sentence_id_2,sentence_id_1,sentence_text_2,argument_id_2,sentence_text_1,argument_id_1,score
0,S0053530002sc,B0653002sc,Parliament must accurately represent the will ...,S00535,Consent of the governed is essential for gover...,B0653,0.169761


In [18]:
# Sanity check: (rows, columns) of the batch about to be handed to the LLM.
df_to_llm.shape

(1, 7)

# Next loop:

In [19]:
# Close out this iteration. Per this run's log output, the helper writes the
# LLM batch to CSV, updates the labelled-pairs file, removes the sent pairs
# from unlabeled_pairs and re-saves it, and glues df_to_llm to the scrapbook.
result = fea.finalize_pipeline_iteration(
    test=test,
    df_to_llm=df_to_llm,
    iteration_number=iteration_number,
    remaining_llm_calls=remaining_llm_calls,
    remaining_llm_calls_path=remaining_llm_calls_path,
    unlabeled_pairs=unlabeled_pairs,
    unlabeled_pairs_path=unlabeled_pairs_path,
)

# Rebind the notebook-level state to the updated objects returned by the helper.
remaining_llm_calls = result['remaining_llm_calls']
unlabeled_pairs = result['unlabeled_pairs']

✓ Saved 1 pairs to fea_iterations/llm_batch_iter_0.csv for LLM processing
✓ LLM labeled pairs updated: 15 total in labeled_pairs/llm_labeled_pairs.csv


✓ Removed 1 pairs from unlabeled_pairs
✓ Remaining pairs for future iterations: 999
✓ Saved updated unlabeled_pairs to fea_iterations\loop_data/unlabeled_pairs.pkl


✓ Glued df_to_llm to scrapbook for FEA_Loop retrieval

Iteration 0 complete
