# Testing Pipelines

### Setting up observability (tracing) with Phoenix

In [4]:
# Optional one-time setup: uncomment to install the tracing dependencies
# into the kernel's environment.
# %pip install arize-phoenix
# %pip install llama-index-callbacks-arize-phoenix

# Observability: start the Phoenix app and register it as the global
# LlamaIndex handler.
import llama_index.core  # type: ignore
import phoenix as px  # type: ignore

# Launch the Phoenix app; the saved cell output reports the UI at
# http://localhost:6006/.
px.launch_app()

# Register the "arize_phoenix" global handler, pointing it at the traces
# endpoint on the same local host/port as the app launched above.
llama_index.core.set_global_handler("arize_phoenix", endpoint="http://localhost:6006/v1/traces")

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [1]:
# Claims fed to the pipeline by the execution loop further down.
# Only the first German-language claim is active; the other two are kept
# commented out as alternatives.
example_inputs = [
    "Die Osterweiterung hat die EU-Institutionen nachhaltig geschwächt.",
    #"In den knapp 70 Jahren seit ihrer Gründung hat es in der Bundeswehr immer wieder rechtsextremistische Vorfälle gegeben.",
    #"In der Bundeswehr gibt es keinen politischen Extremismus.",
]

# Alternative English-language claim set (climate topics), currently disabled.
# NOTE(review): presumably intended for a different index than the one
# configured in this notebook - confirm before enabling.
# example_inputs = [
#     "Current climate models (CMIP6) have difficulties in representing clouds.",
#     "Due to climate change, there is less spring snow cover in Europe.",
#     "Northern Hemisphere spring snow cover is decreasing because of anthropogenic climate change.",
#     "Carbon dioxide removal (CDR) has no substantial side-effects.", # 99
#     "Carbon dioxide removal (CDR) is highly effective and safe.",
#     "Climate change is the greatest threat to humanity.",
#     "An immediate global ban of fossil fuels is the absolutely only way to avoid climate armageddon.",
# ]

In [2]:
from evidence_seeker import EvidenceSeeker
from evidence_seeker.retrieval import RetrievalConfig

# Retrieval settings: the location of the persisted index and the embedding
# backend/model to use with it.
retrieval_config = RetrievalConfig(
    # Path is relative to the notebook's working directory; the saved cell
    # output shows the index being loaded from "<this path>/index".
    index_persist_path="../TMP/APUZ/storage",
    # Local model (via Huggingface API)
    embed_backend_type="huggingface",
    embed_model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
)
# Build the pipeline with only the retrieval configuration given explicitly.
# NOTE(review): the other stages presumably fall back to package defaults
# (the saved output shows an OpenAI-like LLM client being instantiated) -
# confirm which model/endpoint is expected before re-running.
pipeline = EvidenceSeeker(retrieval_config=retrieval_config)

[32m2025-05-15 15:59:58.608[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.backend[0m:[36mget_openai_llm[0m:[36m242[0m - [34m[1mInstantiating OpenAILike model (model: llama-3.2-1b-instruct,base_url: http://127.0.0.1:1234/v1/).[0m
[32m2025-05-15 16:00:01.121[0m | [1mINFO    [0m | [36mevidence_seeker.retrieval.base[0m:[36mload_index[0m:[36m127[0m - [1mLoading index from disk at ../TMP/APUZ/storage/index[0m


In [5]:
from IPython.display import display, Markdown  # type: ignore

def describe_results(claim: str, results: list):
    """Render the EvidenceSeeker results for one submitted claim as Markdown.

    Builds a Markdown report (a preamble naming the submitted claim followed
    by one section per entry in ``results``) and shows it in the notebook via
    ``display(Markdown(...))``.

    Args:
        claim: The claim text as it was submitted to the pipeline.
        results: One mapping per clarified claim. Each mapping must provide
            the keys ``text``, ``statement_type``, ``verbalized_confirmation``,
            ``average_confirmation``, ``evidential_uncertainty`` and
            ``n_evidence``; the two numeric metrics are formatted with three
            decimals. ``statement_type`` may be an enum member (its ``.value``
            is shown) or an already-plain value (shown as is).

    Returns:
        None. The report is displayed as a side effect.

    Raises:
        KeyError: If a result mapping lacks one of the keys listed above.
    """
    preamble_template = (
        '## EvidenceSeeker Results\n\n'
        '### Input\n\n'
        '**Submitted claim:** {claim}\n\n'
        '### Results\n\n'
    )
    result_template = (
        '**Clarified claim:** <font color="orange">{text}</font> [_{statement_type}_]\n\n'
        '**Status**: {verbalized_confirmation}\n\n'
        '|Metric|Value|\n'
        '|:---|---:|\n'
        '|Average confirmation|{average_confirmation:.3f}|\n'
        '|Evidential divergence|{evidential_uncertainty:.3f}|\n'
        '|Width of evidential base|{n_evidence}|\n\n'
    )
    markdown = [preamble_template.format(claim=claim)]
    for claim_dict in results:
        # Work on a shallow copy so the caller's result mapping is not mutated.
        rdict = dict(claim_dict)
        statement_type = rdict["statement_type"]
        # Show the enum's value rather than its repr; values that are already
        # plain (no ``.value`` attribute) pass through unchanged.
        rdict["statement_type"] = getattr(statement_type, "value", statement_type)
        # Format from the converted copy. Formatting the untouched input
        # mapping instead would silently discard the conversion above.
        markdown.append(result_template.format(**rdict))
    display(Markdown("\n".join(markdown)))


### 12/12/2024

In [None]:
# Run the pipeline on each example claim and render its results, followed by
# a horizontal rule as a visual separator between claims.
# The pipeline call is awaited at top level, so this cell relies on the
# notebook kernel's support for top-level `await`.
# NOTE(review): the saved output of this cell contains
# "ValueError: Confirmation analysis failed." - investigate before relying
# on a full re-run.
for example_input in example_inputs:
    results = await pipeline(example_input)
    describe_results(example_input, results)
    display(Markdown("------\n"))

[32m2025-05-15 16:00:51.825[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.preprocessing.workflows[0m:[36mascriptive_analysis[0m:[36m174[0m - [34m[1mAnalysing ascriptive aspects of claim.[0m
[32m2025-05-15 16:00:51.837[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.preprocessing.workflows[0m:[36mdescriptive_analysis[0m:[36m118[0m - [34m[1mAnalysing descriptive aspects of claim.[0m
[32m2025-05-15 16:00:51.841[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.preprocessing.workflows[0m:[36mnormative_analysis[0m:[36m229[0m - [34m[1mAnalysing normative aspects of claim.[0m
[32m2025-05-15 16:01:33.818[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.preprocessing.workflows[0m:[36mlist_ascriptive_claims[0m:[36m212[0m - [34m[1mNumber of ascriptive claims: 1[0m
[32m2025-05-15 16:01:33.820[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.preprocessing.workflows[0m:[36mnegate_claim[0m:[36m282[0m - [34m[1mNegating claim.[0m
[32m2025-05-15

ValueError: Confirmation analysis failed.

[32m2025-05-15 16:25:18.358[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.confirmation_analysis.workflows[0m:[36mcollect_analyses[0m:[36m230[0m - [34m[1mProbs for the claim: [0.0, 0.0, 0.0][0m
[32m2025-05-15 16:25:18.358[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.confirmation_analysis.workflows[0m:[36mcollect_analyses[0m:[36m231[0m - [34m[1mProbs for the negation: [1.0, 0.0, 0.0][0m
[32m2025-05-15 16:25:18.360[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.confirmation_analysis.workflows[0m:[36mfreetext_analysis[0m:[36m117[0m - [34m[1mConfirmation analysis.[0m
[32m2025-05-15 16:25:18.363[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.confirmation_analysis.workflows[0m:[36mfreetext_analysis[0m:[36m117[0m - [34m[1mConfirmation analysis.[0m
[32m2025-05-15 16:25:25.784[0m | [34m[1mDEBUG   [0m | [36mevidence_seeker.confirmation_analysis.workflows[0m:[36mcollect_analyses[0m:[36m230[0m - [34m[1mProbs for the claim: [][0m
