# TruthTorchLM — Truthfulness Checks and Long‑form Generation Demo

This notebook demonstrates two things with the `TruthTorchLM` package:

1. Multiple‑LLM truthfulness checking for factual claims.
2. Long‑form generation with a truth value/score.

Notes:
- This notebook assumes the package is already installed. If it is not, uncomment and run the install cell below.
- You will likely need API keys for whichever LLM backends the library uses (e.g., OpenAI, Anthropic, etc.). Set them in environment variables before running.
- Function names in the package may differ slightly from the ones assumed here. The notebook uses light introspection to locate functions whose names are close to the expected ones.


In [None]:
# If the package isn't installed in your environment, uncomment and run:
#!pip install -U TruthTorchLM


## Imports and package discovery


In [None]:
import importlib, importlib.util, inspect, os, re, types, json, textwrap

# Keep discovery simple and targeted to the intended package names.
pkg_candidates = [
    'TruthTorchLM',
    'truthtorchlm',
]

ttlm = None
PKG_NAME = None
# Remember why an installed candidate failed to import, so the final error can
# report the real cause instead of claiming the package is missing.
import_failure = None
for name in pkg_candidates:
    try:
        spec = importlib.util.find_spec(name)
    except (ImportError, ValueError):
        # The lookup itself failed for this name; treat it as "not found".
        continue
    if spec is None:
        continue
    try:
        ttlm = importlib.import_module(name)
    except Exception as exc:
        # The package exists but importing it raised (importing runs package
        # code, so any exception type is possible). Keep the error and try
        # the next candidate name.
        import_failure = (name, exc)
        continue
    PKG_NAME = name
    break

if ttlm is None:
    if import_failure is not None:
        failed_name, failed_exc = import_failure
        raise ImportError(
            f"Package {failed_name!r} is installed but failed to import: "
            f"{type(failed_exc).__name__}: {failed_exc}"
        ) from failed_exc
    raise ImportError(
        "TruthTorchLM package not found in this kernel.\n"
        "Try installing it in this exact kernel/env:\n"
        "  pip install -U TruthTorchLM\n"
        "Or directly from GitHub if needed:\n"
        "  pip install -U git+https://github.com/Ybakman/TruthTorchLM\n"
        "Also ensure your Jupyter kernel uses the same virtualenv where you installed it."
    )

print(f'Using package: {PKG_NAME}')
# Public attributes only; the keyword filter below narrows them to likely entry points.
attrs = [a for a in dir(ttlm) if not a.startswith('_')]
print('Available attributes (filtered):')
print([a for a in attrs if re.search(r'(truth|long|gen|check|verify)', a, re.I)])

# Try to get a version if exposed: first on the package itself, then on a
# `version` attribute (getattr on None simply yields the None default).
version = getattr(ttlm, '__version__', None)
if not version:
    version = getattr(getattr(ttlm, 'version', None), '__version__', None)
print('TruthTorchLM version:', version)


## Configure LLM providers (environment variables)

Set API keys as environment variables before running, for example:

- `OPENAI_API_KEY`
- `ANTHROPIC_API_KEY`
- `GOOGLE_API_KEY` (Gemini)

Only the providers supported by `TruthTorchLM` are needed. If the library exposes its own client configuration, feel free to adapt this cell accordingly.


In [None]:
# Environment variables that may carry credentials for an LLM backend.
provider_env_vars = ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'GOOGLE_API_KEY')

# Map each variable name to True when it is set to a non-empty value.
providers_present = {var: bool(os.getenv(var)) for var in provider_env_vars}
print('Provider keys present:', providers_present)

# Later cells only attempt network calls when at least one key is available.
CAN_CALL = True in providers_present.values()

if not CAN_CALL:
    missing_keys_notice = textwrap.dedent('''
    No provider API keys detected. The demo will skip network calls.
    To enable full execution, set one or more of the following environment variables and re-run this notebook:
      - OPENAI_API_KEY
      - ANTHROPIC_API_KEY
      - GOOGLE_API_KEY
    ''')
    print(missing_keys_notice)


## Helper: locate TruthTorchLM functions by name

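The helper below first looks for an attribute with one of the expected names. If none exists, it falls back to a case-insensitive substring match over the package's attributes and returns the first callable it finds.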


In [6]:
from typing import Callable, Optional, Tuple, Iterable

def find_callable(module, candidates: Iterable[str]) -> Tuple[Optional[str], Optional[Callable]]:
    """Locate a callable attribute of *module* by exact or approximate name.

    Each candidate is first tried as an exact attribute name, in the order
    given. If none resolves to a callable, every name in ``dir(module)`` is
    checked and the first callable whose lower-cased name contains a
    lower-cased candidate as a substring is returned.

    Args:
        module: Object whose attributes are searched (typically a module).
        candidates: Names to look for. Any iterable is accepted, including
            one-shot iterators; empty entries are ignored.

    Returns:
        ``(attribute_name, callable)`` for the first match, or
        ``(None, None)`` when nothing matches.
    """
    # Materialize once: the names are walked twice below, and a generator
    # would already be exhausted by the time the fuzzy pass starts. Empty
    # entries are dropped because '' is a substring of every attribute name.
    names = [c for c in candidates if c]

    # Exact match first
    for name in names:
        fn = getattr(module, name, None)
        if callable(fn):
            return name, fn

    # Fuzzy search fallback: case-insensitive substring match
    needles = [c.lower() for c in names]
    for attr in dir(module):
        attr_lower = attr.lower()
        if any(needle in attr_lower for needle in needles):
            # dir() may list names that cannot actually be resolved; the
            # default keeps such entries from aborting the search.
            obj = getattr(module, attr, None)
            if callable(obj):
                return attr, obj
    return None, None

def describe_signature(fn):
    """Return the call signature of *fn* as text.

    Falls back to the placeholder ``'(unknown signature)'`` when the
    signature cannot be inspected.
    """
    try:
        signature = inspect.signature(fn)
        rendered = str(signature)
    except Exception:
        rendered = '(unknown signature)'
    return rendered


## 1) Multiple‑LLM truthfulness check


In [7]:
# Names under which the package might expose a multi-LLM truthfulness checker
multi_truth_candidates = [
    'multiple_llm_truthfulness_check',
    'multi_llm_truthfulness_check',
    'multiple_llm_based_truthfulness_check',
    'truthfulness_check_multiple_llm',
    'truthfulness_across_llms',
    'check_truthfulness_multi_llm',
    'check_claim_truth_multi_llm',
    'check_truthfulness_across_models',
]

multi_truth_name, multi_truth_fn = find_callable(ttlm, multi_truth_candidates)
if multi_truth_fn:
    print(f'Using function: {multi_truth_name}{describe_signature(multi_truth_fn)}')
else:
    print('Could not find a multi‑LLM truthfulness function by the expected names.')
    print('Available attributes that look relevant:')
    possible = [a for a in dir(ttlm) if re.search(r'truth|check|verify', a, re.I)]
    print(possible)

# Example claims to evaluate
claims = [
    'The Eiffel Tower is in Berlin.',  # false
    'Water boils at 100 degrees Celsius at standard atmospheric pressure.',  # true
]

# Example list of model names to consult (adjust to what's supported in your environment)
llm_models = [
    'gpt-4o-mini',
    'gpt-4o',
]

results_multi = []
if multi_truth_fn and CAN_CALL:
    # The signature does not depend on the claim, so it is inspected once.
    # An uninspectable callable yields no parameter names, which leads to the
    # positional fallback call below.
    try:
        param_names = list(inspect.signature(multi_truth_fn).parameters)
    except Exception:
        param_names = []

    for claim in claims:
        # Guess each argument from keywords in the parameter name.
        call_kwargs = {}
        for pname in param_names:
            lowered = pname.lower()
            if any(hint in lowered for hint in ('claim', 'statement', 'text', 'prompt', 'question')):
                call_kwargs[pname] = claim
            elif 'llm' in lowered or 'model' in lowered:
                call_kwargs[pname] = llm_models
            elif 'provider' in lowered:
                # No provider information is available here; pass None.
                call_kwargs[pname] = None

        try:
            if call_kwargs:
                outcome = multi_truth_fn(**call_kwargs)
            else:
                outcome = multi_truth_fn(claim)
            results_multi.append({'claim': claim, 'result': outcome})
        except TypeError as te:
            print('Signature mismatch while calling', multi_truth_name, 'with kwargs', call_kwargs)
            print(te)
        except Exception as e:
            print('Error calling', multi_truth_name, '->', e)
elif not CAN_CALL:
    print('Skipping multi‑LLM truthfulness checks due to missing provider API keys.')


Could not find a multi‑LLM truthfulness function by the expected names.
Available attributes that look relevant:
['GOOGLE_CHECK_QUERY_SYSTEM_PROMPT', 'GOOGLE_CHECK_QUERY_USER_PROMPT', 'GOOGLE_CHECK_VERIFICATION_SYSTEM_PROMPT', 'GOOGLE_CHECK_VERIFICATION_USER_PROMPT', 'TruthMethod', 'calibrate_truth_method', 'evaluate_truth_method', 'generate_with_truth_value', 'truth_methods']
Skipping multi‑LLM truthfulness checks due to missing provider API keys.


## 2) Long‑form generation with truth value/score

Some versions of the package name this function `long_form_generation_with_truth_value`. The cell below looks for that name first and then for similarly named functions.


In [None]:
# Names under which the package might expose long-form generation with a truth score
long_form_candidates = [
    'long_form_generation_with_truth_value',
    'long_form_generation_with_truthfulness',
    'generate_long_form_with_truth',
    'long_form_generate_with_truth_value',
    'longform_generation_with_truth_value',
]

long_form_name, long_form_fn = find_callable(ttlm, long_form_candidates)
if long_form_fn:
    print(f'Using function: {long_form_name}{describe_signature(long_form_fn)}')
else:
    print('Could not find the long‑form generation with truth value function by the expected names.')
    print('Available attributes that look relevant:')
    possible = [a for a in dir(ttlm) if re.search(r'long|truth|gen|compose|article', a, re.I)]
    print(possible)

prompt = (
    'Explain the history of the Eiffel Tower in about 150-250 words, and include factual references. '
    'Return both the generated text and a truthfulness score if supported.'
)

long_form_output = None
if long_form_fn and CAN_CALL:
    # An uninspectable callable yields no parameter names, which leads to the
    # positional fallback call below.
    try:
        param_names = list(inspect.signature(long_form_fn).parameters)
    except Exception:
        param_names = []

    # Guess each argument from keywords in the parameter name.
    call_kwargs = {}
    for pname in param_names:
        lowered = pname.lower()
        if any(hint in lowered for hint in ('prompt', 'question', 'topic', 'query', 'instruction', 'text')):
            call_kwargs[pname] = prompt
        elif 'llm' in lowered or 'model' in lowered:
            call_kwargs[pname] = 'gpt-4o-mini'
        elif any(hint in lowered for hint in ('max_tokens', 'length', 'words')):
            call_kwargs[pname] = 300
        elif 'return' in lowered and 'score' in lowered:
            call_kwargs[pname] = True

    try:
        if call_kwargs:
            long_form_output = long_form_fn(**call_kwargs)
        else:
            long_form_output = long_form_fn(prompt)
    except TypeError as te:
        print('Signature mismatch while calling', long_form_name, 'with kwargs', call_kwargs)
        print(te)
    except Exception as e:
        print('Error calling', long_form_name, '->', e)
elif not CAN_CALL:
    print('Skipping long‑form generation due to missing provider API keys.')


## Pretty‑print results (if structures are returned)


In [None]:

def to_jsonable(obj):
    """Convert *obj* into a structure that ``json.dumps`` can serialize.

    Values that already serialize are returned unchanged. Otherwise dicts are
    rebuilt with converted values, lists/tuples/sets become lists of
    converted items, and anything else is replaced by ``str(obj)``.

    Dict keys that JSON cannot encode (for example tuples or arbitrary
    objects) are replaced by their ``str()`` form; without this the converted
    dict would still be rejected by ``json.dumps``.

    Args:
        obj: Any Python value.

    Returns:
        *obj* itself when it is already serializable, otherwise a converted
        copy built from dicts, lists and strings.
    """
    try:
        json.dumps(obj)
        return obj
    except (TypeError, ValueError, RecursionError):
        # Not serializable as-is; fall through to the structural conversion.
        pass

    if isinstance(obj, dict):
        # Key types the json encoder accepts; every other key is stringified.
        json_key_types = (str, int, float, bool, type(None))
        return {
            (key if isinstance(key, json_key_types) else str(key)): to_jsonable(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple, set)):
        return [to_jsonable(item) for item in obj]
    return str(obj)

# Look the results up through globals() so a name that was never defined is
# treated the same as an empty result instead of raising NameError.
multi_results = globals().get('results_multi')
if multi_results:
    print('Multi‑LLM truthfulness results:')
    print(json.dumps(to_jsonable(multi_results), indent=2))

long_form_result = globals().get('long_form_output')
if long_form_result is not None:
    print('\nLong‑form generation output:')
    print(json.dumps(to_jsonable(long_form_result), indent=2))


## Tips
- If your package exposes classes instead of top‑level functions (e.g., a `TruthTorch` or `Verifier` class), instantiate it and re‑run the introspection for methods on that instance.
- If a function requires specific provider objects rather than model names, consult the library README and pass those objects instead of strings.
- For reproducibility, consider setting a seed if the library offers one.
