# TruthTorchLM — Quickstart: simple method tests and examples

This minimal notebook shows how to:

- Run a multiple‑LLM truthfulness check on two sample claims (one false, one true)
- Generate a brief long‑form answer (~120–200 words) and, when the output includes one, read its truth score

Prereqs:
- Install the package in this kernel/env (uncomment if needed):
  - `%pip install -U TruthTorchLM`
  - or `%pip install -U git+https://github.com/Ybakman/TruthTorchLM`
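- Set at least one provider API key supported by your setup:
  - `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `GOOGLE_API_KEY`
  - The example cells request `gpt-4o-mini` (an OpenAI model), so set `OPENAI_API_KEY` or change the model name in those cells to one your key can access.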


In [None]:
# Environment report: confirms which interpreter/kernel is running, whether
# either spelling of the package is importable, and which provider keys exist.
import sys, os, importlib.util

for _label, _value in (('Python:', sys.version), ('Kernel executable:', sys.executable)):
    print(_label, _value)

# find_spec only locates the package; nothing is imported at this point.
for _label, _module_name in (
    ('Has TruthTorchLM?', 'TruthTorchLM'),
    ('Has truthtorchlm?', 'truthtorchlm'),
):
    print(_label, bool(importlib.util.find_spec(_module_name)))

# Report presence only — the key values themselves are never printed.
for _label, _env_name in (
    ('OPENAI_API_KEY set?', 'OPENAI_API_KEY'),
    ('ANTHROPIC_API_KEY set?', 'ANTHROPIC_API_KEY'),
    ('GOOGLE_API_KEY set?', 'GOOGLE_API_KEY'),
):
    print(_label, bool(os.getenv(_env_name)))


## Import package (simple)


In [None]:
import importlib, importlib.util, types

def import_tt():
    """Import the package under whichever spelling this kernel can resolve.

    The names 'TruthTorchLM' and 'truthtorchlm' are probed in that order with
    ``importlib.util.find_spec``; the first one that resolves is imported.

    Returns:
        A ``(name, module)`` tuple: the spelling that was found and the
        imported module object.

    Raises:
        ImportError: if neither spelling can be located. The message contains
            install instructions for the current kernel.
    """
    resolved = next(
        (
            spelling
            for spelling in ('TruthTorchLM', 'truthtorchlm')
            if importlib.util.find_spec(spelling) is not None
        ),
        None,
    )
    if resolved is None:
        raise ImportError(
            'TruthTorchLM not found in this kernel.\n'
            'Install into THIS kernel or switch kernel, e.g.:\n'
            '  %pip install -U TruthTorchLM\n'
            'Or from GitHub:\n'
            '  %pip install -U git+https://github.com/Ybakman/TruthTorchLM\n'
            'Then restart the kernel.'
        )
    return resolved, importlib.import_module(resolved)

# Import the package once and keep both the resolved spelling and the module.
PKG_NAME, ttlm = import_tt()

# Prefer a truthy ``__version__`` on the package itself; otherwise look for
# ``__version__`` on its ``version`` attribute (an empty namespace stands in
# when that attribute is missing or falsy, so the lookup yields None).
version = getattr(ttlm, '__version__', None)
if not version:
    version = getattr(
        getattr(ttlm, 'version', None) or types.SimpleNamespace(),
        '__version__',
        None,
    )
print(f'Using package: {PKG_NAME}, version: {version}')


## Provider keys check (network calls will be skipped if none present)


In [None]:
import textwrap, json

# CAN_CALL gates every example that would hit a provider: it is True as soon
# as one of the recognised API-key variables holds a non-empty value.
CAN_CALL = any(
    bool(os.getenv(env_name))
    for env_name in ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'GOOGLE_API_KEY')
)
if not CAN_CALL:
    # dedent strips the indentation the triple-quoted literal picks up here.
    print(textwrap.dedent('''
    No provider API keys detected — examples will be skipped.
    Set one of: OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY and re‑run.
    '''))


## Helpers: find functions and extract scores


In [None]:
import inspect, re
from typing import Callable, Iterable, Optional, Tuple, Any

def find_callable(module, candidates: Iterable[str]) -> Tuple[Optional[str], Optional[Callable]]:
    """Return the first candidate attribute of ``module`` that is callable.

    Args:
        module: Object (normally the imported package) whose attributes are probed.
        candidates: Attribute names to try, in priority order. Any iterable is
            accepted, including one-shot generators; a bare string is treated
            as a single name rather than a sequence of characters.

    Returns:
        ``(name, fn)`` for the first exact-name match that is callable, or
        ``(None, None)`` when nothing matches. In the latter case a short hint
        listing similarly named attributes is printed.
    """
    # Materialise once: the names are needed again for the hint below, and a
    # generator would already be exhausted by the lookup loop.
    names = [candidates] if isinstance(candidates, str) else list(candidates)

    # Exact match only (keep it simple)
    for name in names:
        fn = getattr(module, name, None)
        if callable(fn):
            return name, fn

    # Nothing matched: show attributes whose names look related.
    looks = [a for a in dir(module) if re.search(r'(truth|long|gen|check|verify)', a, re.I)]
    print('Could not find any of', names)
    if looks:
        shown = looks[:12]
        if len(looks) > len(shown):
            # Only signal truncation when some names were actually left out.
            print('Related names in the package:', shown, '...')
        else:
            print('Related names in the package:', shown)
    return None, None

# Keys that are checked, in this order, when looking for a score inside a dict.
SCORE_KEYS = ('truth_score', 'truth', 'score', 'truthfulness')

def extract_score(obj: Any) -> Optional[float]:
    """Best-effort search for a numeric score inside an arbitrary result object.

    Search order:
      1. ``obj`` itself, when it is an ``int`` or ``float`` (but not a ``bool``).
      2. For a dict: the first key from ``SCORE_KEYS`` whose value converts
         with ``float()``; failing that, the dict's values, depth-first.
      3. For a list or tuple: the elements, depth-first.

    Bare booleans met during the structural search are ignored, so an unrelated
    flag (e.g. ``{'verified': True, ...}``) is not reported as a score of 1.0.
    A value stored under one of ``SCORE_KEYS`` is still converted with
    ``float()``, so ``{'truth': True}`` yields 1.0.

    Args:
        obj: A number, dict, list, tuple, or any other object.

    Returns:
        The first score found as a ``float``, or ``None`` when no score is found.
    """
    try:
        # bool is a subclass of int; reject it before the numeric check.
        if isinstance(obj, bool):
            return None
        if isinstance(obj, (int, float)):
            return float(obj)

        if isinstance(obj, dict):
            for key in SCORE_KEYS:
                if key in obj:
                    try:
                        return float(obj[key])
                    except Exception:
                        # Deliberately broad: values come from third-party code
                        # and may fail conversion in type-specific ways. Move on
                        # to the next key, then to the nested search.
                        continue
            children = obj.values()
        elif isinstance(obj, (list, tuple)):
            children = obj
        else:
            return None

        for child in children:
            found = extract_score(child)
            if found is not None:
                return found
    except (RecursionError, OverflowError):
        # Self-referential/very deep containers, or ints too large for float.
        return None
    return None


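## 1) Multiple‑LLM truthfulness check — simple run + smoke test

Run the import, provider‑key, and helper cells above first: this cell relies on `ttlm`, `CAN_CALL`, `find_callable`, and `extract_score`.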


In [1]:
# Adjust names to match your installed version if needed
multi_truth_candidates = (
    'multiple_llm_truthfulness_check',
    'multi_llm_truthfulness_check',
    'multiple_llm_based_truthfulness_check',
)
name_mt, fn_mt = find_callable(ttlm, multi_truth_candidates)

# Order matters: the smoke test below compares claims[1] (true) with claims[0] (false).
claims = [
    'The Eiffel Tower is in Berlin.',            # false
    'Water boils at 100°C at standard pressure.' # true
]
models = ['gpt-4o-mini']  # keep it to one lightweight model

results_mt = []
if fn_mt and CAN_CALL:
    # Adapt to common signatures. Introspection happens once; some callables
    # expose no signature, in which case the positional fallbacks are used.
    try:
        sig = inspect.signature(fn_mt)
    except (TypeError, ValueError):
        sig = None

    for claim in claims:
        # Guess keyword arguments from parameter names.
        kwargs = {}
        if sig:
            for pname in sig.parameters:
                p = pname.lower()
                if any(tok in p for tok in ('claim', 'statement', 'text', 'prompt', 'question')):
                    kwargs[pname] = claim
                elif 'llm' in p or 'model' in p:
                    kwargs[pname] = models
        try:
            out = fn_mt(**kwargs) if kwargs else fn_mt(claim)
        except TypeError:
            # The guessed call shape was rejected: try positional (claim, models).
            try:
                out = fn_mt(claim, models)
            except Exception as e:
                print('Call failed for claim:', claim, '->', e)
                continue
        except Exception as e:
            # Provider/network/auth errors: report and move on to the next
            # claim instead of aborting the whole cell.
            print('Call failed for claim:', claim, '->', e)
            continue
        results_mt.append({'claim': claim, 'result': out})

    # Simple smoke test if scores can be extracted. It needs a result for every
    # claim, otherwise the positions no longer map to false/true.
    if len(results_mt) == len(claims):
        s_false = extract_score(results_mt[0]['result'])
        s_true  = extract_score(results_mt[1]['result'])
        if s_false is None or s_true is None:
            print('No numeric truth scores found — skipping score assertion.')
        elif s_true >= s_false:
            print('Truthfulness score test passed:', s_true, '>=', s_false)
        else:
            # Explicit comparison instead of assert, so the check also runs
            # when Python is started with -O.
            print('Truthfulness score test FAILED:',
                  f'Expected true-claim score >= false-claim score, got {s_true} vs {s_false}')
    else:
        print(f'Only {len(results_mt)} of {len(claims)} calls succeeded — skipping score assertion.')
else:
    if not fn_mt:
        print('Skipping — multi‑LLM truth function not found.')
    elif not CAN_CALL:
        print('Skipping — missing provider API keys.')


NameError: name 'find_callable' is not defined

## 2) Long‑form generation with truth value — simple run + smoke test


In [None]:
long_form_candidates = (
    'long_form_generation_with_truth_value',
    'long_form_generation_with_truthfulness',
    'generate_long_form_with_truth',
)
name_lf, fn_lf = find_callable(ttlm, long_form_candidates)

prompt = 'Explain the history of the Eiffel Tower in ~120–200 words with factual references.'
long_form_model = 'gpt-4o-mini'  # one lightweight model, used by both call shapes below
long_form_out = None
if fn_lf and CAN_CALL:
    # Some callables expose no signature; the positional fallbacks cover that case.
    try:
        sig = inspect.signature(fn_lf)
    except (TypeError, ValueError):
        sig = None

    # Guess keyword arguments from parameter names.
    kwargs = {}
    if sig:
        for pname in sig.parameters:
            p = pname.lower()
            if any(tok in p for tok in ('prompt', 'question', 'topic', 'query', 'instruction', 'text')):
                kwargs[pname] = prompt
            elif 'llm' in p or 'model' in p:
                kwargs[pname] = long_form_model
            elif 'max_tokens' in p or 'length' in p or 'words' in p:
                kwargs[pname] = 300  # generous cap for a ~120–200 word answer
            elif 'return' in p and 'score' in p:
                kwargs[pname] = True
    try:
        long_form_out = fn_lf(**kwargs) if kwargs else fn_lf(prompt)
    except TypeError:
        # The guessed call shape was rejected: try positional (prompt, model).
        try:
            long_form_out = fn_lf(prompt, long_form_model)
        except Exception as e:
            print('Long‑form call failed ->', e)
    except Exception as e:
        # Provider/network/auth errors: report them instead of aborting the cell.
        print('Long‑form call failed ->', e)

    # Smoke tests: presence of text and (optional) score
    text_val = None
    if isinstance(long_form_out, str):
        text_val = long_form_out
    elif isinstance(long_form_out, dict):
        for k in ('text', 'output', 'answer', 'content'):
            if k in long_form_out and isinstance(long_form_out[k], str):
                text_val = long_form_out[k]
                break
    if text_val:
        # Explicit comparisons instead of assert, so the checks also run under -O.
        if len(text_val) >= 80:
            print('Long‑form text length test passed:', len(text_val), 'chars')
        else:
            print('Long‑form text length test FAILED:',
                  f'Expected at least ~80 chars of text, got {len(text_val)}')
    else:
        print('Could not locate generated text — skipping length assertion.')

    s = extract_score(long_form_out)
    if s is not None:
        # Soft bound check — `s == s` is False only for NaN, and the magnitude
        # bound rejects infinities and absurdly large values.
        if s == s and abs(s) < 1e6:
            print('Long‑form score looks reasonable:', s)
        else:
            print('Long‑form score test FAILED:', f'Unreasonable score value: {s}')
else:
    if not fn_lf:
        print('Skipping — long‑form function not found.')
    elif not CAN_CALL:
        print('Skipping — missing provider API keys.')


## Done
- You now have minimal, self‑contained examples for both features.
- If a function name differs in your installed version, replace the candidate lists at the top of each section with the correct name and re‑run.
