# Fallacy detector chain

## Implementation

In [1]:
#| default_exp fallacy

In [2]:
#| export
from pino_inferior.core import DATA_DIR, PROMPTS_DIR, OPENAI_API_KEY
from langchain.schema.runnable import RunnableSequence
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chains import LLMChain, SequentialChain
from langchain.llms.openai import BaseLLM
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains.transform import TransformChain
from langchain.schema.messages import AIMessage, AIMessageChunk
import os
from dataclasses import dataclass
from typing import Union, List
import json
from datetime import datetime
from pino_inferior.message import Message

In [3]:
#| export
# JSON file with the fallacy definitions; the example at the bottom of this
# notebook loads it with `read_fallacies`.
FALLACIES_FNAME = os.path.join(DATA_DIR, "fallacies.json")
# Directory holding the prompt template files (`system.txt`, `instruction.txt`)
# that the prompt cell below reads.
FALLACIES_PROMPT_DIR = os.path.join(PROMPTS_DIR, "fallacies")

In [4]:
#| export
# Keys the caller supplies in the dict passed to the chain
# (see the `invoke` / `ainvoke` examples at the end of this notebook).
INPUT_FALLACIES = "fallacies"
INPUT_HISTORY = "history"
INPUT_CONTEXT = "context"
INPUT_QUERY = "query"

# Keys produced by the intermediate transforms: the stringified fallacies and
# history, and the author of the last history message.
INTERMEDIATE_FALLACIES_STR = "fallacies_str"
INTERMEDIATE_HISTORY_STR = "history_str"
INTERMEDIATE_LAST_AUTHOR = "last_message_author"

# Key under which the model response is read, and key under which the short
# answer extracted from that response is returned.
OUTPUT_LLM_OUTPUT = "llm_output"
OUTPUT_SHORT_ANSWER = "answer"

# `extract_answer_from_cot` keeps only the text that follows the last
# occurrence of this marker in the model response.
LLM_OUTPUT_MARKER = "Therefore"

### Fallacy representation

In [5]:
#| export
@dataclass
class FallacyExample:
    """An example text together with the response given to it.

    Instances are attached to a `Fallacy` and rendered into its string form.
    """
    text: str
    response: str

    def __str__(self) -> str:
        """Render the example and its response as two labelled lines."""
        example_line = f"Example: {self.text}"
        response_line = f"Example Response: {self.response}"
        return "\n".join((example_line, response_line))


@dataclass
class Fallacy:
    """A fallacy definition: a name, a description and an optional example."""
    name: str
    description: str
    example: Union[FallacyExample, None]

    def __str__(self) -> str:
        """Render the fallacy as a `# name` heading followed by its description.

        When an example is set, its string form is appended as one more
        section. Sections are separated by a blank line.
        """
        sections = [f"# {self.name}", f"{self.description}"]
        if self.example:
            sections.append(str(self.example))
        return "\n\n".join(sections)
    

def read_fallacies(fname: str) -> List[Fallacy]:
    """Load fallacy definitions from a UTF-8 encoded JSON file.

    The file is iterated as a sequence of records. Each record must provide
    `name` and `description`; a truthy `example` entry is expanded as keyword
    arguments into a `FallacyExample`, otherwise the fallacy has no example.

    Args:
        fname: Path to the JSON file.

    Returns:
        The fallacies in the order they appear in the file.
    """
    with open(fname, "r", encoding="utf-8") as src:
        records = json.load(src)

    def _to_fallacy(record) -> Fallacy:
        # The example is resolved first, then the mandatory fields are read.
        raw_example = record.get("example")
        example = FallacyExample(**record["example"]) if raw_example else None
        return Fallacy(name=record["name"], description=record["description"], example=example)

    return [_to_fallacy(record) for record in records]

### Prompts

In [6]:
#| export
# System message template, loaded from `system.txt` in the fallacies prompt
# directory; it is declared without any input variables.
system_prompt = SystemMessagePromptTemplate.from_template_file(
    os.path.join(FALLACIES_PROMPT_DIR, "system.txt"),
    input_variables=[]
)
# Human message template, loaded from `instruction.txt`. Its declared
# variables are the stringified fallacies and history, the author of the last
# message, and the caller-supplied context and query.
instruction_prompt = HumanMessagePromptTemplate.from_template_file(
    os.path.join(FALLACIES_PROMPT_DIR, "instruction.txt"),
    input_variables=[INTERMEDIATE_FALLACIES_STR,
                     INTERMEDIATE_HISTORY_STR,
                     INTERMEDIATE_LAST_AUTHOR,
                     INPUT_CONTEXT,
                     INPUT_QUERY]
)
# Chat prompt made of the system message followed by the instruction message.
chat_prompt = ChatPromptTemplate.from_messages([system_prompt, instruction_prompt])

### Conversions

#### History & fallacies to strings

Initially, the message history and the fallacy types are represented as objects.

We need to convert them to strings before they can be inserted into the prompt.

**TODO:** Truncate message lists that are too long.

In [7]:
#| export
def stringify(row):
    """Convert the fallacy and history objects of `row` into prompt-ready text.

    Each item is converted with `str` and the items of one list are joined
    with a blank line between them.

    Args:
        row: Mapping holding the fallacies under `INPUT_FALLACIES` and the
            message history under `INPUT_HISTORY`.

    Returns:
        A dict with the joined fallacies under `INTERMEDIATE_FALLACIES_STR`
        and the joined history under `INTERMEDIATE_HISTORY_STR`.
    """
    fallacies: List[Fallacy] = row[INPUT_FALLACIES]
    history: List[Message] = row[INPUT_HISTORY]  # TODO: cut
    separator = "\n\n"
    fallacies_text = separator.join(str(fallacy) for fallacy in fallacies)
    history_text = separator.join(str(message) for message in history)
    return {
        INTERMEDIATE_FALLACIES_STR: fallacies_text,
        INTERMEDIATE_HISTORY_STR: history_text,
    }

async def astringify(row):
    """Async counterpart of `stringify`: calls it directly and returns its result."""
    stringified = stringify(row)
    return stringified

#### Extract last user name

We will explicitly tell the model to look for issues in the messages of the last user (the author of the most recent message), since those are the messages we are interested in.

In [8]:
#| export
def extract_last_user(row):
    """Return the author of the most recent message in the history.

    Args:
        row: Mapping holding the message history under `INPUT_HISTORY`.

    Returns:
        A dict with that author under `INTERMEDIATE_LAST_AUTHOR`.

    Raises:
        AssertionError: If the history is empty.
    """
    history: List[Message] = row[INPUT_HISTORY]
    assert len(history) > 0
    last_message = history[-1]
    return {INTERMEDIATE_LAST_AUTHOR: last_message.author}

async def aextract_last_user(row):
    """Async counterpart of `extract_last_user`: calls it directly and returns its result."""
    last_author = extract_last_user(row)
    return last_author

#### Short answer extraction

After getting the full answer from the LLM, we extract its short form.

In [9]:
#| export
def extract_answer_from_cot(row):
    """Extract the short answer from the model's full response.

    The text after the last `LLM_OUTPUT_MARKER` is taken (the whole response
    when the marker is absent); within it, everything up to and including the
    first colon is dropped when a colon is present; the remainder is stripped
    of surrounding whitespace.

    Args:
        row: Mapping holding the model response under `OUTPUT_LLM_OUTPUT`;
            its `content` attribute is read as the response text.

    Returns:
        A dict with the short answer under `OUTPUT_SHORT_ANSWER`.
    """
    response: Union[AIMessage, AIMessageChunk] = row[OUTPUT_LLM_OUTPUT]
    full_text: str = response.content
    after_marker = full_text.split(LLM_OUTPUT_MARKER)[-1]
    before_colon, colon, after_colon = after_marker.partition(":")
    # Without a colon the whole tail is the answer.
    short_answer = after_colon if colon else before_colon
    return {OUTPUT_SHORT_ANSWER: short_answer.strip()}

async def aextract_answer_from_cot(row):
    """Async counterpart of `extract_answer_from_cot`: calls it directly and returns its result."""
    short_answer = extract_answer_from_cot(row)
    return short_answer

#### Full chain

In [10]:
#| export
def build_fallacy_detection_chain(llm: BaseLLM) -> RunnableSequence:
    """Compose the fallacy detection pipeline around the given model.

    The steps run in this order:
      1. stringify the fallacies and the message history;
      2. extract the author of the last message;
      3. render `chat_prompt`;
      4. call `llm`;
      5. extract the short answer from the model response.

    Args:
        llm: The model to call. NOTE(review): the annotation says `BaseLLM`,
            but the example in this notebook passes a `ChatOpenAI`, and the
            answer extraction step reads `.content` from the model output;
            confirm the intended type.

    Returns:
        The composed runnable.
    """
    to_strings = TransformChain(
        transform=stringify,
        atransform=astringify,
        input_variables=[INPUT_FALLACIES, INPUT_HISTORY],
        output_variables=[INTERMEDIATE_FALLACIES_STR, INTERMEDIATE_HISTORY_STR],
    )
    last_author = TransformChain(
        transform=extract_last_user,
        atransform=aextract_last_user,
        input_variables=[INPUT_HISTORY],
        output_variables=[INTERMEDIATE_LAST_AUTHOR],
    )
    short_answer = TransformChain(
        transform=extract_answer_from_cot,
        atransform=aextract_answer_from_cot,
        input_variables=[OUTPUT_LLM_OUTPUT],
        output_variables=[OUTPUT_SHORT_ANSWER],
    )
    return (
        to_strings
        | last_author
        | chat_prompt
        | llm
        | short_answer
    )

### Example

In [11]:
# Build the example chain around a streaming OpenAI chat model.
fallacies_detection_chain = build_fallacy_detection_chain(
    llm=ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-4-0613", streaming=True)
)

In [12]:
# Example inputs: the fallacy catalogue plus a three-message conversation.
fallacies = read_fallacies(FALLACIES_FNAME)
history = [
    # Each message is stamped with the time at which it is created here.
    Message(author, datetime.now(), text)
    for author, text in [
        ("Moonlight", "Soon we will finish with Ukraine"),
        (
            "alex4321",
            "After six months of taking Bakhmut, did anything new happen?\n\n"
            "Well, so that there is a reason to suspect that it will happen soon, "
            "and not something regardless of the outcome - this will last for years.",
        ),
        ("Moonlight", "Time is a resource, we have plenty of it"),
    ]
]
context = "Post about the war between Russia/Ukraine"
query = "Moonlight's argument about time being a resource in war"

In [13]:
# Synchronous run; the input keys are the exported INPUT_* constants.
fallacies_detection_chain.invoke({
    INPUT_FALLACIES: fallacies,
    INPUT_HISTORY: history,
    INPUT_CONTEXT: context,
    INPUT_QUERY: query,
})

{'llm_output': AIMessageChunk(content='- Possible Fallacies in Moonlight\'s messages:\n  - Argumentum ad Ignorantiam (Appeal to Ignorance):\n    This fallacy could be present if Moonlight is implying that because no one has proven that Russia will not finish with Ukraine soon, it must be true. Moonlight\'s vague statement about time being a resource could be seen as avoiding the need to provide proof for his claim.\n  - Argumentum ad Baculum (Appeal to Force):\n    If Moonlight\'s statement "Soon we will finish with Ukraine" is interpreted as a threat or an attempt to intimidate, it could represent this fallacy. However, without further context and without knowing Moonlight\'s position or influence, it\'s hard to definitively identify this fallacy.\n  - False Dichotomy: \n    Moonlight\'s message doesn\'t present only two choices as the only options, so this fallacy is likely not present.\n  - Begging the Question (Circular Reasoning):\n    Moonlight\'s statement does not assume the co

In [14]:
await fallacies_detection_chain.ainvoke({
    "fallacies": fallacies,
    "history": history,
    "context": context,
    "query": query,
})

{'llm_output': AIMessageChunk(content='- Possible Fallacies in Moonlight\'s messages:\n  - Argumentum ad Ignorantiam (Appeal to Ignorance):\n    Moonlight asserts that "Time is a resource, we have plenty of it" in the context of war. However, they provide no evidence or justification for this assertion. It implies that because no one has proven the contrary, their statement must be true.\n  - Begging the Question (Circular Reasoning):\n    Moonlight\'s statement that "we have plenty of time" assumes the conclusion within the premise, that they have an unlimited amount of time, without providing any evidence or explanation.\n  - Argumentum ad Baculum (Appeal to Force):\n    Moonlight\'s statement may imply a veiled threat that they can keep fighting indefinitely due to their perceived abundance of time. However, this doesn\'t contribute to an informed discussion about the war situation and its potential resolution.\n  - Misleading Vividness:\n    Moonlight\'s statement could be seen as 

In [15]:
# Run nbdev's export step.
import nbdev
nbdev.nbdev_export()

Note nbdev2 no longer supports nbdev1 syntax. Run `nbdev_migrate` to upgrade.
See https://nbdev.fast.ai/getting_started.html for more information.
  warn(f"Notebook '{nbname}' uses `#|export` without `#|default_exp` cell.\n"
