# Fallacy detector chain

## Implementation

In [None]:
#| default_exp fallacy

In [None]:
#| export
from pino_inferior.core import DATA_DIR, PROMPTS_DIR, OPENAI_API_KEY
from langchain.schema.runnable import RunnableSequence
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chains import LLMChain, SequentialChain
from langchain.llms.openai import BaseLLM
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains.transform import TransformChain
from langchain.schema.messages import AIMessage, AIMessageChunk
import os
from dataclasses import dataclass
from typing import Union, List
import json
from datetime import datetime
from pino_inferior.message import Message

In [None]:
#| export
# JSON file with the fallacy definitions (the format `read_fallacies` parses).
FALLACIES_FNAME = os.path.join(DATA_DIR, "fallacies.json")
# Directory with the prompt template files used by this chain.
FALLACIES_PROMPT_DIR = os.path.join(PROMPTS_DIR, "fallacies")

In [None]:
#| export
# Keys of the values the caller passes into the chain.
INPUT_FALLACIES = "fallacies"
INPUT_HISTORY = "history"
INPUT_CONTEXT = "context"
INPUT_QUERY = "query"

# Keys of the values produced by the transform steps and consumed by the prompt.
INTERMEDIATE_FALLACIES_STR = "fallacies_str"
INTERMEDIATE_HISTORY_STR = "history_str"
INTERMEDIATE_LAST_AUTHOR = "last_message_author"

# Keys of the raw model response and of the short answer extracted from it.
OUTPUT_LLM_OUTPUT = "llm_output"
OUTPUT_SHORT_ANSWER = "answer"

# Text after the last occurrence of this marker is treated as the short answer.
LLM_OUTPUT_MARKER = "Therefore"

### Fallacy representation

In [None]:
#| export
@dataclass
class FallacyExample:
    """An example attached to a fallacy: a text and a response to it."""

    text: str
    response: str

    def __str__(self) -> str:
        """Render the example as two labelled lines."""
        labelled_lines = (
            f"Example: {self.text}",
            f"Example Response: {self.response}",
        )
        return "\n".join(labelled_lines)


@dataclass
class Fallacy:
    """A named fallacy with its description and an optional example."""

    name: str
    description: str
    example: Union[FallacyExample, None]

    def __str__(self):
        """Render a `# name` heading, the description and, if set, the example."""
        sections = [f"# {self.name}\n\n{self.description}"]
        if self.example:
            # The example goes into its own section, separated by a blank line.
            sections.append(str(self.example))
        return "\n\n".join(sections)
    

def read_fallacies(fname: str) -> List[Fallacy]:
    """Load fallacy definitions from a UTF-8 encoded JSON file.

    Args:
        fname: Path to a JSON file containing a list of objects, each with
            `name` and `description` keys and an optional `example` object
            whose keys are passed to `FallacyExample` as keyword arguments.

    Returns:
        One `Fallacy` per JSON item, in file order. Items with a missing or
        empty `example` get `example=None`.
    """
    with open(fname, "r", encoding="utf-8") as src:
        records = json.load(src)

    fallacies = []
    for record in records:
        example_fields = record.get("example")
        example = FallacyExample(**example_fields) if example_fields else None
        fallacies.append(
            Fallacy(
                name=record["name"],
                description=record["description"],
                example=example,
            )
        )
    return fallacies

### Prompts

In [None]:
#| export
# System message template; it declares no input variables.
system_prompt = SystemMessagePromptTemplate.from_template_file(
    os.path.join(FALLACIES_PROMPT_DIR, "system.txt"),
    input_variables=[]
)
# Human message template filled with the stringified fallacies and history,
# the last message author, and the caller-supplied context and query.
instruction_prompt = HumanMessagePromptTemplate.from_template_file(
    os.path.join(FALLACIES_PROMPT_DIR, "instruction.txt"),
    input_variables=[INTERMEDIATE_FALLACIES_STR,
                     INTERMEDIATE_HISTORY_STR,
                     INTERMEDIATE_LAST_AUTHOR,
                     INPUT_CONTEXT,
                     INPUT_QUERY]
)
# Full chat prompt: the system message followed by the instruction message.
chat_prompt = ChatPromptTemplate.from_messages([system_prompt, instruction_prompt])

### Conversions

#### History & fallacies to strings

Initially, the message history and the fallacy types are represented as objects.

We will need to stringify them.

**TODO:** Truncate message lists that are too long.

In [None]:
#| export
def stringify(row):
    """Turn the fallacy and message objects in `row` into plain text.

    Args:
        row: Mapping with the fallacy list under `INPUT_FALLACIES` and the
            message list under `INPUT_HISTORY`.

    Returns:
        Dict with each list rendered through `str()` and joined by blank
        lines, keyed by `INTERMEDIATE_FALLACIES_STR` and
        `INTERMEDIATE_HISTORY_STR`.
    """
    known_fallacies: List[Fallacy] = row[INPUT_FALLACIES]
    messages: List[Message] = row[INPUT_HISTORY]  # TODO: cut
    blank_line = "\n\n"
    fallacies_text = blank_line.join(str(fallacy) for fallacy in known_fallacies)
    history_text = blank_line.join(str(message) for message in messages)
    return {
        INTERMEDIATE_FALLACIES_STR: fallacies_text,
        INTERMEDIATE_HISTORY_STR: history_text,
    }

async def astringify(row):
    """Async variant of `stringify`; it delegates to the synchronous function."""
    stringified = stringify(row)
    return stringified

#### Extract last user name

We will explicitly tell the model to look for issues in the last user's messages, since that is what we are interested in.

In [None]:
#| export
def extract_last_user(row):
    """Return the author of the last message in the history.

    Args:
        row: Mapping with the message list under `INPUT_HISTORY`.

    Returns:
        Dict mapping `INTERMEDIATE_LAST_AUTHOR` to the `author` attribute of
        the last element of the history.

    Raises:
        ValueError: If the history is empty.
    """
    history: List[Message] = row[INPUT_HISTORY]
    # Explicit check instead of `assert`: assertions are removed when Python
    # runs with -O, which would turn an empty history into an opaque
    # IndexError on the lookup below.
    if not history:
        raise ValueError("Cannot extract the last author: message history is empty")
    return {
        INTERMEDIATE_LAST_AUTHOR: history[-1].author
    }

async def aextract_last_user(row):
    """Async variant of `extract_last_user`; it delegates to the synchronous function."""
    last_author = extract_last_user(row)
    return last_author

#### Short answer extraction

After getting the full answer from the LLM, we should extract its short form.

In [None]:
#| export
def extract_answer_from_cot(row):
    """Extract the short final answer from the model's full response.

    Args:
        row: Mapping with the model response message under
            `OUTPUT_LLM_OUTPUT`; its `content` attribute is read as text.

    Returns:
        Dict mapping `OUTPUT_SHORT_ANSWER` to the stripped text that follows
        the last `LLM_OUTPUT_MARKER` and, within that, the first colon.
    """
    message: Union[AIMessage, AIMessageChunk] = row[OUTPUT_LLM_OUTPUT]
    full_text: str = message.content
    # Keep what follows the last marker; the whole text when the marker is absent.
    conclusion = full_text.rpartition(LLM_OUTPUT_MARKER)[2]
    # Drop everything up to and including the first colon, if there is one.
    _, colon, after_colon = conclusion.partition(":")
    short_answer = after_colon if colon else conclusion
    return {
        OUTPUT_SHORT_ANSWER: short_answer.strip()
    }

async def aextract_answer_from_cot(row):
    """Async variant of `extract_answer_from_cot`; it delegates to the synchronous function."""
    extracted = extract_answer_from_cot(row)
    return extracted

#### Full chain

In [None]:
#| export
def build_fallacy_detection_chain(llm: BaseLLM) -> RunnableSequence:
    """Assemble the fallacy detection pipeline around the given model.

    The steps run in this order: stringify the fallacies and the history,
    extract the last message author, render `chat_prompt`, call `llm`, and
    extract the short answer from the model output.

    Args:
        llm: Model placed right after the chat prompt.
            NOTE(review): annotated as `BaseLLM`, but the example in this
            notebook passes a `ChatOpenAI` chat model - confirm the intended
            type.

    Returns:
        The composed pipeline.
    """
    to_strings = TransformChain(
        input_variables=[INPUT_FALLACIES, INPUT_HISTORY],
        output_variables=[INTERMEDIATE_FALLACIES_STR, INTERMEDIATE_HISTORY_STR],
        transform=stringify,
        atransform=astringify,
    )
    last_author = TransformChain(
        input_variables=[INPUT_HISTORY],
        output_variables=[INTERMEDIATE_LAST_AUTHOR],
        transform=extract_last_user,
        atransform=aextract_last_user,
    )
    short_answer = TransformChain(
        input_variables=[OUTPUT_LLM_OUTPUT],
        output_variables=[OUTPUT_SHORT_ANSWER],
        transform=extract_answer_from_cot,
        atransform=aextract_answer_from_cot,
    )
    return (
        to_strings
        | last_author
        | chat_prompt
        | llm
        | short_answer
    )

### Example

In [None]:
# Build the chain around a streaming "gpt-4-0613" chat model for the examples.
fallacies_detection_chain = build_fallacy_detection_chain(
    ChatOpenAI(
        model_name="gpt-4-0613",
        openai_api_key=OPENAI_API_KEY,
        streaming=True,
    )
)

In [None]:
# Sample inputs: the fallacy list, a three-message thread, and the question to analyse.
fallacies = read_fallacies(FALLACIES_FNAME)
history = [
    Message(
        "Moonlight",
        datetime.now(),
        "Soon we will finish with Ukraine",
    ),
    Message(
        "alex4321",
        datetime.now(),
        (
            "After six months of taking Bakhmut, did anything new happen?\n\n"
            "Well, so that there is a reason to suspect that it will happen soon, "
            "and not something regardless of the outcome - this will last for years."
        ),
    ),
    Message(
        "Moonlight",
        datetime.now(),
        "Time is a resource, we have plenty of it",
    ),
]
context = "Post about the war between Russia/Ukraine"
query = "Moonlight's argument about time being a resource in war"

In [None]:
# Run the chain synchronously on the sample inputs; the cell displays the returned dict.
fallacies_detection_chain.invoke({
    "fallacies": fallacies,
    "history": history,
    "context": context,
    "query": query,
})

{'llm_output': AIMessageChunk(content='- Possible Fallacies in Moonlight\'s messages:\n  - Argumentum ad Ignorantiam (Appeal to Ignorance):\n    Moonlight\'s claim "Time is a resource, we have plenty of it" could potentially contain this fallacy as the speaker assumes that because no one has proven Russia doesn\'t have enough time, it must have an abundance of time. However, this is not enough to confirm the presence of this fallacy definitely.\n  - Argumentum ad Populum (Appeal to Popular Opinion):\n    There is no indication of this fallacy since Moonlight doesn\'t justify their argument by saying \'everyone else believes it\' or \'it is popular.\'\n  - Argumentum ad Hominem (Personal Attack):\n    This fallacy is not present as Moonlight doesn\'t attack the character, motive, or attributes of the person making the argument.\n  - Argumentum ad Misericordiam (Appeal to Pity):\n    Moonlight\'s argument does not use emotional appeals like sympathy, pity, or fear to persuade the audienc

In [None]:
# Same request through the async entry point (top-level `await` relies on the notebook environment).
await fallacies_detection_chain.ainvoke({
    "fallacies": fallacies,
    "history": history,
    "context": context,
    "query": query,
})

{'llm_output': AIMessageChunk(content='- Possible Fallacies in Moonlight\'s messages:\n  - Argumentum ad Ignorantiam (Appeal to Ignorance):\n    Moonlight\'s assertion that "Time is a resource, we have plenty of it" could potentially be seen as an appeal to ignorance. The speaker does not provide specific evidence or logical reasons to support the claim that they have plenty of time, which could suggest an assumption that the argument is valid because it hasn\'t been proven false.\n  - Contradicting (Inconsistent) statements the same author:\n    In the first message, Moonlight asserts confidently that "Soon we will finish with Ukraine", which suggests a prompt resolution to the conflict. However, in the last message, the speaker seems to contradict this initial statement by saying "Time is a resource, we have plenty of it", implying that the resolution of the conflict could take a significant amount of time.\n\nTherefore, the answer is:\n  - Identified Fallacy 1: Argumentum ad Ignoran

In [None]:
# Export the cells marked with `#| export` from this notebook.
import nbdev

nbdev.nbdev_export()