In [1]:
use_existing_rag = True

In [2]:
from datasets import load_dataset

if not use_existing_rag:
    ds = load_dataset("QuotaClimat/frugalaichallenge-text-train", cache_dir="data")

In [3]:
import tiktoken
import tokenizers
from langchain_core.documents import Document


if not use_existing_rag:
    tokenizer = tiktoken.encoding_for_model("gpt-4o-mini")

    documents = []
    # llama_tokenizer = tokenizers.Tokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct-q4_0")

    for record in ds["train"]:
        metadata = {k: v for k, v in record.items() if k != "quote"}
        metadata["n_tokens_gpt"] = len(tokenizer.encode(record["quote"]))
        # metadata["n_tokens_llama"] = len(llama_tokenizer.tokenize(record["quote"]))
        documents.append(Document(page_content=record["quote"], metadata=metadata))


    documents[0]

In [4]:
from uuid import uuid4

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams


qclient = QdrantClient(host="localhost", port=6334)
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2")

collections = [collection.name for collection in qclient.get_collections().collections]
print(collections)
if "frugalaichallenge-text-train" not in collections:
    qclient.create_collection(
        collection_name="frugalaichallenge-text-train",
        vectors_config=VectorParams(size=384, distance=Distance.COSINE),
    )

vector_store = QdrantVectorStore(
    client=qclient,
    collection_name="frugalaichallenge-text-train",
    embedding=embedding_model,
)

if "frugalaichallenge-text-train" not in collections:

    uuids = [str(uuid4()) for _ in range(len(documents))]

    vector_store.add_documents(documents=documents, ids=uuids)

['frugalaichallenge-text-train']


In [5]:
results = vector_store.similarity_search("The world has not gotten warmer", k=2)

for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* oh yeah , the global temperature trend has not warmed for 19 years . [{'label': '1_not_happening', 'source': 'FLICC', 'url': 'https://huggingface.co/datasets/fzanartu/FLICCdataset', 'language': 'en', 'subsource': 'jintrain', 'id': None, '__index_level_0__': 1098, 'n_tokens_gpt': 15, '_id': 'ac447842-acca-4ecd-805f-b7f87764fab1', '_collection_name': 'frugalaichallenge-text-train'}]
* global warming doesn't exist -- or at least hasn't for 19 years [{'label': '1_not_happening', 'source': 'FLICC', 'url': 'https://huggingface.co/datasets/fzanartu/FLICCdataset', 'language': 'en', 'subsource': 'jintrain', 'id': None, '__index_level_0__': 636, 'n_tokens_gpt': 13, '_id': 'cec6b357-e1a8-4516-bd3d-66ea1679e1df', '_collection_name': 'frugalaichallenge-text-train'}]


In [6]:
import json

from langchain.tools import tool

qdrant_retriever = vector_store.as_retriever(search_kwargs={"k": 5})


@tool(name_or_callable="qdrant_retriever")
def qdrant_retriever_tool(query: str):
    """
    Example quotes containing different levels of climate change desinformation, separated in different categories:
    - 0_accepted: No relevant environmental disinformation claim detected.
    - 1_not_happening: Global warming is not happing. Climate change is NOT leading to melting ice (such as glaciers, sea ice, and permafrost), increased extreme weather, or rising sea levels. Cold weather also shows that climate change is not happening.
    - 2_not_human: Greenhouse gases from humans are not the causing climate change.
    - 3_not_bad: The impacts of climate change will not be bad and might even be beneficial.
    - 4_solutions_wont_work: Climate solutions are harmful or unnecessary.
    - 5_science_is_unreliable: Climate science is uncertain, unsound, unreliable, or biased.
    - 6_scientists_are_biased: Climate scientists and proponents of climate action are alarmist, biased, wrong, hypocritical, corrupt, and/or politically motivated.
    """
    results = qdrant_retriever.invoke(query)
    return json.dumps(
        [{"quote": res.page_content, "label": res.metadata["label"]} for res in results]
    )


display(json.loads(qdrant_retriever_tool.invoke("The world has not gotten warmer")))

[{'quote': 'oh yeah , the global temperature trend has not warmed for 19 years .',
  'label': '1_not_happening'},
 {'quote': "global warming doesn't exist -- or at least hasn't for 19 years",
  'label': '1_not_happening'},
 {'quote': 'In the most recent 5,000-year period, there have been numerous periods of distinct global warming and global cooling. However, the overall long-term climatic trend indicates that the earth has been getting cooler, not warmer,',
  'label': '1_not_happening'},
 {'quote': 'The world has not warmed up very much since the millennium. Twelve years is a reasonable time it (the temperature) has stayed almost constant, whereas it should have been rising carbon dioxide is rising, no question about that.',
  'label': '1_not_happening'},
 {'quote': 'While evidence seems to indicate that we have been warming up nicely for the past several hundred years, at present we may be in a slight cooling trend. There has been no global warming for almost 15 years.',
  'label': '

Agent structure:

1. Classify as relevant for the environmental disinformation. (n)
2. Return None if classified as not relevant, else point 3. (ce)
3. Claim extraction from the text (extract one claim from the text). (n)
4. Retrieve similar examples (n)
5. Classify text and claim (n)
6. Check if llm agrees with the following extraction. (n)
7. If yes return classification (ce + n)
8. If not extract another claim that is more representative (ce + n)
9. Back to point 4 (e)

In [148]:
from typing import Annotated, Callable, Dict, List, Literal, Optional, Sequence, Any

from langchain import hub
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langgraph.graph import END, START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import TypedDict
from json import JSONDecodeError


class DocumentRelevance(BaseModel):
    """Binary score for relevance check."""

    relevant: str = Field(description="Relevance: 'yes' or 'no'")


class DesinformationClassification(BaseModel):
    """Classification of the text as relevant for environmental disinformation."""

    relevant: str = Field(description="Disinformation classification: 'yes' or 'no'")


class ClassificationCheck(BaseModel):
    """Check if the classification is correct."""

    correct: str = Field(description="Correctness: 'yes' or 'no'")


class AgentState(TypedDict):
    # The add_messages function defines how an update should be processed
    # Default is to replace. add_messages says "append"
    messages: Annotated[Sequence[BaseMessage], add_messages]
    senders: Annotated[Sequence[str], add_messages]


class RagAgent(BaseModel):
    model: BaseChatModel
    tools: List[Callable]
    graph: StateGraph
    verbose: Optional[bool] = True

    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # nodes
        # retriever_tool = ToolNode(tools=self.tools)
        self.graph.add_node("classificator", self.classificator)
        self.graph.add_node("none_returner", self.none_returner)
        self.graph.add_node("claim_extractor", self.claim_extractor)
        self.graph.add_node("rag_tool", self.rag_tool)
        self.graph.add_node("claim_classificator", self.claim_classificator)
        # self.graph.add_node("double_check", self.double_check)
        self.graph.add_node("returner", self.returner)

        # edges
        # start generation
        self.graph.add_edge(START, "classificator")
        self.graph.add_conditional_edges(
            "classificator",
            # Assess agent decision
            self.classificator_edge,  # Non need to format as outputs already have node names
        )
        self.graph.add_edge("none_returner", END)
        self.graph.add_edge("claim_extractor", "rag_tool")
        self.graph.add_edge("rag_tool", "claim_classificator")
        self.graph.add_edge("claim_classificator", "returner")
        # self.graph.add_edge("claim_classificator", "double_check")
        # self.graph.add_conditional_edges(
        #     "double_check",
        #     # Assess agent decision
        #     self.double_check_edge,  # Non need to format as outputs already have node names
        # )
        self.graph.add_edge("returner", END)

    def __print(self, *args, **kwargs):
        """Internal print used to print if verbose is set"""
        if self.verbose:
            print(*args, **kwargs)

    def compile(self):
        """Compiles the graph into an executable agent"""
        return self.graph.compile()

    def classificator(self, state: AgentState):
        """
        Classify the text as relevant for the environmental disinformation.

        Args:
            state (messages): The current state

        Returns:
            dict: The updated state with the agent response appended to messages"""
        self.__print("----- Classifying text -----")

        prompt = PromptTemplate(
            template="""
                You are an assistant in a climate change media study group. The group analyses media transcripts related 
                to climate change and its impacts. You mission is to examine very carefully the transcripts that are givrn to you
                and judge whether or not they are related to climate change. For this task you are not required to pass a 
                judgement on the transcript in question, just judge whether it talks about climate change in any way.
                The text might also be talking about economic or social policies that are somehow related to climate change
                (for example the 'Green Deal' or the 'Paris Agreement'), these count as transcripts related to climate change.
                Keep in mind that a lot of the texts are not perfectly transcribed so watch out for typos and other errors.

                Give a binary score 'yes' or 'no' score to indicate whether the contains mentions of climate change.
                Here is the transcript: {text}
                REMEMBER TO REPLY ONLY 'yes' or 'no'. Do NOT EXPLAIN ! \n 
                Answer:
            """,
            input_variables=["text"],
        )
        chain = prompt | self.model

        messages = state["messages"]
        last_message = messages[0]

        text = last_message.content

        reply = chain.invoke({"text": text})
        classification = DesinformationClassification(relevant=reply.content).relevant
        self.__print(f"Decision: Concerns Climate? {classification}")
        return {"messages": [classification], "senders": ["classificator"]}

    def classificator_edge(
        self, state: AgentState
    ) -> Literal["claim_extractor", "none_returner"]:
        """
        Determines whether the text contains environmental or climate change disinformation.

        Args:
            state (messages): The current state

        Returns:
            str: A decision for whether the documents are relevant or not
        """
        if state["messages"][-1].content == "yes":
            return "claim_extractor"
        else:
            self.__print("----- Return None -----")
            return "none_returner"

    def none_returner(self, state: AgentState) -> Dict[str, List[Any]]:
        return {"messages": ["None"], "senders": ["none_returner"]}

    def claim_extractor(self, state: AgentState):
        """
        Extracts a claim from the text.

        Args:
            state (messages): The current state

        Returns:
            dict: The updated state with the agent response appended to messages
        """
        self.__print("----- Extract claim -----")
        template = """
                You are a climate disinformation assistant trying to identify claims in media transcripts
                related to disinformation (voluntary spread of fake news) or misinformation (unvoluntary repetition
                of incorrect information). You will be given a text transcript.
                Extract one claim from the text that captures the main concepts expressed in the text without reformulating.
                If no claim is present, return "no_claim".
                Here is the transcript: {text}
            """
        messages = state["messages"]
        senders = state["senders"]
        initial_message = messages[0]
        text = initial_message.content

        if senders[-1] == "double_check":
            old_claim = json.loads(messages[-2].content)["claim"]
            template = (
                template
                + f"\nDuring a previous analysis, the following claim was found: {old_claim}\nExtract a different claim."
            )

        template = (
            template
            + "\nREMEMBER TO REPLY WITH ONLY ONE CLAIM. Do NOT EXPLAIN ! IF NO CLAIM IS PRESENT, RETURN 'no_claim'.\nAnswer:"
        )
        prompt = PromptTemplate(template=template, input_variables=["text"])
        chain = prompt | self.model

        reply = chain.invoke({"text": text})
        claim = reply.content
        response = json.dumps({"claim": claim, "text": text})
        self.__print(claim)
        return {"messages": [response], "senders": ["claim_extractor"]}
    
    def rag_tool(self, state: AgentState):
        self.__print("----- Classificator Retrieve similar records -----")

        messages = state["messages"]
        last_message = json.loads(messages[-1].content)
        text = last_message["text"]
        rag = qdrant_retriever_tool.invoke(text)
        examples = "\n".join([f"*{record['label']}: {record['quote']}" for record in json.loads(rag)])
        self.__print("Found:")
        self.__print(examples)
        return {"messages": [examples], "senders": ["rag_tool"]}

    def claim_classificator(self, state: AgentState):
        """
        Classifies the claim according to the pre-defined categories (CARDS).

        Args:
            state (messages): The current state

        Returns:
            dict: The updated state with the agent response appended to messages
        """
        self.__print("----- Classify Claim -----")

        prompt = PromptTemplate.from_template(
            template="""
                You are an expert in disinformation on environmental and climate subjects,
                expert in climate science and know everything about the IPCC. 
                I am going to give you a claim coming from quotes on medias and the original media transcript and
                similar claims from our database along with their classification.
                Your task is to analyse the claim and then classify it following the pre-defined categories.
                Use the examples provided to determine the classification of the claim.

                Categories:
                - 0_accepted: No relevant environmental disinformation claim detected.
                - 1_not_happening: Global warming is not happing. Climate change is NOT leading to melting ice (such as glaciers, sea ice, and permafrost), increased extreme weather, or rising sea levels. Cold weather also shows that climate change is not happening.
                - 2_not_human: Greenhouse gases from humans are not the causing climate change.
                - 3_not_bad: The impacts of climate change will not be bad and might even be beneficial.
                - 4_solutions_wont_work: Climate solutions are harmful or unnecessary.
                - 5_science_is_unreliable: Climate science is uncertain, unsound, unreliable, or biased.
                - 6_scientists_are_biased: Climate scientists and proponents of climate action are alarmist, biased, wrong, hypocritical, corrupt, and/or politically motivated.
                - 7_need_fossil_fuel: We need fossil fuels for economic growth, prosperity, and to maintain our standard of living.

                Start with an analysis of the text and then classify it according to the pre-defined categories (CARDS).
                Do not be afraid to use the category "0_accepted". Use the other catefories only if there is a clear evidence of
                desinformation or misinformation in the text.
                Your response should be in JSON format with only two key-value pairs:
                {
                
                    "analysis": <analysis>,
                    "category": <category>
                }
                For example, the claim:
                <There is clear, compelling evidence that many of the major conclusions of the IPCC, your new religions constantly-changing Holy Book, are based on evidence that has been fabricated. The hockey stick graph that purported to abolish the mediaeval warm period is just one example.>
                Is analysed as:
                {
                    "analysis": "The claim implies that the science behind the IPCC is constantly changing, undermining the credibility of scientific evidence.",
                    "category": "5_science_unreliable"
                }

                Transcript: {{text}}
                Claim: {{claim}}
                Here are some similar claims from our database and their classification. Use these to decide what category this claim belongs to:
                {{examples_str}}
                REMEMBER TO REPLY WITH THE CORRECT JSON FORMAT. Do NOT EXPLAIN ! IF NO DISINFORMATION IS PRESENT, RETURN "category: "0_accepted".\n 
                Answer:

            """,
            template_format="jinja2",
            # input_variables=["text", "claim"],
        )
        chain = prompt | self.model

        messages = state["messages"]
        last_message = json.loads(messages[-2].content)
        text = last_message["text"]
        claim = last_message["claim"]

        examples_str = messages[-1].content
        # examples_str = "\n".join(
        #     [
        #         f"Claim: {example['quote']}\nLabel: {example['label']}\n"
        #         for example in examples
        #     ]
        # )

        reply = chain.invoke(
            {"text": text, "claim": claim, "examples_str": examples_str}
        )
        self.__print(reply.content)
        claim_classification = self.load_json_string(reply.content, json.dumps(["analysis", "category"]))["category"]
        # claim_classification = json.loads(reply.content)["category"]

        self.__print({"classification": claim_classification, "text": text, "claim": claim})
        response = json.dumps(
            {"classification": claim_classification, "text": text, "claim": claim},
        )
        self.__print("CARDS Classification:")
        self.__print(claim_classification)
        return {"messages": [response], "senders": ["claim_classificator"]}

    def double_check(self, state: AgentState):
        """
        Double check the claim classification.

        Args:
            state (messages): The current state

        Returns:
            dict: The updated state with the agent response appended to messages
        """

        self.__print("----- Double Check -----")

        prompt = PromptTemplate(
            template="""
                You are an expert in disinformation on environmental and climate subjects,
                expert in climate science and know everything about the GIEC. 
                I am going to give you a media transcript and a classification. 
                Your task is to examine the transcript and the classification and decide if it is correct
                If it is correct answer with "yes", otherwise answer with "no".

                Categories:
                - 0_accepted: No relevant environmental disinformation claim detected.
                - 1_not_happening: Global warming is not happing. Climate change is NOT leading to melting ice (such as glaciers, sea ice, and permafrost), increased extreme weather, or rising sea levels. Cold weather also shows that climate change is not happening.
                - 2_not_human: Greenhouse gases from humans are not the causing climate change.
                - 3_not_bad: The impacts of climate change will not be bad and might even be beneficial.
                - 4_solutions_wont_work: Climate solutions are harmful or unnecessary.
                - 5_science_is_unreliable: Climate science is uncertain, unsound, unreliable, or biased.
                - 6_scientists_are_biased: Climate scientists and proponents of climate action are alarmist, biased, wrong, hypocritical, corrupt, and/or politically motivated.
                - 7_need_fossil_fuel: We need fossil fuels for economic growth, prosperity, and to maintain our standard of living.

                Be very careful not to classify a transcript as a false positive: the main objective is to assign a category related to disinformation
                ONLY TO TRANSCRIPTS THAT ACTUALLY CONTAIN ANY.

                Transcript: {text}
                Classification: {classification}
                REMEMBER TO REPLY ONLY 'yes' or 'no'. Do NOT EXPLAIN ! \n 
                Answer:
            """,
                # Claim: {claim}
            input_variables=["text", "claim", "classification"],
        )
        chain = prompt | self.model

        messages = state["messages"]
        last_message = json.loads(messages[-1].content)
        text = last_message["text"]
        # claim = last_message["claim"]
        classification = last_message["classification"]

        reply = chain.invoke(
            # {"text": text, "claim": claim, "classification": classification}
            {"text": text, "classification": classification}
        )
        double_check = ClassificationCheck(correct=reply.content).correct
        self.__print("Do you agree with the classification? ")
        self.__print(double_check)
        return {"messages": [double_check], "senders": ["double_check"]}

    def double_check_edge(
        self, state: AgentState
    ) -> Literal["claim_extractor", "returner"]:
        """
        Routes to either the final conclusion after the double check or to the claim extractor if the double check is not conclusive.
        """
        if state["messages"][-1].content == "yes":
            return "returner"
        else:
            return "claim_extractor"
        
    def dump_json_string(self, json_string:str, keys: List[str], n_iter=1, max_iter=5):
        """Recursive function that formats the string into proper json format"""
        try:
            return json.dumps(json_string)
        except JSONDecodeError as e:
            if n_iter > max_iter:
                raise e
            prompt = PromptTemplate(
                template="""
                    Your are a formatter, you are given a string that needs to be formatted into a proper JSON string.
                    For example the following JSON string with strings ["key_1", "key_2"]:
                    '{
                        "key_1": "value_1"
                        "key_2": "value_2"
                    }'
                    becomes:
                    '{
                        "key_1": "value_1" 
                        "key_2": "value_2"
                    }'
                    ; and
                    '{
                        "key_1": "value_1",
                        "key_2": "value_2",'
                    becomes
                    '{
                        "key_1": "value_1",
                        "key_2": "value_2"
                    }'
                    ; and
                    '{
                        "key_1": "value_1",
                        "value_2"
                    }'
                    becomes
                    '{
                        "key_1": "value_1",
                        "key_2": "value_2"
                    }'
                    Correct the following string into a proper json string:
                    {{json_string}}
                    Here are the following keys for the json document:
                    {{keys}}
                """,
                    # Claim: {claim}
                template_format="jinja2",
            )
            chain = prompt | self.model
            chain.invoke({"json_string": json_string, "keys": keys})
            return self.dump_json_string(json_string, keys, n_iter=n_iter+1)
        
    def load_json_string(self, json_string:str, keys: List[str], n_iter=1, max_iter=5):
        """Recursive function that formats the string into proper json format"""
        if not json_string.endswith("}") and json_string.startswith("{"):
            json_string = json_string + "}"
        if not json_string.endswith("]") and json_string.startswith("["):
            json_string = json_string + "]"
        try:
            return json.loads(json_string)
        except JSONDecodeError as e:
            if n_iter > max_iter:
                raise e
            prompt = PromptTemplate(
                template="""
                    Correct the following string into a proper json string:
                    {{json_string}}
                    Here are the following keys for the json document:
                    {{keys}}
                """,
                    # Claim: {claim}
                template_format="jinja2",
            )
            chain = prompt | self.model
            chain.invoke({"json_string": json_string, "keys": keys})
            return self.load_json_string(json_string, keys, n_iter=n_iter+1)

    def returner(self, state: AgentState):
        """
        Returns the final conclusion after the double check.
        Args:
            state (messages): The current state

        Returns:
            dict: The updated state with the agent response appended to messages
        """
        self.__print("----- Return Result -----")
        return {"messages": [state["messages"][-1].content], "senders": ["returner"]}

In [149]:
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama
from dotenv import load_dotenv

load_dotenv()

agent = RagAgent(
    model=ChatOllama(model="llama3.1:8b", temperature=0),
    tools=[qdrant_retriever_tool],
    verbose=True,
    graph=StateGraph(AgentState),
).compile()
# agent = RagAgent(
#     model=ChatOpenAI(model_name="gpt-4o-mini", temperature=0),
#     tools=[qdrant_retriever_tool],
#     verbose=True,
#     graph=StateGraph(AgentState),
# ).compile()

# agent

In [150]:
import pandas as pd

data = pd.read_parquet("../../data/raw/4_channels_predictions_09_2023_09_2024.parquet")

data.text.head()

id
1dcd4b454f8bac42440259ce26a1a2192051186bb5728be489dc654a9a967d1d    <unk> <unk> <unk> <unk> aerosmith en janvier m...
0eb5805fa23e0819f817ea10fe1fccd19e61e40a1239cc93f701fd56bd8ea66f    la très grande majorité d'entre eux ne connais...
b6d54aefb250671e7754a688411ce9e68badcad88a665be3de78996d13b74fd2    mais titeuf ne vieillit pas le monde change ti...
23c2d3b292d9ab0fb0d0b2b8c34f3b88708c79ffab2d6659accf565ba61f48ae    dû travailler très vite le journal arrive à no...
71df0ce2b34afa23391d8e31d35ccd213ae2a881b7ce412813a06a60a2e47d3c    pas avoir lieu ni même européens existait ni l...
Name: text, dtype: object

In [151]:
text = data.iloc[35]["text"]

result = agent.invoke(
    {
        "messages": [
            ("user", text),
        ]
    }
)

----- Classifying text -----
Decision: Concerns Climate? no
----- Return None -----


In [152]:
df = pd.read_excel("/Users/giuseppeguarino/Documents/d4g/climateguard/data/annotated/4_channels_review_09_2023_09_2024.xlsx")

In [153]:
df = df.drop(df.loc[df.cards_ground_truth.isna()].index)
df


Unnamed: 0,id,channel_name,start,text,quote,claim,analysis,context,cards,cards_ground_truth,quote_is_correct,commentaire_cards,commentaire_quote,Highlight
0,52061c3902c0257c7bfae7086ae50ea3998fea4204bcd6...,europe1,2023-09-08 19:34:00,Ce n'est pas seulement qu'ils ne les utilisent...,les climatologues ne croient plus leurs termes...,Les climatologues ne croient plus leurs termes...,Cette affirmation pourrait créer un doute sur ...,L'intervenant mentionne une perte de confiance...,5_science_uncertain,0_accepted,True,,termes honnêtes = thermomètre ??,
1,34a41bf34b35ee91fc147601fb8c21a366a2f568b9060b...,europe1,2023-09-15 19:30:00,"Jusqu'au trente septembre, détaille Sofia au q...",Il y a beaucoup de soldats poneys menteurs à t...,Il y a beaucoup de soldats poneys menteurs à t...,Cette allégation semble suggérer que le réchau...,"Lors d'une conférence de presse au Vietnam, le...",5_science_uncertain,0_accepted,True,"Contexte spécifique de la phrase, les mots de ...",,
2,0fb8db32982baea27fa4a92220e76331e703d61852c0e8...,europe1,2023-09-20 07:50:00,On tente d'échapper à une tempête de pluie d'a...,tempête de pluie d'acide mortelle selon l'inrs...,tempête de pluie d'acide mortelle enregistrée ...,L'allégation mentionne une tempête de pluie d'...,Dans une discussion sur un film catastrophe éc...,1_its_not_happening,0_accepted,True,"Contexte spécifique de la phrase, la claim men...",,
3,4792f93c6614b1e7ef39e301cc6c1d0f4d3d18b9421cc3...,europe1,2023-10-02 07:54:00,"Les Français n'arrivent pas à se loger, c'est ...",Ça va de l'interdiction des avions d'affaires ...,Diminuer le nombre de vaches et contrôler les ...,Cette affirmation présente des mesures qui n'o...,L'interlocuteur mentionne plusieurs mesures ex...,3_impacts_not_bad,6_proponents_biased,True,,,
4,74c05fdbca4aeffb643abf0de486b57f8299051d4ef71b...,europe1,2023-10-05 19:54:00,Le réchauffement climatique a été collecté à G...,on ne peut pas faire le lien directement avec ...,On ne peut pas faire le lien directement avec ...,Cette assertion minimise le consensus scientif...,Luc Moreau énonce lui-même qu'il est difficile...,2_humans_not_the_cause,2_humans_not_the_cause,True,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,22d30e47c43d8defe1bdab51e6e4c6374ba982d76de889...,tf1,2024-04-23 20:36:00,"Plus chaud de ces dix dernières années, le tem...",Il n'y a plus ni eau ni printemps.,Il n'y a plus ni eau ni printemps.,Cette affirmation semble exagérée et pourrait ...,L'orateur évoque un changement climatique qui ...,1_its_not_happening,0_accepted,False,,"Problème de retranscription, plus automne ni p...",
541,58feacdc4a4d31023a87bce158ee902d37076d9f5562f3...,tf1,2024-04-28 20:40:00,"Aujourd'hui, ce fameux pacte d'immigration qui...",ce tsunami de normes de contraintes de taxe qu...,Le pacte vert européen organise la décroissance.,Cette affirmation attribue une intention de dé...,Lors d'un discours critique à l'égard des poli...,4_solutions_harmful_unnecessary,4_solutions_harmful_unnecessary,True,,,
542,6d39a0d45c359fd2183c0f1f001b76c4b60d283240e5c3...,tf1,2024-06-16 18:04:00,"Avec l'aide de nos fournisseurs, si il s'avère...",l'entreprise n'a pas voulu nous dire combien d...,L'huile de palme serait aujourd'hui une soluti...,Cette allégation semble minimiser les impacts ...,Dans un débat sur les pratiques de certains fo...,4_solutions_harmful_unnecessary,0_accepted,True,Potentiellement plutôt du greenwashing,,
543,0718ef4df3559735ad33436f7e1e39802a71d923cbc0b9...,tf1,2024-07-21 17:14:00,"En France, plusieurs millions de maisons sont ...",le dérèglement climatique phénomène naturel.,Le dérèglement climatique est un phénomène nat...,Cette affirmation contredit le consensus scien...,Une affirmation selon laquelle les changements...,2_humans_not_the_cause,0_accepted,True,Légitime mais tellement court que ça a l'air d...,,


In [154]:
text = df.iloc[52]["text"]

print(f"Ground Truth: {df.iloc[52]['cards_ground_truth']}")

result = agent.invoke(
    {
        "messages": [
            ("user", text),
        ]
    }
)
result

Ground Truth: 0_accepted
----- Classifying text -----
Decision: Concerns Climate? yes
----- Extract claim -----
Une étude d'impact a obligé à dire la réalité aux citoyens : ces métaux, nous devons les importer et donc signer des accords de libre-échange.
----- Classificator Retrieve similar records -----
Found:
*6_proponents_biased: Without a hearing, German officials have fined her and demanded costs on the ground that in her devastatingly effective videos she has dared to question the Party Line about what officialdom profiteers by presenting as ‘dangerous’ manmade global warming,” Monckton claimed at JunkScience s a result of this arbitrary and capricious prosecution and conviction without trial, Naomi has had her earnings cut off. Please donate securely and directly to her Patreon account at https://www.patreon.com/naomiseibtmy. May I suggest at least $10 per month? That would be a real life-saver, and would enable Naomi to continue her valuable work.
*6_proponents_biased: Without 

{'messages': [HumanMessage(content="Elle a la fâcheuse habitude de penser qu'elle peut fixer un objectif sur un coin de table et que cela suffit à faire une politique publique, sans jamais demander comment ni avec quels effets l'intendance suivra. Elle a fait exactement la même chose avec l'agriculture. On en a beaucoup parlé ici. La seconde raison pour laquelle elle a soigneusement évité d'étudier l'impact, c'est qu'elle pressentait que les réponses obligeraient à confronter la pensée magique à la réalité, une réalité qui supposait de tordre le cou à un certain nombre de dogmes écologiques. Et ça, ça n'a pas bonne presse. Alors, ces dogmes, en l'occurrence, viennent d'abord du dogme de la décroissance, qui continue à façonner la pensée de toute une partie de la gauche européenne. Pour transformer le parc automobile, il faut construire de grandes usines de pièces de batteries. Or, il est devenu quasiment impossible d'en implanter en France et dans toute une partie de l'Europe. Elles fo

In [155]:
df.cards_ground_truth.value_counts()


cards_ground_truth
0_accepted                         118
4_solutions_harmful_unnecessary     88
6_proponents_biased                 70
1_its_not_happening                 31
5_science_uncertain                 22
2_humans_not_the_cause              19
3_impacts_not_bad                   15
7_fossil_fuels_needed                7
Name: count, dtype: int64

In [161]:
import tqdm
from tqdm.asyncio import tqdm as atqdm
import asyncio
from functools import wraps
from langgraph.errors import GraphRecursionError

agent = RagAgent(
    # model=ChatOpenAI(model_name="gpt-4o-mini"),
    model=ChatOllama(model="llama3.1:8b", temperature=0),
    tools=[qdrant_retriever_tool],
    verbose=False,
    graph=StateGraph(AgentState),
).compile()

async def arun_agent(text, id, sem):
    async with sem:
        try:
            result = await agent.ainvoke({"messages": [("user", text)]}, {"recursion_limit": 5})
            return id, result["messages"][-1].content
        except GraphRecursionError:
            return id, "None"
        
def run_agent(text, id):
    result = agent.invoke({"messages": [("user", text)]}, {"recursion_limit": 10})
    return id, result["messages"][-1].content
    

semaphore = asyncio.Semaphore(10)

async def amain(sem):
    results = await atqdm.gather(*[arun_agent(row.text, row.id, sem) for row in df.itertuples()])
    return results

def main():
    results = [run_agent(row.text, row.id) for row in tqdm.tqdm(df.itertuples(), total=len(df))]
    return results

# results = await amain(semaphore)
results = main()



100%|██████████| 370/370 [45:56<00:00,  7.45s/it]  


In [162]:
ids = []
records = []
for idx, record in results:
    if record != "None":
        ids.append(idx)
        records.append(json.loads(record))
results_df = pd.DataFrame(records, index=ids)
display(results_df.head())


Unnamed: 0,classification,text,claim
52061c3902c0257c7bfae7086ae50ea3998fea4204bcd6628588e41d71340dfc,0_accepted,Ce n'est pas seulement qu'ils ne les utilisent...,"Nous gaspillons l'énergie, nous gaspillons les..."
34a41bf34b35ee91fc147601fb8c21a366a2f568b9060bbb629698dfd9319801,6_scientists_are_biased,"Jusqu'au trente septembre, détaille Sofia au q...",Joe Biden a fait des déclarations confuses lor...
0fb8db32982baea27fa4a92220e76331e703d61852c0e8d95550ef6853ffd842,0_accepted,On tente d'échapper à une tempête de pluie d'a...,Des pluies mille fois plus acides que la norma...
4792f93c6614b1e7ef39e301cc6c1d0f4d3d18b9421cc3a9b18aa5c7581c9e02,6_scientists_are_biased,"Les Français n'arrivent pas à se loger, c'est ...",La première voudrait bouleverser l'existence d...
74c05fdbca4aeffb643abf0de486b57f8299051d4ef71b58b532791a33aba423,1_not_happening,Le réchauffement climatique a été collecté à G...,Il y a quand même un réchauffement à ces altit...


In [163]:
df.to_csv("results_llama3-1-8b.csv")

In [164]:
results_df.classification.str[0].value_counts()

classification
6    68
4    68
0    50
1    31
3    31
2    22
5    19
7     7
Name: count, dtype: int64

In [165]:
comparison_df = df.set_index("id")[["text", "quote", "claim", "cards_ground_truth"]].merge(
    results_df["classification"],
    how="left",
    right_index=True,
    left_index=True
).fillna("0_accepted")



In [166]:
display(comparison_df.cards_ground_truth.str[0].value_counts())
display(comparison_df.classification.str[0].value_counts())

cards_ground_truth
0    118
4     88
6     70
1     31
5     22
2     19
3     15
7      7
Name: count, dtype: int64

classification
0    124
6     68
4     68
1     31
3     31
2     22
5     19
7      7
Name: count, dtype: int64

In [167]:
(comparison_df.cards_ground_truth.str[0] == comparison_df.classification.str[0]).sum() / len(comparison_df)

np.float64(0.3837837837837838)

In [168]:
((comparison_df.cards_ground_truth.str[0]=="0") == (comparison_df.classification.str[0]=="0")).sum() / len(comparison_df)

np.float64(0.6756756756756757)

In [169]:
from sklearn.metrics import classification_report

print(classification_report(
    y_pred=comparison_df.classification.str[0],
    y_true=comparison_df.cards_ground_truth.str[0],
))

              precision    recall  f1-score   support

           0       0.49      0.52      0.50       118
           1       0.35      0.35      0.35        31
           2       0.18      0.21      0.20        19
           3       0.06      0.13      0.09        15
           4       0.46      0.35      0.40        88
           5       0.21      0.18      0.20        22
           6       0.35      0.34      0.35        70
           7       0.71      0.71      0.71         7

    accuracy                           0.38       370
   macro avg       0.35      0.35      0.35       370
weighted avg       0.40      0.38      0.39       370



In [170]:
import numpy as np

comparison_df["macro_category_ground_truth"] = "accepted"
comparison_df.loc[comparison_df.cards_ground_truth.str[0].isin(["1", "2", "3", "5"]), "macro_category_ground_truth"] = "disinformation"
comparison_df.loc[comparison_df.cards_ground_truth.str[0].isin(["4", "6", "7"]), "macro_category_ground_truth"] = "inaction"

comparison_df["macro_category_prediction"] = "accepted"
comparison_df.loc[comparison_df.classification.str[0].isin(["1", "2", "3", "5"]), "macro_category_prediction"] = "disinformation"
comparison_df.loc[comparison_df.classification.str[0].isin(["4", "6", "7"]), "macro_category_prediction"] = "inaction"


display(comparison_df.cards_ground_truth.str[0].value_counts())
display(comparison_df.classification.str[0].value_counts())
display(comparison_df.macro_category_ground_truth.value_counts())
display(comparison_df.macro_category_prediction.value_counts())

cards_ground_truth
0    118
4     88
6     70
1     31
5     22
2     19
3     15
7      7
Name: count, dtype: int64

classification
0    124
6     68
4     68
1     31
3     31
2     22
5     19
7      7
Name: count, dtype: int64

macro_category_ground_truth
inaction          165
accepted          118
disinformation     87
Name: count, dtype: int64

macro_category_prediction
inaction          143
accepted          124
disinformation    103
Name: count, dtype: int64

In [171]:
print(classification_report(
    y_pred=comparison_df.macro_category_ground_truth,
    y_true=comparison_df.macro_category_prediction,
))

                precision    recall  f1-score   support

      accepted       0.52      0.49      0.50       124
disinformation       0.48      0.41      0.44       103
      inaction       0.52      0.60      0.56       143

      accuracy                           0.51       370
     macro avg       0.51      0.50      0.50       370
  weighted avg       0.51      0.51      0.51       370



In [172]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(
    y_true=comparison_df.cards_ground_truth.str[0],
    y_pred=comparison_df.classification.str[0]
)

cm_macro = confusion_matrix(
    y_true=comparison_df.macro_category_ground_truth,
    y_pred=comparison_df.macro_category_prediction
)

In [173]:
import plotly.figure_factory as ff

def plot_confusion_matrix(cm, labels=None, title='Confusion Matrix', cmap='Blues'):
    cm_text = [[str(y) for y in x] for x in cm]
    if labels is None:
        labels = [str(i) for i in range(len(cm))]
        
    fig = ff.create_annotated_heatmap(
        cm,
        x=labels,
        y=labels,
        colorscale=cmap,
        annotation_text=cm_text
    )
    fig.update_layout(
        title_text='<i><b>Confusion matrix</b></i>',
        #xaxis = dict(title='x'),
        #yaxis = dict(title='x')
    )

    # add custom xaxis title
    fig.add_annotation(
        dict(
            font=dict(
                color="black",
                size=14
            ),
            x=0.5,
            y=-0.15,
            showarrow=False,
            text="Predicted value",
            xref="paper",
            yref="paper"
        )
    )

    # add custom yaxis title
    fig.add_annotation(dict(font=dict(color="black",size=14),
                            x=-0.35,
                            y=0.5,
                            showarrow=False,
                            text="Real value",
                            textangle=-90,
                            xref="paper",
                            yref="paper"))

    # adjust margins to make room for yaxis title
    fig.update_layout(margin=dict(t=50, l=200))

    # add colorbar
    fig['data'][0]['showscale'] = True
    return fig

In [174]:
display(plot_confusion_matrix(cm, labels=None, title='Confusion Matrix', cmap='Blues'))
display(plot_confusion_matrix(cm_macro, labels=["accepted", "disinformation", "inaction"], title='Confusion Matrix', cmap='Blues'))