# Libraries and preparation

refs:
- https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_rag_agent_llama3_local.ipynb
- https://github.com/langchain-ai/langgraph/blob/main/examples/multi_agent/agent_supervisor.ipynb?ref=blog.langchain.dev
- https://github.com/langchain-ai/langgraph/blob/main/examples/multi_agent/hierarchical_agent_teams.ipynb?ref=blog.langchain.dev

Map-reduce: https://langchain-ai.github.io/langgraph/how-tos/map-reduce/

In [None]:
import subprocess
import threading

# Install Ollama using the official install script
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
def start_ollama():
    """Launch ``ollama serve`` on a background daemon thread and return immediately."""
    def _serve():
        subprocess.run(["ollama", "serve"])

    server_thread = threading.Thread(target=_serve, daemon=True)
    server_thread.start()

In [None]:
def pull_model(local_llm):
    """Download the Ollama model whose name is given by ``local_llm``.

    The previous shell line (``!ollama pull local_llm``) passed the literal
    text "local_llm" to Ollama instead of the value of the argument. The
    command is now run without a shell, so the model name is handed over as
    a single argument and is never interpreted by the shell.

    Args:
        local_llm: Ollama model name, e.g. "llama3.1".
    """
    # Like the original shell call, a non-zero exit status does not raise.
    subprocess.run(["ollama", "pull", local_llm])

In [None]:
def start_model(local_llm):
    """Run ``ollama run <local_llm>`` on a background daemon thread."""
    def _run_model():
        subprocess.run(["ollama", "run", local_llm])

    model_thread = threading.Thread(target=_run_model, daemon=True)
    model_thread.start()

In [None]:
%%capture --no-stderr
%pip install -U scikit-learn==1.3 langchain-ai21 ragas langchain-pinecone langchain-nomic langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python nomic[local] langchain-text-splitters

In [None]:
# Tracing and api-keys
import os
import warnings
from getpass import getpass

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

# SECURITY: credentials must not be stored in the notebook. Each key is taken
# from the environment when already set (e.g. injected by a secrets manager),
# otherwise it is asked for interactively without echoing it.
# The keys that used to be hardcoded in this cell must be treated as leaked
# and rotated.
for _secret_name in ("TAVILY_API_KEY", "LANGCHAIN_API_KEY", "PINECONE_API_KEY", "AI21_API_KEY"):
    if os.environ.get(_secret_name):
        continue
    try:
        os.environ[_secret_name] = getpass(f"Enter {_secret_name}: ")
    except (EOFError, NotImplementedError):
        # No interactive input is available (batch execution): report it
        # explicitly instead of failing later with an opaque auth error.
        warnings.warn(f"{_secret_name} is not set; services that need it will fail.")

Bias detection model:

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import torch

# Device index: 0 (first GPU) when CUDA is available, otherwise -1.
# This value is later passed as `device=` to the transformers pipeline in bias_detection.
device = 0 if torch.cuda.is_available() else -1

# Tokenizer and sequence classifier of the "d4data/bias-detection-model" checkpoint.
# from_tf=True is passed to from_pretrained — presumably because the checkpoint
# ships TensorFlow weights; verify against the model card.
bias_model_tokenizer = AutoTokenizer.from_pretrained("d4data/bias-detection-model")
bias_model = AutoModelForSequenceClassification.from_pretrained("d4data/bias-detection-model",from_tf=True)

- https://shap.readthedocs.io/en/latest/example_notebooks/text_examples/text_entailment/Textual%20Entailment%20Explanation%20Demo.html
- https://huggingface.co/facebook/bart-large-mnli

Entailment model (BART):

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Device index: 0 (first GPU) when CUDA is available, otherwise -1.
device = 0 if torch.cuda.is_available() else -1

# MNLI classifier and tokenizer used by BART_prediction.
# NOTE(review): `device=` is not a documented argument of from_pretrained, so the
# model is presumably left on the CPU here — confirm, and use `.to(...)` if a GPU is intended.
bart_model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli",device=device)
bart_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")

In [None]:
def BART_prediction(premise,hypothesis):
    """Classify the relation between ``premise`` and ``hypothesis`` with the MNLI model.

    Args:
        premise: Text taken as given.
        hypothesis: Text whose relation to the premise is assessed.

    Returns:
        One of "contradiction", "neutral" or "entailment".
    """
    # truncation=True keeps over-long pairs within the model's maximum input
    # length instead of failing inside the forward pass.
    input_ids = bart_tokenizer.encode(
        premise, hypothesis, return_tensors="pt", truncation=True
    )

    # Inference only: no autograd graph is needed. The inputs are moved to
    # whichever device the model currently lives on.
    with torch.no_grad():
        logits = bart_model(input_ids.to(bart_model.device))[0]

    # softmax is monotonic, so the arg-max of the logits is the predicted class.
    predicted_index = int(torch.argmax(logits, dim=1).item())

    bart_label_map = {0: "contradiction", 1: "neutral", 2: "entailment"}
    return bart_label_map[predicted_index]

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification

# Tokenizer of the base uncased BERT checkpoint, paired with a fine-tuned
# classifier loaded from a Kaggle input directory (environment-specific path).
BERT_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
BERT_model = BertForSequenceClassification.from_pretrained('/kaggle/input/bert_hate_speech/pytorch/default/1/fine_tuned_bert')
# Switch the model to evaluation mode.
BERT_model.eval()

In [None]:
def BERT_hate_speech(text):
    """Classify ``text`` with the fine-tuned BERT model.

    Returns:
        "no" when the predicted class is 0 (no hate speech), "yes" otherwise
        (class 1, hate speech).
    """
    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    BERT_model.to(target)

    encoding = BERT_tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=None,  # use the model's own maximum length (512 tokens for BERT)
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    with torch.no_grad():
        outputs = BERT_model(
            input_ids=encoding['input_ids'].to(target),
            attention_mask=encoding['attention_mask'].to(target),
        )

    # Turn the raw logits into probabilities and pick the most likely class.
    class_probabilities = torch.softmax(outputs.logits, dim=1)
    predicted_class = torch.argmax(class_probabilities, dim=1)

    # Label 0: no hate speech, label 1: hate speech.
    return "no" if predicted_class == 0 else "yes"

# Tools

refs:
- https://python.langchain.com/v0.2/docs/integrations/tools/tavily_search/

In [None]:
### Web Search
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_text_splitters import RecursiveCharacterTextSplitter

# `max_results` is the field that limits how many hits the tool returns;
# the previous `k=2` keyword is not a field of TavilySearchResults.
web_search_tool = TavilySearchResults(max_results=2)

# Indexing

Organizing the external sources for the LLM: indexing and chunking of the documents. Refs:
- https://python.langchain.com/v0.1/docs/modules/data_connection/document_loaders/
- https://python.langchain.com/v0.1/docs/modules/data_connection/vectorstores/
- https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/recursive_text_splitter/
- Nomic embeddings: https://docs.nomic.ai/atlas/capabilities/embeddings#selecting-a-device

Note: the indexes can be inspected directly at https://app.pinecone.io/organizations/-O2Tiw_0VD7HTOASPJE5/projects/2a95c518-e514-4d39-bed8-4b12fd90ad44/indexes

Note on chunking: https://dev.to/peterabel/what-chunk-size-and-chunk-overlap-should-you-use-4338

In [None]:
from langchain_pinecone import PineconeVectorStore
from langchain_ai21 import AI21Embeddings

def create_retriever(index_name,top_k):
    """Return a retriever over the Pinecone index ``index_name`` that yields ``top_k`` documents."""
    embedding_model = AI21Embeddings(device="cuda")
    store = PineconeVectorStore(index_name=index_name, embedding=embedding_model)
    search_options = {"k": top_k}
    return store.as_retriever(search_kwargs=search_options)

def create_KBT_retrievers(aspects,top_k):
    """Build one retriever per aspect, each bound to the index "<aspect lower-cased>-kbt".

    The returned list follows the order of ``aspects``.
    """
    return [create_retriever(f"{name.lower()}-kbt", top_k) for name in aspects]

# Prompt support

In [None]:
from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

def create_prompt(system_prompt, input_variables, model, examples=None):
    """Assemble the prompt template sent to the model.

    Layout: the system prompt, optionally a few-shot section, then one
    "<name>: {<name>}" line per input variable. When ``model`` contains
    "llama3.1" the Llama 3.1 header/end-of-turn tokens are added around the
    system and user parts and a final "output:" cue is appended.

    Args:
        system_prompt: Instruction text placed at the top of the prompt.
        input_variables: Names of the placeholders filled at invocation time.
        model: Model name; only checked for the substring "llama3.1".
        examples: Optional list of dicts holding one value per input variable
            plus an "output" entry.

    Returns:
        A PromptTemplate whose input variables are ``input_variables``.
    """
    def _literal(value):
        # Example text is data, not template syntax: double the braces so the
        # f-string style template renders them verbatim instead of treating
        # them as placeholders.
        return str(value).replace("{", "{{").replace("}", "}}")

    variable_names = [str(name) for name in input_variables]
    is_llama31 = "llama3.1" in model

    parts = []
    if is_llama31:
        parts.append("<|begin_of_text|><|start_header_id|>system<|end_header_id|> ")
    parts.append(system_prompt)

    if examples is not None:
        parts.append("\nHere some examples: \n")
        for example in examples:
            parts.append("\n")
            for name in variable_names:
                parts.append(name + ": " + _literal(example[name]) + "\n")
            # Same spelling as the final "output:" cue (was misspelled "ouput:").
            parts.append("output: " + _literal(example["output"]) + "\n")
        parts.append("\n")

    if is_llama31:
        parts.append("<|eot_id|><|start_header_id|>user<|end_header_id|> \n")
    for name in variable_names:
        parts.append(name + ": {" + name + "}\n")
    if is_llama31:
        parts.append("output:" + " <|eot_id|><|start_header_id|>assistant<|end_header_id|>")

    return PromptTemplate(template="".join(parts), input_variables=variable_names)

In [None]:
def call_model(llm: str, prompt: str, input_variables:list[str], examples=None):
    """Return a runnable chain: prompt template -> Ollama model (temperature 0) -> string output."""
    ollama_llm = Ollama(model=llm, temperature=0)
    template = create_prompt(prompt, input_variables, llm, examples)
    to_text = StrOutputParser()
    return template | ollama_llm | to_text

In [None]:
def get_prompt(path):
    """Return the full text of the prompt file at ``path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale of the machine running the notebook.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()

def get_examples(path):
    """Load the few-shot examples stored as a Python expression in ``path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale of the machine running the notebook.

    Returns:
        Whatever the expression in the file evaluates to (the callers slice it
        and iterate over it as a sequence of dicts).
    """
    with open(path, 'r', encoding='utf-8') as file:
        examples = file.read()
    # SECURITY(review): eval() executes arbitrary code found in the file. Only
    # load example files you control; if they contain plain literals, consider
    # switching to ast.literal_eval.
    return eval(examples)

https://learnprompting.org/docs/reliability/debiasing

# Aspect agents

refs
- https://www.langchain.com/langgraph

In [140]:
from pprint import pprint
from typing import List, Annotated
import operator
import functools
import sklearn.metrics
import numpy as np

from langchain_core.documents import Document
from typing_extensions import TypedDict

from langgraph.graph import END, StateGraph, START

### State
class GraphState(TypedDict):
    """
    State shared by the nodes of a single aspect-agent graph.
    """
    
    # Question as received from the master graph (read by rewrite_query).
    original_query: str
    # Aspect-specific query written by rewrite_query and used by the later nodes.
    query: str
    # Name of the aspect this agent is responsible for.
    aspect: str
    # Index used by retrieve to select this aspect's KBT retriever.
    aspect_id: int
    # Answers produced by the agents; values from different nodes are concatenated (operator.add).
    answers_agent: Annotated[List[str], operator.add]
    # Aspect names appended together with the answers, also concatenated (operator.add).
    ord_aspects: Annotated[List[str], operator.add]
    # Answer generated by this agent (read by bias detection / debiasing / confirmation).
    my_answer: str
    # "Yes"/"No" flag set by grade_documents and read by decide_to_generate.
    web_search: str
    # Documents from the main retriever, progressively filtered by the pipeline nodes.
    documents: List[str]
    # Documents from the aspect's KBT retriever, used as premises by entailment_filter.
    documents_kbt: List[str]
        
def rewrite_query(state,verbose,llm,observer):
    """Rewrite the original question from the point of view of the agent's aspect.

    Returns:
        {"query": <rewritten query>}. When an observer is given, the rewritten
        query is also stored under ``observer.aspects[aspect]``.
    """
    if verbose:
        print("---REWRITING QUERY---")
        print(f"State: {state}")

    original_query = state["original_query"]
    aspect = state["aspect"]

    rewriting_chain = call_model(
        llm,
        get_prompt("/kaggle/input/prompts/prompts/query_rewriting.txt"),
        ["original_query","aspect"],
        get_examples("/kaggle/input/prompts/prompts/query_rewriting_examples.txt"),
    )
    rewritten = rewriting_chain.invoke({"original_query": original_query, "aspect": aspect})

    if observer is not None:
        # Starts a fresh record for this aspect; later nodes add keys to it.
        observer.aspects[f"{aspect}"] = {"aspect_query": rewritten}
    return {"query": rewritten}


def retrieve(state,verbose,retriever,retrievers_KBT):
    """Fetch documents for the current query from the main and the aspect's KBT retriever.

    Returns:
        {"documents": ..., "documents_kbt": ..., "query": ...}
    """
    if verbose:
        print("---RETRIEVE---")
        print(f"State: {state}")

    query, aspect_id = state["query"], state["aspect_id"]

    main_hits = retriever.invoke(query)
    # The KBT retriever is selected by the aspect's position.
    kbt_hits = retrievers_KBT[aspect_id].invoke(query)

    return {"documents": main_hits, "documents_kbt": kbt_hits, "query": query}


def generate(state,verbose,llm,fairness,observer):
    """Generate the aspect agent's answer from the documents kept so far.

    Returns:
        A state update with "documents", "query" and "my_answer". When
        ``fairness`` is falsy the answer is also published directly through
        "answers_agent" / "ord_aspects"; otherwise that is left to the nodes
        that follow.
    """
    if verbose:
        print("---GENERATE---")
        print(f"State: {state}")
    query = state["query"]
    documents = state["documents"]
    aspect = state["aspect"]

    # RAG generation
    rag_chain = call_model(
        llm,
        get_prompt("/kaggle/input/prompts/prompts/generating_answer.txt"),
        ["context","question"],
    )
    context_texts = [doc.page_content for doc in documents]
    generation = rag_chain.invoke({"context": context_texts, "question": query})

    aspect_id = state["aspect_id"]  # only used by the (disabled) debug trace

    if observer is not None:
        record = observer.aspects[f"{aspect}"]
        record["documents_for_generation"] = context_texts
        record["answer"] = generation
        record["debiased_answer"] = "//"

    update = {"documents": documents, "query": query, "my_answer": generation}
    if not fairness:
        update["answers_agent"] = [generation]
        update["ord_aspects"] = [state["aspect"]]
    return update


def confirm_answer(state,verbose):
    """Publish the agent's answer unchanged, together with its aspect name."""
    if verbose:
        print("---CONFIRM ANSWER---")
        print(f"State: {state}")

    confirmed, aspect_name = state["my_answer"], state["aspect"]
    return {"answers_agent": [confirmed], "ord_aspects": [aspect_name]}


def grade_documents(state,verbose,llm,observer):
    """Keep the documents the model grades as relevant to the query.

    A document is kept when the grader's reply contains "yes". As soon as at
    least one document is rejected, the "web_search" flag is set to "Yes".

    Returns:
        {"documents": <kept documents>, "query": ..., "web_search": "Yes" | "No"}
    """
    if verbose:
        print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
        print(f"State: {state}")
    query = state["query"]
    documents = state["documents"]
    aspect = state["aspect"]

    grader = call_model(
        llm,
        get_prompt("/kaggle/input/prompts/prompts/retrieval_grader.txt"),
        ["question","document"],
    )

    kept_docs, rejected_texts = [], []
    for doc in documents:
        verdict = grader.invoke({"question": query, "document": doc.page_content})
        if "yes" in verdict.lower():
            if verbose: print("---GRADE: DOCUMENT RELEVANT---")
            kept_docs.append(doc)
        else:
            if verbose: print("---GRADE: DOCUMENT NOT RELEVANT---")
            # Rejected documents are only remembered by their text.
            rejected_texts.append(doc.page_content)

    # One rejection is enough to request a web search.
    search_flag = "Yes" if rejected_texts else "No"

    if observer is not None:
        record = observer.aspects[f"{aspect}"]
        record["web_search"] = search_flag
        record["not_relevants_docs"] = rejected_texts
    return {"documents": kept_docs, "query": query, "web_search": search_flag}


def web_search(state,verbose):
    """Extend the current documents with chunks of web search results.

    The result contents are joined into one document and split into chunks
    of 250 tokens with no overlap.

    Returns:
        {"documents": <previous documents + web chunks>, "query": ...}.
        "documents" is always a list (empty when nothing is available).
    """
    if verbose:
        print("---WEB SEARCH---")
        print(f"State: {state}")
    query = state["query"]
    # Work on a copy so the list held by the incoming state is not mutated in
    # place, and treat a missing/None list as empty.
    documents = list(state.get("documents") or [])

    # Web search
    docs = web_search_tool.invoke({"query": query})
    web_results = Document(page_content="\n".join([d["content"] for d in docs]))
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=250, chunk_overlap=0
    )

    documents.extend(text_splitter.split_documents([web_results]))
    return {"documents": documents, "query": query}


def hate_speech_filter(state,verbose,llm,encoder,observer):
    if verbose:
        print("---HATE SPEECH FILTER---")
        print(f"State: {state}")
    documents = state["documents"]
    aspect = state["aspect"]
    
    prompt = get_prompt("/kaggle/input/prompts/prompts/hate_speech.txt")
    examples = get_examples("/kaggle/input/prompts/prompts/hate_speech_examples.txt")
    chain = call_model(llm, prompt, ["input"], examples)

    # Score each doc
    filtered_docs = []
    hate_speech_docs = []
    for d in documents:
        if encoder:
            score = BERT_hate_speech(d.page_content)
        else:
            score = chain.invoke(
                {"input": d.page_content}
            )
        #grade = score["score"]
        if "no" in score.lower():
            if verbose: print("---DOCUMENT ACCEPTED---")
            filtered_docs.append(d)
        else:
            if verbose: print("---DOCUMENT HATEFUL---")
            hate_speech_docs.append(d.page_content)
    if observer!=None:
        observer.aspects[f"{aspect}"]["hate_speech_docs"] = hate_speech_docs
    return {"documents": filtered_docs}


def _entailment_scorer(BART_model, llm, shots):
    """Return a ``(premise, hypothesis) -> lower-cased verdict text`` callable."""
    if BART_model:
        return lambda premise, hypothesis: BART_prediction(premise, hypothesis).lower()

    prompt = get_prompt("/kaggle/input/prompts/prompts/entailment_checker.txt")
    if shots > 0:
        examples = get_examples("/kaggle/input/prompts/prompts/entailment_checker_examples.txt")
        chain = call_model(llm, prompt, ["premise","hypothesis"], examples[:shots])
    else:
        chain = call_model(llm, prompt, ["premise","hypothesis"])
    return lambda premise, hypothesis: chain.invoke(
        {"premise": premise, "hypothesis": hypothesis}
    ).lower()


def _passes_entailment(document, documents_KBT, score_pair, skeptical, neutral_acceptance):
    """Decide whether ``document`` is accepted against the KBT documents."""
    all_neutral = True
    for d_kbt in documents_KBT:
        score = score_pair(d_kbt.page_content, document.page_content)
        if "neutral" not in score:
            all_neutral = False
        if skeptical:
            if "contradiction" in score:
                return False  # a single contradiction rejects the document
        elif "entailment" in score:
            return True  # a single entailment accepts the document
    if skeptical:
        # No contradiction found: accept unless every verdict was neutral and
        # neutral documents are not accepted.
        return (not all_neutral) or bool(neutral_acceptance)
    # Credulous without entailment: only an all-neutral document can still pass.
    return all_neutral and bool(neutral_acceptance)


def entailment_filter(state,BART_model,strategy_entailment,neutral_acceptance,verbose,observer,llm,shots):
    """Keep the documents that are compatible with the aspect's KBT documents.

    Every retrieved document (hypothesis) is compared with every KBT document
    (premise):

    * Skeptical (``strategy_entailment`` truthy): a document is rejected as
      soon as one comparison is a contradiction.
    * Credulous (``strategy_entailment`` falsy): a document is accepted as
      soon as one comparison is an entailment.

    A document for which every comparison was neutral (including the case of
    an empty KBT list) is accepted only when ``neutral_acceptance`` is truthy.

    Fixes over the previous version: the final decision used an exact string
    comparison on the last verdict while the loop used substring matching, so
    a reply such as "Contradiction." broke out of the loop but was still
    accepted; with an empty KBT list the verdict variable was unbound or
    stale; the credulous branch could append the same document twice.

    Args:
        state: Graph state; reads "documents", "documents_kbt" and "aspect".
        BART_model: Truthy to score with BART_prediction, falsy to use the LLM.
        strategy_entailment: True for skeptical, False for credulous.
        neutral_acceptance: Whether all-neutral documents are accepted.
        verbose: Print progress information when truthy.
        observer: Optional recorder; receives a classification report computed
            on the documents whose metadata carries a "label".
        llm: Model name for the LLM scorer.
        shots: Number of few-shot examples given to the LLM scorer (0 = none).

    Returns:
        {"documents": <accepted documents>}
    """
    if verbose:
        print("---ENTAILMENT FILTER---")
        print(f"State: {state}")
    documents = state["documents"]
    documents_KBT = state["documents_kbt"]
    aspect = state["aspect"]

    # The prompt/chain is only built on the LLM path.
    score_pair = _entailment_scorer(BART_model, llm, shots)

    filtered_docs = []
    for d in documents:
        if _passes_entailment(d, documents_KBT, score_pair, strategy_entailment, neutral_acceptance):
            filtered_docs.append(d)
            if verbose: print("---DOCUMENT ENTAILED---")

    if observer is not None:
        # Label 0: true tweets, label 1: false tweets. A rejected document is
        # predicted as 1, an accepted one as 0.
        labelled = [document for document in documents if "label" in document.metadata]
        y_true = [int(document.metadata.get("label")) for document in labelled]
        y_pred = [0 if document in filtered_docs else 1 for document in labelled]
        report = sklearn.metrics.classification_report(y_true,y_pred,labels=[0,1],
                                                       output_dict=True,zero_division=0)
        observer.aspects[f"{aspect}"]["report_entailment"] = report

    return {"documents": filtered_docs}


def debiasing(state,verbose,llm,observer):
    """Ask the model to rewrite the agent's answer without bias and publish the result.

    Returns:
        {"answers_agent": [<debiased answer>], "ord_aspects": [<aspect>]}
    """
    if verbose:
        print("---DEBIASING FILTER---")
        print(f"State: {state}")

    answer = state["my_answer"]
    aspect = state["aspect"]

    debiasing_chain = call_model(
        llm,
        get_prompt("/kaggle/input/prompts/prompts/debiasing_answer.txt"),
        ["text"],
        get_examples("/kaggle/input/prompts/prompts/debiasing_answer_examples.txt"),
    )
    unbiased_answer = debiasing_chain.invoke({"text": answer})

    if observer is not None:
        observer.aspects[f"{aspect}"]["debiased_answer"] = unbiased_answer

    return {"answers_agent": [unbiased_answer], "ord_aspects": [aspect]}


### Conditional edge

def _normalize_bias_label(raw_label):
    """Map a classifier/LLM reply onto one of the two routing keys."""
    text = str(raw_label).lower()
    # "non-biased" contains "biased", so the longer label must be tested first.
    if "non-biased" in text:
        return "non-biased"
    # Anything else (including an unrecognised reply) is routed to debiasing.
    return "biased"


def bias_detection(state,verbose,llm,bias_encoder_model):
    """Conditional edge: decide whether the generated answer needs debiasing.

    Fix over the previous version: the reply was tested for "biased" before
    "non-biased", so a "non-biased" reply was always turned into "biased";
    a reply containing neither label was returned as-is, which is not a valid
    routing key.

    Args:
        state: Graph state; reads "my_answer".
        verbose: Print progress information when truthy.
        llm: Model name used when ``bias_encoder_model`` is falsy.
        bias_encoder_model: Truthy to use the encoder classifier, falsy to ask the LLM.

    Returns:
        "biased" or "non-biased".
    """
    if verbose:
        print("---BIAS DETECTION---")
        print(f"State: {state}")

    answer = state["my_answer"]

    if bias_encoder_model:
        classifier = pipeline('text-classification', model=bias_model, tokenizer=bias_model_tokenizer, device=device) # cuda = 0,1 based on gpu availability
        raw_label = classifier(answer)[0]['label']
    else:
        # The prompt, the examples and the chain are only needed on the LLM path.
        prompt = get_prompt("/kaggle/input/prompts/prompts/bias_detection.txt")
        examples = get_examples("/kaggle/input/prompts/prompts/bias_detection_examples.txt")
        chain = call_model(llm, prompt, ["input"], examples)
        raw_label = chain.invoke({"input": answer})

    response = _normalize_bias_label(raw_label)

    if verbose: print(response) #biased, non-biased

    return response


def decide_to_generate(state,verbose):
    """Conditional edge: route to "websearch" when the grader asked for it, else "relevant"."""
    if verbose:
        print("---ASSESS GRADED DOCUMENTS---")
        print(f"State: {state}")

    if state["web_search"] != "Yes":
        # Every document was graded relevant: carry on with the pipeline.
        if verbose: print("---DECISION: RELEVANT---")
        return "relevant"

    # At least one document was discarded: complement with a web search.
    if verbose: print(
            "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---"
        )
    return "websearch"

**Building graph with edges**

In [141]:
# Conditional workflow
def workflow_aspect_agent(configs):
    """Build and compile the graph executed by each aspect agent.

    The node sequence is rewrite_query -> retrieve -> [grade_documents ->
    (websearch)] -> [hate_speech_filter] -> [entailment_filter] -> generate ->
    [bias detection -> debiasing_filter | confirm_answer]. The bracketed
    stages are only added when the matching flag of ``configs`` is truthy
    (web_search, safeness, trustworthiness, fairness).

    Args:
        configs: Object exposing the attributes read below (verbose,
            local_llm, observer, retriever, retrievers_KBT, the feature flags
            and the entailment / bias / hate-speech options).

    Returns:
        The compiled graph.
    """
    # Build graph
    workflow = StateGraph(GraphState)

    # Nodes that are always present; options are bound with functools.partial
    # so that each node only receives the state at run time.
    workflow.add_node("rewrite_query", functools.partial(rewrite_query, verbose=configs.verbose, 
                                                    llm=configs.local_llm, observer=configs.observer))  # query rewriting
    workflow.add_node("retrieve", functools.partial(retrieve, verbose=configs.verbose, 
                                                    retriever=configs.retriever, 
                                                    retrievers_KBT=configs.retrievers_KBT))  # retrieve
    workflow.add_node("generate", functools.partial(generate, verbose=configs.verbose, 
                                                    llm=configs.local_llm, 
                                                    fairness=configs.fairness, observer=configs.observer))  # generate
    
    # Optional nodes, one group per feature flag.
    if configs.web_search:
        workflow.add_node("websearch", functools.partial(web_search, verbose=configs.verbose))  # web search
        workflow.add_node("grade_documents", functools.partial(grade_documents, verbose=configs.verbose, 
                                                               llm=configs.local_llm,
                                                               observer=configs.observer))  # grade documents
    if configs.safeness:
        workflow.add_node("hate_speech_filter", functools.partial(hate_speech_filter, verbose=configs.verbose, 
                                                                  llm=configs.local_llm, 
                                                                  encoder=configs.hate_encoder_model,
                                                                  observer=configs.observer))
    if configs.trustworthiness:
        workflow.add_node("entailment_filter", functools.partial(entailment_filter, BART_model=configs.BART_model, strategy_entailment=configs.strategy_entailment, 
                                                                 neutral_acceptance=configs.neutral_acceptance, verbose=configs.verbose, llm=configs.local_llm, 
                                                                 observer=configs.observer,
                                                                 shots=configs.shots))  # entailment
    if configs.fairness:  
        workflow.add_node("debiasing_filter", functools.partial(debiasing, verbose=configs.verbose, 
                                                                llm=configs.local_llm,
                                                                observer=configs.observer))
        workflow.add_node("confirm_answer", functools.partial(confirm_answer, verbose=configs.verbose))

    workflow.add_edge(START, "rewrite_query")
    workflow.add_edge("rewrite_query", "retrieve")
    
    # With web search enabled, retrieval is followed by grading; graded
    # documents either trigger a web search or go to the first enabled filter
    # (hate speech, then entailment, then generation).
    if configs.web_search:
        workflow.add_edge("retrieve", "grade_documents")  
        workflow.add_conditional_edges(
            "grade_documents",
            functools.partial(decide_to_generate, verbose=configs.verbose),
            {
                "websearch": "websearch",
                "relevant": "hate_speech_filter" if configs.safeness else "entailment_filter" if configs.trustworthiness else "generate"
            },
        )
    
    # Connect the last document-producing node ("websearch" when web search is
    # enabled, "retrieve" otherwise) to the first enabled filter, then chain
    # the filters up to "generate".
    if configs.safeness:
        if configs.web_search:
            workflow.add_edge("websearch", "hate_speech_filter")
        else: 
            workflow.add_edge("retrieve", "hate_speech_filter")
        if configs.trustworthiness:
            workflow.add_edge("hate_speech_filter", "entailment_filter")
            workflow.add_edge("entailment_filter", "generate")
        else:
            workflow.add_edge("hate_speech_filter","generate")
    elif configs.trustworthiness:
        if configs.web_search:
            workflow.add_edge("websearch", "entailment_filter")
        else: 
            workflow.add_edge("retrieve", "entailment_filter")
        workflow.add_edge("entailment_filter", "generate")
    else:
        if configs.web_search:
            workflow.add_edge("websearch", "generate")
        else: 
            workflow.add_edge("retrieve", "generate")
    
    # With fairness enabled, the generated answer is routed by bias_detection
    # to either the debiasing node or the confirmation node; otherwise
    # generation is the last step.
    if configs.fairness:
        workflow.add_conditional_edges(
            "generate",
            functools.partial(bias_detection, verbose=configs.verbose, llm=configs.local_llm, bias_encoder_model=configs.bias_encoder_model),
            {
                "biased": "debiasing_filter",
                "non-biased": "confirm_answer",
            },
        )
        workflow.add_edge("confirm_answer", END)
        workflow.add_edge("debiasing_filter", END)
    else:
        workflow.add_edge("generate", END)

    workflow_compiled = workflow.compile()
    return workflow_compiled

In [None]:
from IPython.display import Image, display

# Render the aspect-agent graph as a Mermaid PNG.
# NOTE(review): `configs` is only assigned in a later cell of this notebook, so this
# cell fails on a fresh kernel when the notebook is run top to bottom.
display(Image(workflow_aspect_agent(configs).get_graph().draw_mermaid_png()))

# Master agent

In [142]:
from typing import Annotated
import operator
from langgraph.constants import Send


### Super Graph State
class SuperGraphState(TypedDict):
    """
    State of the master graph that fans out to the aspect agents.
    """
    
    # Question sent to every aspect agent as "original_query".
    question: str
    # Aspect names; one aspect agent is dispatched per entry.
    aspects: List[str]
    # Answers collected from the aspect agents, concatenated with operator.add.
    answers_agent: Annotated[List[str], operator.add]
    # Aspect names collected with the answers, concatenated with operator.add.
    ord_aspects: Annotated[List[str], operator.add]
    # Text produced by organize_answers.
    final_answer: str

def send_aspects(state,verbose):
    """Fan out: create one ``Send`` to "aspect_agent_node" per aspect.

    Each aspect agent receives the question, its aspect name and the position
    of the aspect in ``state["aspects"]`` as "aspect_id". The position comes
    from ``enumerate`` rather than ``list.index``, so repeated aspect names
    get distinct ids and the list is scanned only once.
    """
    if verbose: 
        print("---SEND ASPECT TO EACH ASPECT-AGENT---")
        print(f"State: {state}")
    question = state["question"]
    return [
        Send("aspect_agent_node", {"original_query": question, "aspect": aspect, "aspect_id": position})
        for position, aspect in enumerate(state["aspects"])
    ]

def organize_answers(state,verbose,llm,organize):
    """Merge the aspect agents' answers into the final answer.

    With ``organize`` truthy the sectioned prompt is used and the aspect
    names are passed along with the answers; otherwise only the answers are.

    Returns:
        {"final_answer": <model output>}
    """
    if verbose:
        print("---ORGANIZE OUTPUTS---")
        print(f"State: {state}")
    answers_agent = state["answers_agent"]
    ord_aspects = state["ord_aspects"]

    if organize:  # with sections
        prompt_path = "/kaggle/input/prompts/prompts/final_answer_with_sections.txt"
        variables = ["answers","aspects"]
        payload = {"answers": answers_agent, "aspects": ord_aspects}
    else:  # without sections
        prompt_path = "/kaggle/input/prompts/prompts/final_answer.txt"
        variables = ["answers"]
        payload = {"answers": answers_agent}

    merging_chain = call_model(llm, get_prompt(prompt_path), variables)
    return {"final_answer": merging_chain.invoke(payload)}

In [143]:
def master_flow(configs):
    """Build and compile the master graph: fan out to the aspect agents, then merge their answers."""
    graph = StateGraph(SuperGraphState)

    # Nodes: the merging step and the compiled aspect-agent sub-graph.
    merge_node = functools.partial(
        organize_answers, verbose=configs.verbose, llm=configs.local_llm, organize=configs.organize
    )
    graph.add_node("organize_answers", merge_node)
    graph.add_node("aspect_agent_node", workflow_aspect_agent(configs))

    # Edges: START dispatches one aspect agent per aspect; their outputs are
    # merged before the graph ends.
    dispatch = functools.partial(send_aspects, verbose=configs.verbose)
    graph.add_conditional_edges(START, dispatch, ["aspect_agent_node"])
    graph.add_edge("aspect_agent_node", "organize_answers")
    graph.add_edge("organize_answers", END)

    return graph.compile()

In [None]:
from IPython.display import Image, display

# Render the master graph as a Mermaid PNG. get_graph() is called without arguments
# here; setting xray to 1 would also show the internal structure of the nested graph.
# NOTE(review): `configs` is only assigned in a later cell of this notebook, so this
# cell fails on a fresh kernel when the notebook is run top to bottom.
display(Image(master_flow(configs).get_graph().draw_mermaid_png()))

# Configuration and app-launching

Vectorstore configuration

In [144]:
# Pinecone index queried by the main retriever.
index_name = "entailment-test"
# One aspect agent (and one "<aspect>-kbt" index) per entry; "Technology" is currently left out.
aspects = ["Health","Governmental","Society"] #"Technology"

top_retriever = 10 # documents returned by the main retriever
top_KBT = 5 # documents returned by each KBT retriever

retriever = create_retriever(index_name, top_retriever)
retrievers_KBT = create_KBT_retrievers(aspects, top_KBT)

In [146]:
class Config(object):
    """Settings shared by the master graph and the aspect-agent graphs."""

    def __init__(self,retriever,retrievers_KBT,aspects):
        self.local_llm = "llama3.1"  # alternative noted by the author: "gemma2"

        # Retrieval resources supplied by the caller.
        self.retriever = retriever
        self.retrievers_KBT = retrievers_KBT
        self.aspects = aspects

        switches = {
            # True: print every step of the process.
            "verbose": False,
            # True: include grading + web search in the workflow.
            "web_search": False,

            # --- Controlled properties ---
            "safeness": False,         # add the hate speech detection module
            "trustworthiness": True,   # add the entailment module based on the KBT
            "fairness": False,         # add the debiasing module

            # --- Entailment ---
            # Strategy: False = "Credulous", True = "Skeptical".
            "strategy_entailment": True,
            # What to do with a document that is neutral with respect to every
            # KBT document: True = accept it, False = reject it.
            "neutral_acceptance": True,
            # True: use the BART model for entailment. False: use the LLM.
            "BART_model": False,
            # Number of few-shot examples given to the entailment checker.
            "shots": 0,

            # --- Bias detection ---
            # True: use the encoder model. False: use the LLM.
            "bias_encoder_model": False,

            # --- Hate speech detection ---
            # True: use the encoder model. False: use the LLM.
            "hate_encoder_model": True,

            # --- Final generation ---
            # True: organize the final output in sections. False: no sections.
            "organize": False,

            # --- Testing ---
            "observer": None,
        }
        for attribute, default in switches.items():
            setattr(self, attribute, default)
    
configs = Config(retriever,retrievers_KBT,aspects)

Starting language model

In [None]:
# Start the Ollama server in the background, download the configured model,
# then run that model in the background.
start_ollama()
pull_model(configs.local_llm)
start_model(configs.local_llm)

In [None]:
# Input of the master graph: the user question and the aspects to cover.
inputs = {"question": "How people stayed during covid19 pandemy?", "aspects": configs.aspects}

# Stream the master graph and log each key as its output arrives.
for output in master_flow(configs).stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
# `value` is the last streamed output — presumably the one of "organize_answers",
# which is the node that returns "final_answer"; verify if the graph changes.
answer = value["final_answer"]
print(answer)

# Test

In [147]:
# Funzioni di supporto:
import json

def load_json(filename):
    """Parse the JSON file ``filename`` and return the decoded object.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale of the machine running the notebook.
    """
    with open(filename, 'r', encoding='utf-8') as file:
        return json.load(file)

def write_file(filename,content):
    """Serialise ``content`` as JSON into ``filename``, replacing any previous content.

    Args:
        filename: Destination path; the file is opened in write mode.
        content: JSON-serialisable object to store.

    The output is pretty-printed with a 4-space indentation.
    """
    with open(filename, mode='w') as sink:
        json.dump(content, sink, indent=4)


"""
queries_gpt = ["Does the COVID-19 vaccine contain a microchip for controlling people?",
"Was the COVID-19 pandemic planned by a global elite?",
"Does inhaling hot steam kill the COVID-19 virus?",
"Was COVID-19 created in a lab as a biological weapon?",
"Does drinking alcohol help prevent COVID-19 infection?",
"Is 5G responsible for spreading COVID-19?",
"Do masks cause a reduction in blood oxygen levels?",
"Do COVID-19 vaccines cause infertility in women?",
"Can high doses of vitamin C cure COVID-19?",
"Can pets transmit COVID-19 to humans?"]
"""

# More general queries (the experiment loop below iterates over this list).
queries_gpt = ["Tell me about the Covid-19 pandemic.",
"What measures were taken to manage the Covid-19 emergency?",
"What were the repercussions of the Covid-19 pandemic?",
"How did people experience the Covid-19 pandemic?",
"Tell me about the misinformation spread during the Covid-19 pandemic.",
"How important was the role of doctors during the covid19 pandemic?",
"What origin does covid19 have?",
"Effectiveness of COVID-19 vaccines",
"Was the COVID-19 pandemic planned by a global elite?",
"Was COVID-19 created in a lab as a biological weapon?"]

#write_file("/kaggle/working/gpt_queries.json",queries_gpt)

Predisposizione dell'observer per salvare i risultati

In [148]:
class Observer:
    """Mutable record holding what is saved for a single query run.

    Attributes are initialised to empty values and are filled in from outside:
    the query text, the acceptance strategy label, whether neutral documents
    are accepted, a per-aspect dictionary and the final answer.
    """

    def __init__(self):
        self.query = ""
        self.type_of_acceptance = ""
        self.neutral_acceptance = False
        self.aspects = {}
        self.final_answer = ""

    def generate_dict(self):
        """Return the recorded fields as a plain dict, in a fixed key order."""
        exported = (
            "query",
            "type_of_acceptance",
            "neutral_acceptance",
            "aspects",
            "final_answer",
        )
        return {field: getattr(self, field) for field in exported}

Combinazioni di configurazione con skeptical/credulous e neutral

Multiple combinations for entailment:

In [None]:
shots = [0,3,6,12]
# Each pair is (strategy_entailment, neutral_acceptance):
# Skeptical+Neutral, Skeptical+No-Neutral, Credulous+Neutral, Credulous+No-Neutral.
combination = [(True,True), (True,False), (False,True), (False,False)]

for shot in shots:
    for comb in combination:
        print(f"Start combination with shots: {shot}")
        # The shared configuration is mutated in place for every combination.
        configs.shots = shot
        configs.strategy_entailment =  comb[0]
        configs.neutral_acceptance = comb[1]

        queries_list = queries_gpt

        attempt = 1
        ret_dict = {}
        for query in queries_list:
            inputs = {"question": query, "aspects": configs.aspects}
            print(f"Attempt {attempt} start")

            # A fresh observer per query; this cell fills in the query, the
            # acceptance label, the neutral flag and the final answer.
            # Presumably the workflow fills `aspects` through configs.observer — verify.
            configs.observer = Observer()
            configs.observer.query=query
            if configs.strategy_entailment:
                configs.observer.type_of_acceptance="Skeptical"
            else:
                configs.observer.type_of_acceptance="Credulous"
            configs.observer.neutral_acceptance=configs.neutral_acceptance

            # Track the last streamed value explicitly. Relying on the leaked
            # loop variable would silently store the previous query's answer
            # whenever a stream yields nothing.
            final_state = None
            for output in master_flow(configs).stream(inputs):
                for key, value in output.items():
                    final_state = value
            if final_state is None:
                raise RuntimeError(f"master_flow produced no output for query: {query!r}")
            answer = final_state["final_answer"]

            configs.observer.final_answer= answer
            ret_dict[f"attempt {attempt}"] = configs.observer.generate_dict()

            attempt = attempt + 1

        # One result file per (shots, strategy, neutral) combination.
        stringa = "Neutral" if configs.neutral_acceptance else "No-Neutral"
        write_file(f"/kaggle/working/llama31_{shot}_shots_{configs.observer.type_of_acceptance}_{stringa}.json",ret_dict)

Start combination with shots: 0
Attempt 1 start


time=2024-08-22T18:43:06.523Z level=INFO source=sched.go:710 msg="new model will fit in available VRAM in single GPU, loading" model=/root/.ollama/models/blobs/sha256-8eeb52dfb3bb9aefdf9d1ef24b3bdbcfbe82238798c4b918278320b6fcef18fe gpu=GPU-530c08b6-12d8-cce5-7b49-bd8a8fe870c2 parallel=4 available=15615524864 required="6.2 GiB"
time=2024-08-22T18:43:06.524Z level=INFO source=memory.go:309 msg="offload to cuda" layers.requested=-1 layers.model=33 layers.offload=33 layers.split="" memory.available="[14.5 GiB]" memory.required.full="6.2 GiB" memory.required.partial="6.2 GiB" memory.required.kv="1.0 GiB" memory.required.allocations="[6.2 GiB]" memory.weights.total="4.7 GiB" memory.weights.repeating="4.3 GiB" memory.weights.nonrepeating="411.0 MiB" memory.graph.full="560.0 MiB" memory.graph.partial="677.5 MiB"
time=2024-08-22T18:43:06.525Z level=INFO source=server.go:393 msg="starting llama server" cmd="/tmp/ollama2381662000/runners/cuda_v11/ollama_llama_server --model /root/.ollama/models/b

INFO [main] build info | build=1 commit="1e6f655" tid="137257124888576" timestamp=1724352186
INFO [main] system info | n_threads=2 n_threads_batch=-1 system_info="AVX = 1 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 0 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 | " tid="137257124888576" timestamp=1724352186 total_threads=4
INFO [main] HTTP server listening | hostname="127.0.0.1" n_threads_http="6" port="37571" tid="137257124888576" timestamp=1724352186


time=2024-08-22T18:43:06.778Z level=INFO source=server.go:627 msg="waiting for server to become available" status="llm server loading model"
llama_model_loader: - kv  24:                      tokenizer.ggml.merges arr[str,280147]  = ["Ġ Ġ", "Ġ ĠĠĠ", "ĠĠ ĠĠ", "...
llama_model_loader: - kv  25:                tokenizer.ggml.bos_token_id u32              = 128000
llama_model_loader: - kv  26:                tokenizer.ggml.eos_token_id u32              = 128009
llama_model_loader: - kv  27:                    tokenizer.chat_template str              = {{- bos_token }}\n{%- if custom_tools ...
llama_model_loader: - kv  28:               general.quantization_version u32              = 2
llama_model_loader: - type  f32:   66 tensors
llama_model_loader: - type q4_0:  225 tensors
llama_model_loader: - type q6_K:    1 tensors
llm_load_vocab: special tokens cache size = 256
llm_load_vocab: token to piece cache size = 0.7999 MB
llm_load_print_meta: format           = GGUF V3 (latest)
llm_load_prin

INFO [main] model loaded | tid="137257124888576" timestamp=1724352190


time=2024-08-22T18:43:10.183Z level=INFO source=server.go:632 msg="llama runner started in 3.66 seconds"


[GIN] 2024/08/22 - 18:43:11 | 200 |  4.885420409s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:11 | 200 |  4.919519566s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:11 | 200 |    4.9411836s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:14 | 200 |  931.920918ms |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:14 | 200 |  1.137370246s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:14 | 200 |  1.245426761s |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:15 | 200 |  916.359594ms |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:15 | 200 |  865.091196ms |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:15 | 200 |  664.676513ms |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:16 | 200 |   573.24136ms |       127.0.0.1 | POST     "/api/generate"
[GIN] 2024/08/22 - 18:43:16 | 200 |  853.410602ms 

Single test for a list of queries (complete test):

In [None]:
#queries_list = load_queries("/kaggle/input/preference/gpt_queries.json")
queries_list = ["Talk to me about COVID-19"] #queries_gpt

attempt = 1
ret_dict = {}
for query in queries_list:
    inputs = {"question": query, "aspects": configs.aspects}
    print(f"Attempt {attempt} start")

    # A fresh observer per query, labelled with the currently configured strategy.
    configs.observer = Observer()
    configs.observer.query=query
    if configs.strategy_entailment:
        configs.observer.type_of_acceptance="Skeptical"
    else:
        configs.observer.type_of_acceptance="Credulous"
    configs.observer.neutral_acceptance=configs.neutral_acceptance

    # Track the last streamed value explicitly instead of relying on the leaked
    # loop variable, which may be undefined or stale when the stream is empty.
    final_state = None
    for output in master_flow(configs).stream(inputs):
        for key, value in output.items():
            final_state = value
    if final_state is None:
        raise RuntimeError(f"master_flow produced no output for query: {query!r}")
    answer = final_state["final_answer"]

    configs.observer.final_answer= answer
    ret_dict[f"attempt {attempt}"] = configs.observer.generate_dict()

    attempt = attempt + 1

# The output path is fixed for this test, so no Neutral/No-Neutral suffix is needed.
write_file("/kaggle/working/complete_test_llama31.json",ret_dict)

In [None]:
# Show the results dictionary built by the most recently executed test cell.
print(ret_dict)

# Evaluation

## Entailment evaluation

In [None]:
import pandas as pd

### Valutazione su notizie vere (0) e notizie false (1)

In [None]:
# Label "0": true news
# Label "1": fake news

def results_on_label(label, num_attempts, dict_input, aspects):
    """Aggregate the entailment metrics of one label over all attempts and aspects.

    Reads ``dict_input[f"attempt {i}"]["aspects"][aspect]["report_entailment"][label]``
    for ``i`` in ``1..num_attempts`` and every aspect, using its ``recall``,
    ``precision`` and ``support`` entries.

    Args:
        label: Key of the class inside the entailment report ("0" or "1").
        num_attempts: Number of attempts stored in ``dict_input`` (1-based keys).
        dict_input: Results dictionary as loaded from a saved experiment file.
        aspects: Iterable of aspect names to include (must support ``len``).

    Returns:
        dict with the mean ``recall`` / ``precision``, their population variances
        (``var_recall`` / ``var_precision``), the ``f1`` computed from the two
        means, and ``support`` averaged over all (attempt, aspect) pairs.
        Means, variances and F1 are NaN when no pair has a non-zero support;
        F1 is 0.0 when both means are 0.
    """
    recalls = []
    precisions = []
    support = 0

    for i in range(1, num_attempts + 1):
        aspects_of_attempt = dict_input[f"attempt {i}"]["aspects"]
        for aspect in aspects:
            report = aspects_of_attempt[f"{aspect}"]["report_entailment"][label]
            # An (attempt, aspect) pair is valid only if at least one document
            # with this label was retrieved; invalid pairs contribute to neither
            # the means nor the variances, so numerator and divisor always agree.
            if report["support"] == 0:
                continue
            recalls.append(report["recall"])
            precisions.append(report["precision"])
            support = support + report["support"]

    valid_attempts = len(recalls)
    if valid_attempts == 0:
        # Nothing to average: report the metrics as undefined instead of crashing.
        e_recall = e_precision = f1 = var_recall = var_precision = float("nan")
    else:
        e_recall = sum(recalls) / valid_attempts
        e_precision = sum(precisions) / valid_attempts
        denominator = e_recall + e_precision
        # F1 is conventionally 0 when both precision and recall are 0.
        f1 = 2 * e_recall * e_precision / denominator if denominator else 0.0
        var_recall = sum((rec - e_recall) ** 2 for rec in recalls) / valid_attempts
        var_precision = sum((prec - e_precision) ** 2 for prec in precisions) / valid_attempts

    total_pairs = num_attempts * len(aspects)
    return {"recall": e_recall,
            "var_recall": var_recall,
            "precision": e_precision,
            "var_precision": var_precision,
            "f1": f1,
            "support": support / total_pairs if total_pairs else 0.0}

### Valutazione per aspetto

In [None]:
def results_on_aspects(aspect, num_attempts, dict_input):
    """Aggregate the entailment metrics of one aspect over all attempts.

    Reads ``dict_input[f"attempt {i}"]["aspects"][aspect]["report_entailment"]``
    for ``i`` in ``1..num_attempts`` and uses its ``"0"``, ``"1"``,
    ``"weighted avg"`` and (optional) ``"accuracy"`` entries.

    Args:
        aspect: Name of the aspect to evaluate.
        num_attempts: Number of attempts stored in ``dict_input`` (1-based keys).
        dict_input: Results dictionary as loaded from a saved experiment file.

    Returns:
        dict with ``macro_recall`` / ``macro_precision`` (mean of the two
        per-class means, each taken over the attempts where that class has a
        non-zero support), ``macro_f1``, ``macro_accuracy`` (mean accuracy over
        all attempts), and ``w_recall`` / ``w_precision`` / ``w_f1`` from the
        weighted averages. A per-class mean is NaN when the class never has
        support (which makes the macro values NaN); an F1 is 0.0 when its
        precision and recall are both 0.
    """
    nan = float("nan")
    labels = ("0", "1")
    recalls = {label: [] for label in labels}
    precisions = {label: [] for label in labels}

    accuracy = 0
    weight_recall = 0
    weight_precision = 0

    for i in range(1, num_attempts + 1):
        report = dict_input[f"attempt {i}"]["aspects"][f"{aspect}"]["report_entailment"]

        for label in labels:
            class_report = report[label]
            # Only attempts where the class actually occurs count towards its mean,
            # so the sum and the divisor always refer to the same attempts.
            if class_report["support"] != 0:
                recalls[label].append(class_report["recall"])
                precisions[label].append(class_report["precision"])

        weight_recall = weight_recall + report["weighted avg"]["recall"]
        weight_precision = weight_precision + report["weighted avg"]["precision"]

        try:
            accuracy = accuracy + report["accuracy"]
        except KeyError:
            # If the accuracy is missing from the report it is because it equals 1
            # (see example 4 of llama_second_skeptical_neutral).
            accuracy = accuracy + 1

    def mean(values):
        # Undefined (NaN) rather than ZeroDivisionError when nothing was collected.
        return sum(values) / len(values) if values else nan

    def harmonic_mean(recall, precision):
        # F1 is conventionally 0 when both inputs are 0; NaN inputs propagate.
        total = recall + precision
        return 2 * recall * precision / total if total else 0.0

    macro_recall = (mean(recalls["0"]) + mean(recalls["1"])) / 2
    macro_precision = (mean(precisions["0"]) + mean(precisions["1"])) / 2

    w_recall = weight_recall / num_attempts if num_attempts else nan
    w_precision = weight_precision / num_attempts if num_attempts else nan

    return {"macro_recall": macro_recall,
            "macro_precision": macro_precision,
            "macro_f1": harmonic_mean(macro_recall, macro_precision),
            "macro_accuracy": accuracy / num_attempts if num_attempts else nan,
            "w_recall": w_recall,
            "w_precision": w_precision,
            "w_f1": harmonic_mean(w_recall, w_precision)}

In [None]:
import json

def load_json(filename):
    """Read a JSON document from disk and return the decoded Python object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file content.
    """
    # JSON text is UTF-8 by specification, so decode it explicitly instead of
    # depending on the platform's default text encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        return json.load(file)

def write_file(filename,content):
    """Serialise ``content`` as JSON into ``filename``, replacing any previous content.

    Args:
        filename: Destination path; the file is opened in write mode.
        content: JSON-serialisable object to store.

    The output is pretty-printed with a 4-space indentation.
    """
    with open(filename, mode='w') as sink:
        json.dump(content, sink, indent=4)

In [None]:
# label 0: true news (negative class)
# label 1: fake news (positive class)
import pandas as pd

def compute_result_on_label(model, shot, acceptance, neutral, num_queries, aspects) -> pd.DataFrame:
    """Build a one-row table with the per-label entailment metrics of one experiment.

    The experiment file is looked up under ``/kaggle/working``; its name carries
    the shot count unless ``shot`` is ``"first"`` or the model is ``"BART"`` or
    ``"gemma2"``.

    Args:
        model: Model tag used in the result file name.
        shot: Few-shot setting used in the result file name.
        acceptance: Acceptance strategy label used in the file name.
        neutral: Neutral-handling label used in the file name.
        num_queries: Number of attempts stored in the file.
        aspects: Aspect names to aggregate over.

    Returns:
        A single-row DataFrame: the experiment descriptors followed by
        precision, variance, recall, F1 and support for label "0" and label "1".
    """
    file_name_has_no_shots = shot=="first" or model=="BART" or model=="gemma2"
    if file_name_has_no_shots:
        path = f"/kaggle/working/test_{model}_{acceptance}_{neutral}.json"
    else:
        path = f"/kaggle/working/{model}_{shot}_shots_{acceptance}_{neutral}.json"
    dict_input = load_json(path)

    # Experiment descriptors come first, in the order they appear in the table.
    row = {
        "model": [model],
        "shots": [shot],
        "num_queries": [num_queries],
        "retrieved": [10 if num_queries==10 else 20],
        "type_acceptance": [acceptance],
        "neutral": [neutral],
    }

    # Then one group of metric columns per label, label "0" before label "1".
    for label in ("0", "1"):
        stats = results_on_label(label, num_queries, dict_input, aspects)
        row[f"precision_{label}"] = [stats["precision"]]
        row[f"var_prec_{label}"] = [stats["var_precision"]]
        row[f"recall_{label}"] = [stats["recall"]]
        row[f"var_recall_{label}"] = [stats["var_recall"]]
        row[f"f1_{label}"] = [stats["f1"]]
        row[f"support_{label}"] = [stats["support"]]

    return pd.DataFrame(row)

In [None]:
def compute_result_on_aspects(model, shot, acceptance, neutral, num_queries, aspects) -> pd.DataFrame:
    """Build a one-row table with the per-aspect entailment metrics of one experiment.

    The experiment file is looked up under ``/kaggle/working``; its name carries
    the shot count unless ``shot`` is ``"first"`` or the model is ``"BART"`` or
    ``"gemma2"``.

    Args:
        model: Model tag used in the result file name.
        shot: Few-shot setting used in the result file name.
        acceptance: Acceptance strategy label used in the file name.
        neutral: Neutral-handling label used in the file name.
        num_queries: Number of attempts stored in the file.
        aspects: Aspect names; each contributes four metric columns.

    Returns:
        A single-row DataFrame: the experiment descriptors followed by macro
        precision, recall, F1 and accuracy for every aspect.
    """
    file_name_has_no_shots = shot=="first" or model=="BART" or model=="gemma2"
    if file_name_has_no_shots:
        path = f"/kaggle/working/test_{model}_{acceptance}_{neutral}.json"
    else:
        path = f"/kaggle/working/{model}_{shot}_shots_{acceptance}_{neutral}.json"
    dict_input = load_json(path)

    # Experiment descriptors come first, in the order they appear in the table.
    row = {
        "model": [model],
        "shots": [shot],
        "num_queries": [num_queries],
        "retrieved": [10 if num_queries==10 else 20],
        "type_acceptance": [acceptance],
        "neutral": [neutral],
    }

    # Then four metric columns per aspect, in the order the aspects are given.
    for aspect in aspects:
        metrics = results_on_aspects(aspect, num_queries, dict_input)
        row[f"precision_{aspect}"] = [metrics["macro_precision"]]
        row[f"recall_{aspect}"] = [metrics["macro_recall"]]
        row[f"f1_{aspect}"] = [metrics["macro_f1"]]
        row[f"accuracy_{aspect}"] = [metrics["macro_accuracy"]]

    return pd.DataFrame(row)

In [None]:
import openpyxl

attempts = 10
num_aspects = 3

models = ["llama31"] #["gemma2","BART",]
shots = [0,3,6,12] #["first","second","third"]
type_acceptance = ["Skeptical","Credulous"]
neutral_acceptance = ["No-Neutral","Neutral"]
aspects = ["Health","Governmental","Society"]

excel_file = "/kaggle/working/test_entailment_labels_X.xlsx"

# Collect the one-row frames first and concatenate once at the end: this avoids
# re-copying the table on every iteration and never concatenates onto an empty frame.
rows = []
for model in models:
    for shot in shots:
        for acceptance in type_acceptance:
            for neutral in neutral_acceptance:
                new_row = compute_result_on_label(model, shot, acceptance, neutral, attempts, aspects)
                rows.append(new_row)
        # The BART result file name does not include the shot count, so one pass is enough.
        if model=="BART": break

results = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()

print(results.round(3))
#results.round(3).to_excel(excel_file, index=False, engine='openpyxl')

In [None]:
attempts = 10
num_aspects = 3

models = ["llama31"]#["BART","llama31"] #["gemma2"]
shots = [0,3,6,12] #["first","second","third"]
type_acceptance = ["Skeptical","Credulous"]
neutral_acceptance = ["No-Neutral","Neutral"]
aspects = ["Health","Governmental","Society"]

excel_file = "/kaggle/working/test_entailment_aspects_X.xlsx"

# Collect the one-row frames first and concatenate once at the end: this avoids
# re-copying the table on every iteration and never concatenates onto an empty frame.
rows = []
for model in models:
    for shot in shots:
        for acceptance in type_acceptance:
            for neutral in neutral_acceptance:
                new_row = compute_result_on_aspects(model, shot, acceptance, neutral, attempts, aspects)
                rows.append(new_row)
        # The BART result file name does not include the shot count, so one pass is enough.
        if model=="BART": break

results = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()

print(results.round(3))
#results.round(3).to_excel(excel_file, index=False, engine='openpyxl')

# Ragas (application evaluation)

https://docs.ragas.io/en/stable/

In [None]:
#todo