# RAG and Conversational AI Interface

This demo application showcases Retrieval-Augmented Generation (RAG) and conversational agents built on GPT-3.5-turbo and GPT-4o. If no further indexing is required, run every cell except those in the optional "Data Indexing" section. Then click the link in the output of the last code cell and explore the UI, asking questions with the different modes and models.

In [1]:
# Clone the main repository to retrieve the structured folders
!git clone https://github.com/gpapageorgiouedu/Enhancing-E-Government-Services-through-a-SOTA-Modular-and-Reproducible-Architecture-over-LLMs.git

Cloning into 'Enhancing-E-Government-Services-through-a-SOTA-Modular-and-Reproducible-Architecture-over-LLMs'...
remote: Enumerating objects: 28, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 28 (delta 6), reused 13 (delta 0), pack-reused 0[K
Receiving objects: 100% (28/28), 42.76 KiB | 684.00 KiB/s, done.
Resolving deltas: 100% (6/6), done.


#### Imports

In [2]:
%%bash
# Install the pinned Python dependencies.
# The %%bash cell magic has to be the very first line of the cell, so this comment lives below it.
# The extras spec is quoted so that bash never interprets the square brackets as a glob pattern.
pip install -q "farm-haystack[pinecone,inference,crawler]==1.26.2"
pip install -q fastapi==0.112.0 uvicorn==0.30.5 python-multipart==0.0.9 PyMuPDF==1.24.9 python-docx==1.1.2

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 151.6/151.6 kB 696.8 kB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.8/134.8 kB 12.3 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 763.7/763.7 kB 28.9 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.8/8.8 MB 75.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 179.4/179.4 kB 14.5 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 72.8 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 48.7/48.7 kB 4.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.4/9.4 MB 84.0 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 227.1/227.1 kB 19.7 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 60.5 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 52.7 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 75.6/75.6 kB 7.1 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 77.9/

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 1.4.12 requires pydantic>=2.7.0, but you have pydantic 1.10.17 which is incompatible.
ipython-sql 0.5.0 requires sqlalchemy>=2.0, but you have sqlalchemy 1.4.53 which is incompatible.


In [3]:
# Install poppler-utils, the system package that provides PDF command-line tools such as pdftotext
!apt-get install -y poppler-utils

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  poppler-utils
0 upgraded, 1 newly installed, 0 to remove and 45 not upgraded.
Need to get 186 kB of archives.
After this operation, 696 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.5 [186 kB]
Fetched 186 kB in 1s (195 kB/s)
Selecting previously unselected package poppler-utils.
(Reading database ... 123598 files and directories currently installed.)
Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.5_amd64.deb ...
Unpacking poppler-utils (22.02.0-2ubuntu0.5) ...
Setting up poppler-utils (22.02.0-2ubuntu0.5) ...
Processing triggers for man-db (2.10.2-1) ...


In [4]:
# Import libraries
import inspect
import json
import logging
import os
import tempfile
from pathlib import Path
from threading import Thread

import fitz
import pandas as pd
import uvicorn
from docx import Document as DocxDocument
from fastapi import FastAPI, Request, Form, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from google.colab.output import eval_js
from haystack import Pipeline, Document
from haystack.agents import Tool
from haystack.agents.conversational import ConversationalAgent
from haystack.agents.memory import ConversationSummaryMemory
from haystack.document_stores import PineconeDocumentStore, InMemoryDocumentStore
from haystack.nodes import (Crawler, PreProcessor, EmbeddingRetriever,
                            PromptNode, PromptTemplate, AnswerParser)
from haystack.nodes.retriever.web import WebRetriever
from haystack.nodes.ranker.diversity import DiversityRanker
from haystack.nodes.ranker.lost_in_the_middle import LostInTheMiddleRanker
from haystack.nodes.sampler.top_p_sampler import TopPSampler
from haystack.utils import convert_files_to_docs

In [5]:
# Load the API keys from Colab's secret store.
# The Hugging Face token is optional; Pinecone can be replaced with any vector DB
# and OpenAI with another LLM provider.
from google.colab import userdata

try:
    hf_token = userdata.get('HF_TOKEN')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    # userdata.get raises when a secret is missing or not shared with this notebook.
    # The Hugging Face token is optional, so fall back to None instead of aborting the cell.
    hf_token = None

# These three keys are required by the cells below, so a missing secret should fail loudly here
pinecone_api_key = userdata.get('PINECONE')
OPENAI_API_KEY = userdata.get('OPENAI')
search_key = userdata.get('SERPER')

In [6]:
# Folders provided by the cloned GitHub repository, all resolved from one shared root
repository_directory = '/content/Enhancing-E-Government-Services-through-a-SOTA-Modular-and-Reproducible-Architecture-over-LLMs'
templates_directory = f'{repository_directory}/templates'    # Jinja2 HTML templates
static_directory = f'{repository_directory}/static'          # CSS and JS assets
research_directory = f'{repository_directory}/research'      # Research documents to index
pc_directory = f'{repository_directory}/presscorner'         # Press Corner JSON files to index

##### Helper Functions for indexing

In [7]:
# Helper functions used for indexing
def load_file_as_document(file_path):
    """
    Read a UTF-8 text file and wrap its content in a Haystack Document.

    Parameters:
      file_path (str): Location of the text file to read.

    Returns:
      Document: A Haystack Document whose content is the full text of the file.
    """
    text = Path(file_path).read_text(encoding='utf-8')
    return Document(content=text)

def load_text_files_as_documents(txt_folder_path):
    """
    Load every '*.txt' file directly inside a folder as a Haystack Document.

    Files are processed in sorted path order so the returned list is the same on every
    run, regardless of the order in which the filesystem enumerates the folder.

    Parameters:
      txt_folder_path (str or Path): The path to the folder containing the text files.

    Returns:
      documents: A list of Haystack's Document objects, one per text file, each containing
      the text content of that file. The list is empty when the folder has no '*.txt' files.
    """
    # Reuse the single-file loader instead of duplicating the open/read/wrap logic
    return [
        load_file_as_document(txt_file)
        for txt_file in sorted(Path(txt_folder_path).glob('*.txt'))
    ]

def extract_text_from_file(file_path, content_type):
    """
    Extract the text content of a file, choosing the parser from its MIME type.

    Supported content types: PDF (via PyMuPDF), Microsoft Word (via python-docx) and
    plain text files (read as UTF-8).

    Parameters:
      file_path (str): The path to the file to be processed.
      content_type (str): The MIME type of the file, used to select the extraction method.

    Returns:
      text: The extracted text content from the file in str format.

    Raises:
      HTTPException: With status 400 if the content type is unsupported, or with
      status 500 if the file cannot be opened or parsed.
    """
    word_content_types = (
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    try:
        if content_type == "application/pdf":
            # The context manager closes the PDF handle even if a page fails to parse
            with fitz.open(file_path) as pdf:
                return "".join(page.get_text() for page in pdf)
        if content_type in word_content_types:
            # NOTE(review): python-docx parses .docx (OOXML) files; a legacy binary .doc upload
            # sent as application/msword will presumably fail here and surface as a 500 - confirm.
            word_doc = DocxDocument(file_path)
            return "\n".join(para.text for para in word_doc.paragraphs)
        if content_type == "text/plain":
            with open(file_path, "r", encoding="utf-8") as file:
                return file.read()
        raise HTTPException(status_code=400, detail="Unsupported file type")
    except HTTPException:
        # Let the deliberate 400 through unchanged; the generic handler below would
        # otherwise re-wrap it as a 500 and hide the real reason from the client.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error extracting text: {str(e)}") from e

#### Document Store and Index Configuration

##### Indexing pipeline for PressCorner

In [8]:
# Press Corner indexing setup
# Pinecone-backed store for the embedded Press Corner documents (another vector DB can be used instead)
document_store_pc = PineconeDocumentStore(
    api_key=pinecone_api_key, similarity="cosine", index="press_corner_demo",
    embedding_dim=1536, environment="us-west4-gcp-free", progress_bar=True,
)

# Cleans the raw documents and splits them into overlapping, sentence-aligned word chunks before indexing
preprocessor_pc = PreProcessor(
    clean_empty_lines=True, clean_whitespace=False, clean_header_footer=True,
    split_by="word", split_length=200, split_overlap=30,
    split_respect_sentence_boundary=True,
)

# Embeds the chunks with the configured OpenAI embedding model (the embedding_model can be changed)
retriever_pc = EmbeddingRetriever(
    document_store=document_store_pc, batch_size=8,
    embedding_model="text-embedding-ada-002", api_key=OPENAI_API_KEY, max_seq_len=1536,
)

# Indexing pipeline for Press Corner data: File -> preprocessor -> retriever (embedding) -> Pinecone store
indexing_pipeline_pc = Pipeline()
indexing_pipeline_pc.add_node(component=preprocessor_pc, name="preprocessor_pc", inputs=['File'])
indexing_pipeline_pc.add_node(component=retriever_pc, name="retriever_pc", inputs=['preprocessor_pc'])
indexing_pipeline_pc.add_node(component=document_store_pc, name="document_store_pc", inputs=['retriever_pc'])

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


##### Indexing pipeline for Research docs

In [9]:
# Research indexing setup
# Pinecone-backed store for the embedded Research documents (another vector DB can be used instead)
document_store_research = PineconeDocumentStore(
    api_key=pinecone_api_key, similarity="cosine", index="research_demo",
    embedding_dim=1536, environment="us-west4-gcp-free", progress_bar=True,
)

# Cleans the raw documents and splits them into overlapping, sentence-aligned word chunks before indexing
preprocessor_research = PreProcessor(
    clean_empty_lines=True, clean_whitespace=False, clean_header_footer=True,
    split_by="word", split_length=200, split_overlap=30,
    split_respect_sentence_boundary=True,
)

# Embeds the chunks with the configured OpenAI embedding model (the embedding_model can be changed)
retriever_research = EmbeddingRetriever(
    document_store=document_store_research,
    batch_size=8,
    embedding_model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY,
    max_seq_len=1536,
)

# Indexing pipeline for Research documents: File -> preprocessor -> retriever (embedding) -> Pinecone store
indexing_pipeline_research = Pipeline()
indexing_pipeline_research.add_node(component=preprocessor_research, name="preprocessor_research", inputs=['File'])
indexing_pipeline_research.add_node(component=retriever_research, name="retriever_research", inputs=['preprocessor_research'])
indexing_pipeline_research.add_node(component=document_store_research, name="document_store_research", inputs=['retriever_research'])

##### Indexing pipeline for In Memory docs

In [10]:
# In Memory indexing setup
# In-memory document store for temporarily holding embedded documents
document_store_inmemory = InMemoryDocumentStore()

# Cleans the raw documents and splits them into overlapping, sentence-aligned word chunks before indexing
preprocessor_memory = PreProcessor(
    clean_empty_lines=True, clean_whitespace=False, clean_header_footer=True,
    split_by="word", split_length=200, split_overlap=30,
    split_respect_sentence_boundary=True,
)

# Embeds the chunks with the configured OpenAI embedding model (the embedding_model can be changed)
retriever_inmemory = EmbeddingRetriever(
    document_store=document_store_inmemory,
    batch_size=8,
    embedding_model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY,
    max_seq_len=1536,
)

# Indexing pipeline for temporary documents: File -> preprocessor -> retriever (embedding) -> in-memory store
indexing_pipeline_inmemory = Pipeline()
indexing_pipeline_inmemory.add_node(component=preprocessor_memory, name="preprocessor_inmemory", inputs=['File'])
indexing_pipeline_inmemory.add_node(component=retriever_inmemory, name="retriever_inmemory", inputs=['preprocessor_inmemory'])
indexing_pipeline_inmemory.add_node(component=document_store_inmemory, name="document_store_inmemory", inputs=['retriever_inmemory'])

#### RAG and Conversational Pipelines Configuration

##### RAG for Research

In [11]:
# RAG for Research
# Prompt text used by both Research RAG pipelines
prompt = """
Synthesize a comprehensive answer from the following most relevant context and the given question.
If the documents do not contain the answer to the question, say that answering is not possible given the available information.
Provide a clear and concise answer, no longer than 100 words.
\n\n Context: {join(documents)} \n\n Question: {query} \n\n Answer:
"""

# Query-time retriever over the Research document store (top_k=5 documents per query)
retriever_research = EmbeddingRetriever(
    document_store=document_store_research, embedding_model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY, top_k=5, progress_bar=False,
)

# Prompt template whose model output is post-processed by AnswerParser
template = PromptTemplate(prompt=prompt, output_parser=AnswerParser())

# GPT-3.5-turbo question-answering node
prompt_node = PromptNode(
    model_name_or_path="gpt-3.5-turbo", default_prompt_template=template,
    api_key=OPENAI_API_KEY, max_length=200,
)

# Research RAG pipeline on GPT-3.5: Query -> retriever -> prompt node
rag_pipeline_research = Pipeline()
rag_pipeline_research.add_node(component=retriever_research, name="Retriever_research", inputs=["Query"])
rag_pipeline_research.add_node(component=prompt_node, name="PromptNode", inputs=["Retriever_research"])

# GPT-4o question-answering node
prompt_node_gpt_4o_reseach = PromptNode(
    model_name_or_path="gpt-4o", default_prompt_template=template,
    api_key=OPENAI_API_KEY, max_length=200,
)

# Research RAG pipeline on GPT-4o: Query -> retriever -> prompt node
rag_pipeline_gpt_4o_reseach = Pipeline()
rag_pipeline_gpt_4o_reseach.add_node(component=retriever_research, name="Retriever", inputs=["Query"])
rag_pipeline_gpt_4o_reseach.add_node(component=prompt_node_gpt_4o_reseach, name="PromptNode", inputs=["Retriever"])

##### RAG for In Memory docs

In [12]:
# RAG for In Memory documents
# Prompt text used by both In Memory RAG pipelines
prompt = """
Synthesize a comprehensive answer from the following most relevant context and the given question.
If the documents do not contain the answer to the question, say that answering is not possible given the available information.
Provide a clear and concise answer, no longer than 100 words.
\n\n Context: {join(documents)} \n\n Question: {query} \n\n Answer:
"""

# Query-time retriever over the In Memory document store (top_k=5 documents per query)
retriever_inmemory = EmbeddingRetriever(
    document_store=document_store_inmemory, embedding_model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY, top_k=5, progress_bar=False,
)

# Prompt template whose model output is post-processed by AnswerParser
template = PromptTemplate(prompt=prompt, output_parser=AnswerParser())

# GPT-3.5-turbo question-answering node
prompt_node = PromptNode(
    model_name_or_path="gpt-3.5-turbo", default_prompt_template=template,
    api_key=OPENAI_API_KEY, max_length=200,
)

# In Memory RAG pipeline on GPT-3.5: Query -> retriever -> prompt node
rag_pipeline_inmemory = Pipeline()
rag_pipeline_inmemory.add_node(component=retriever_inmemory, name="Retriever_inmemory", inputs=["Query"])
rag_pipeline_inmemory.add_node(component=prompt_node, name="PromptNode", inputs=["Retriever_inmemory"])

# GPT-4o question-answering node for In Memory tasks
prompt_node_gpt_4o_inmemory = PromptNode(
    model_name_or_path="gpt-4o", default_prompt_template=template,
    api_key=OPENAI_API_KEY, max_length=200,
)

# In Memory RAG pipeline on GPT-4o: Query -> retriever -> prompt node
rag_pipeline_gpt_4o_inmemory = Pipeline()
rag_pipeline_gpt_4o_inmemory.add_node(component=retriever_inmemory, name="Retriever", inputs=["Query"])
rag_pipeline_gpt_4o_inmemory.add_node(component=prompt_node_gpt_4o_inmemory, name="PromptNode", inputs=["Retriever"])

##### RAG for Presscorner

In [13]:
# RAG for Press Corner
# Prompt text used by both Press Corner RAG pipelines
prompt = """
Synthesize a comprehensive answer from the following most relevant context and the given question.
If the documents do not contain the answer to the question, say that answering is not possible given the available information.
Provide a clear and concise answer, no longer than 100 words.
\n\n Context: {join(documents)} \n\n Question: {query} \n\n Answer:
"""

# Query-time retriever over the Press Corner document store (top_k=5 documents per query)
retriever_pc = EmbeddingRetriever(
    document_store=document_store_pc,
    embedding_model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY,
    top_k=5,
    progress_bar=False,
)

# Prompt template whose model output is post-processed by AnswerParser
template = PromptTemplate(prompt=prompt, output_parser=AnswerParser())

# GPT-3.5-turbo question-answering node
prompt_node = PromptNode(
    model_name_or_path="gpt-3.5-turbo", default_prompt_template=template,
    api_key=OPENAI_API_KEY, max_length=200,
)

# Press Corner RAG pipeline on GPT-3.5: Query -> retriever -> prompt node
rag_pipeline_pc = Pipeline()
rag_pipeline_pc.add_node(component=retriever_pc, name="Retriever", inputs=["Query"])
rag_pipeline_pc.add_node(component=prompt_node, name="PromptNode", inputs=["Retriever"])

# GPT-4o question-answering node
prompt_node_gpt_4o_pc = PromptNode(
    model_name_or_path="gpt-4o", default_prompt_template=template,
    api_key=OPENAI_API_KEY, max_length=200,
)

# Press Corner RAG pipeline on GPT-4o: Query -> retriever -> prompt node
rag_pipeline_gpt_4o_pc = Pipeline()
rag_pipeline_gpt_4o_pc.add_node(component=retriever_pc, name="Retriever", inputs=["Query"])
rag_pipeline_gpt_4o_pc.add_node(component=prompt_node_gpt_4o_pc, name="PromptNode", inputs=["Retriever"])

##### RAG for Websearch

In [14]:
# Create the prompt node for the web search pipeline, using a specified model and that model's API key
def create_websearch_prompt_node(api_key, model="gpt-3.5-turbo", max_length=200):
    """
    Build the PromptNode that answers questions from web search results.

    Parameters:
      api_key (str): API key passed to the PromptNode for the language model provider
        (see Haystack's PromptNode documentation for provider specifics).
      model (str): The name or path of the model to be used.
      max_length (int): Value forwarded to the PromptNode as max_length for generated responses.

    Returns:
      PromptNode: A prompt node whose default template is the web search prompt below,
      with its output handled by AnswerParser.
    """
    # NOTE(review): the prompt asks for up to 400 words while max_length defaults to 200;
    # confirm the two limits are meant to differ.
    # NOTE(review): the join pattern renders url, search.score, snippet_text and score but
    # no $content placeholder - confirm that passing only these fields is intentional.
    websearch_prompt = """
    Synthesize a comprehensive answer from the following most relevant context and the given question.
    If the documents do not contain the answer to the question, say that answering is not possible given the available information.
    Provide a clear and concise answer, no longer than 400 words.
    \n\n Context: {join(documents, delimiter=new_line, pattern=new_line+'Passage[$idx]:'+new_line+'Url: $url'+new_line+'Search Score: $search.score'+new_line+'Snippet Text: $snippet_text'+new_line+'Score: $score')} \n\n Question: {query} \n\n Answer:
    """
    websearch_template = PromptTemplate(
        prompt=websearch_prompt,
        output_parser=AnswerParser(),
    )
    node_settings = {
        "model_name_or_path": model,
        "default_prompt_template": websearch_template,
        "api_key": api_key,
        "max_length": max_length,
    }
    return PromptNode(**node_settings)

# Create a web search pipeline using a given prompt node and search API key (Serper)
def create_websearch_pipeline(prompt_node, search_key):
    """
    Assemble the web search pipeline: retrieve pages, sample, rank, then generate an answer.

    Parameters:
      prompt_node (PromptNode): The prompt node that generates the final response.
      search_key (str): API key for the web search provider (Serper in our use case).

    Returns:
      Pipeline: A pipeline wired as
      Query -> Retriever -> Sampler -> DiversityRanker -> LostInTheMiddleRanker -> PromptNode.
    """
    # Web retriever configured with top_search_results=10 and top_k=50, returning preprocessed documents
    retriever = WebRetriever(
        api_key=search_key, top_search_results=10, mode="preprocessed_documents", top_k=50
    )
    # https://docs.haystack.deepset.ai/v1.25/docs/toppsampler
    top_p_sampler = TopPSampler(top_p=0.97)
    # Ranks documents by diversity of content
    # https://docs.haystack.deepset.ai/v1.25/docs/ranker#diversityranker
    diversity = DiversityRanker()
    # Reorders documents to counter the "lost in the middle" effect, with a 1024-word threshold
    # https://docs.haystack.deepset.ai/v1.25/docs/ranker#lostinthemiddleranker
    lost_in_the_middle = LostInTheMiddleRanker(word_count_threshold=1024)

    # Each stage consumes the output of the stage listed directly before it
    stages = [
        (retriever, "Retriever", ["Query"]),
        (top_p_sampler, "Sampler", ["Retriever"]),
        (diversity, "DiversityRanker", ["Sampler"]),
        (lost_in_the_middle, "LostInTheMiddleRanker", ["DiversityRanker"]),
        (prompt_node, "PromptNode", ["LostInTheMiddleRanker"]),
    ]

    websearch = Pipeline()
    for stage_component, stage_name, stage_inputs in stages:
        websearch.add_node(component=stage_component, name=stage_name, inputs=stage_inputs)
    return websearch

# Web search prompt nodes, one per model (GPT-3.5-turbo and GPT-4o)
prompt_node_websearch_gpt_3_5 = create_websearch_prompt_node(OPENAI_API_KEY, "gpt-3.5-turbo")
prompt_node_websearch_gpt_4 = create_websearch_prompt_node(OPENAI_API_KEY, "gpt-4o")

# Web search pipelines built around each prompt node, sharing the same search API key
websearch_pipeline = create_websearch_pipeline(prompt_node_websearch_gpt_3_5, search_key)
websearch_pipeline_gpt_4 = create_websearch_pipeline(prompt_node_websearch_gpt_4, search_key)



config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

##### Agent Configuration

In [15]:
# Prompt nodes that drive the conversational agents
# GPT-3.5-turbo agent node: generation stops at "Observation:", low temperature
agent_prompt_node = PromptNode(
    model_name_or_path="gpt-3.5-turbo", api_key=OPENAI_API_KEY, max_length=256,
    stop_words=["Observation:"], model_kwargs={"temperature": 0.1},
)

# GPT-4o agent node with the same settings
agent_prompt_node_gpt_4 = PromptNode(
    model_name_or_path="gpt-4o", api_key=OPENAI_API_KEY, max_length=256,
    stop_words=["Observation:"], model_kwargs={"temperature": 0.1},
)

# Conversation memory: a summarization model condenses the chat transcript
# (bart-large-cnn-samsum or any other model that performs well at summarization can be used instead)
memory_prompt_node = PromptNode(
    model_name_or_path="philschmid/flan-t5-base-samsum",
    max_length=256,
    model_kwargs={"task_name": "text2text-generation"},
)
memory = ConversationSummaryMemory(memory_prompt_node, prompt_template="{chat_transcript}")

# Prompt template used by the conversational agents.
# It contains four placeholders: {tool_names_with_descriptions}, {memory}, {query} and {transcript}.
# NOTE(review): presumably these are filled in by Haystack's ConversationalAgent at run time - confirm.
# The template prescribes the Thought / Tool / Tool Input / Observation / Final Answer response format.
agent_prompt = """
In the following conversation, a human user interacts with the AI Agent that has access to the documentation of Press Corner.
Press Corner is the official portal of Press material from the European Commission Spokesperson's Service.
The human poses questions and AI Agent should try to find an answer to every question.
The final answer to the question should be truthfully based solely on the output of the tool.
The AI Agent should ignore its knowledge when answering the questions.

The AI Agent has access to this tool:
{tool_names_with_descriptions}

The following is the previous conversation between a human and The AI Agent:
{memory}

AI Agent responses must start with one of the following:

Thought: [the AI Agent's reasoning process]
Tool: [tool names] (on a new line) Tool Input: [input as a question for the selected tool WITHOUT quotation marks and on a new line] (These must always be provided together and on separate lines.)
Observation: [tool's result]
Final Answer: (on a new line) [final answer to the human user's question]
When selecting a tool, the AI Agent must provide both the "Tool:" and "Tool Input:" pair in the same response, but on separate lines.

The AI Agent should not ask the human user for additional information, clarification, or context.
If the AI Agent cannot find a specific answer after exhausting available tools and approaches, it answers with Final Answer: inconclusive

Question: {query}
Thought:
{transcript}
"""

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [16]:
# Define the tools
def create_tool(pipeline, name, description):
    """
    Wrap a pipeline or node in a Haystack Tool that an agent can call.

    Parameters:
      pipeline (Pipeline or Node): The processing pipeline or node the tool runs.
      name (str): The name of the tool.
      description (str): Guidelines telling the agent when to use this tool.

    Returns:
      Tool: A tool configured with the given pipeline, name and description, reading
      its result from the "answers" output variable.
    """
    tool_settings = {
        "name": name,
        "pipeline_or_node": pipeline,
        "description": description,
        "output_variable": "answers",
    }
    return Tool(**tool_settings)

# Tool specifications as (pipeline, name, usage guideline for the agent); adjust to your preferred tools
tool_specs = [
    (
        rag_pipeline_pc,
        "PressCorner",
        "useful for when you need to answer questions about the Press Corner website of the European Commission.",
    ),
    (
        websearch_pipeline_gpt_4,
        "Web_Search",
        "useful for when you need to search the web for answering questions.",
    ),
    (
        rag_pipeline_research,
        "Research",
        "useful for when you need to answer questions about Research and journal Papers by the European Commission",
    ),
    (
        rag_pipeline_inmemory,
        "In_memory",
        "useful for when you need to answer questions with documents stored in memory",
    ),
]
tools = [
    create_tool(tool_pipeline, tool_name, tool_description)
    for tool_pipeline, tool_name, tool_description in tool_specs
]

# Conversational agent driven by GPT-3.5-turbo
conversational_agent = ConversationalAgent(
    prompt_node=agent_prompt_node, tools=tools, memory=memory, prompt_template=agent_prompt
)

# Conversational agent driven by GPT-4o (same tools, memory object and prompt template)
conversational_agent_gpt_4 = ConversationalAgent(
    prompt_node=agent_prompt_node_gpt_4, tools=tools, memory=memory, prompt_template=agent_prompt
)

#### Data Indexing (Optional — run only at the start, when you need to index your data)

##### Research

In [None]:
# Place the PDFs of your choice in the research folder, then run this cell to index them.
# Convert every file in the research folder into Haystack documents
all_docs = convert_files_to_docs(dir_path=research_directory, split_paragraphs=True)

# Run the Research indexing pipeline over the converted documents
indexing_pipeline_research.run(documents=all_docs)

##### Press Corner

In [None]:
# Define the working paths for the Press Corner indexing step.
# inside a notebook, inspect.stack()[0].filename is typically a synthetic cell name rather than
# a real file, so its parent is not a dependable location; anchor on the current working
# directory explicitly instead.
parent = Path.cwd()
output = parent

# Folder that receives the content-only copies of the Press Corner JSON files.
# mkdir(parents=True, exist_ok=True) is safe to re-run and needs no separate existence check.
files_path = output.joinpath('files')
files_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Press Corner indexing: source JSON -> content-only JSON -> plain text -> Haystack Documents -> index.
# inside a notebook, inspect.stack()[0].filename is typically a synthetic cell name rather than
# a real file, so anchor the working folders on the current working directory explicitly.
parent = Path.cwd()
output = parent

# Folder that receives the content-only copies of the Press Corner JSON files
files_path = output.joinpath('files')
files_path.mkdir(parents=True, exist_ok=True)

# Step 1: copy each retrieved JSON file into 'files', keeping only its "content" field
for json_file in sorted(Path(pc_directory).glob('*.json')):
    # Log the file name only; printing the parsed payload would flood the cell output
    print(json_file)
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # A missing or null "content" becomes an empty string so the later text write cannot fail
    content_data = {"content": data.get("content") or ""}

    # The copy keeps the original file name
    nfile_path = files_path.joinpath(json_file.name)
    with open(nfile_path, 'w', encoding='utf-8') as nf:
        json.dump(content_data, nf, indent=4)

# Step 2: write the "content" of every JSON copy to a same-named .txt file
json_folder_path = files_path  # Folder path
txt_folder_path = output.joinpath('text_files')  # Folder to save text files
txt_folder_path.mkdir(parents=True, exist_ok=True)  # Create the folder if it doesn't exist

for json_file in sorted(json_folder_path.glob('*.json')):
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    content = data.get("content") or ""

    # Same name as the JSON file, but with a .txt extension
    txt_file_path = txt_folder_path / json_file.with_suffix('.txt').name
    with open(txt_file_path, 'w', encoding='utf-8') as f:
        f.write(content)

# Step 3: load the text files as Haystack Documents using the shared helper defined above
documents = load_text_files_as_documents(txt_folder_path)

# Step 4: run the Press Corner indexing pipeline
indexing_pipeline_pc.run(documents=documents)

#### Deployment

In [17]:
# Configure logging at INFO level
logging.basicConfig(level=logging.INFO)

# FastAPI application instance
app = FastAPI()

# Serve the CSS and JS files from the static directory under /static
app.mount("/static", StaticFiles(directory=static_directory), name="static")

# Jinja2 templates loaded from the templates directory
templates = Jinja2Templates(directory=templates_directory)

# Conversation history, kept in a single module-level list
conversation = []

@app.get("/", response_class=HTMLResponse)
async def get_form(request: Request):
    """
    Render the chat interface.

    Serves the main HTML page of the app, filled in with the conversation
    history collected so far. This is the entry point for user interactions.

    Parameters:
      request: The incoming HTTP request object.

    Returns:
      TemplateResponse: The "main.html" template rendered with the request and
      the current conversation history (exposed to the template as "messages").
    """
    template_context = {"request": request, "messages": conversation}
    return templates.TemplateResponse("main.html", template_context)

@app.post("/result", response_class=HTMLResponse)
async def show_result(request: Request, question: str = Form(...), mode: str = Form(...), model: str = Form(...)):
    """
    Answer a question with the selected mode and model and re-render the chat.

    The mode selects the processing path (In Memory, Research, PressCorner,
    Conversational or Web Search) and the model selects the GPT-3.5 or GPT-4o
    variant of that path. The question and the resulting message are appended
    to the shared conversation history and the chat page is rendered again.

    Parameters:
      request: The incoming HTTP request object.
      question: The user's question submitted through a form.
      mode: One of "inmemory", "research", "presscorner", "conversational",
            "websearch".
      model: One of "gpt-3.5", "gpt-4o".

    Returns:
      TemplateResponse: The "main.html" page with the updated conversation,
      plus the mode and model that were just used ("last_mode", "last_model").

    Notes:
      No HTTPException is raised from here. An unknown mode, an unknown model,
      or any exception during processing is turned into a message that is
      shown in the conversation instead of the answer.
    """
    # NOTE(review): history entries are tagged "format": "html" and contain
    # user-supplied text (question, mode, model) as well as exception text.
    # If main.html renders these without escaping, that is an HTML injection
    # vector. Confirm how the template handles the "format" field.
    try:
        if mode not in ("inmemory", "research", "presscorner", "conversational", "websearch"):
            message_content = f"Invalid mode selected: {mode}"

        elif model not in ("gpt-3.5", "gpt-4o"):
            # Without this branch a valid mode combined with an unsupported
            # model assigned nothing, and the append below failed with an
            # unhandled UnboundLocalError.
            message_content = f"Invalid model selected: {model}"

        elif mode == "conversational":
            # Conversational Agent processing. The conditional expression only
            # looks up the agent that is actually selected.
            agent = conversational_agent if model == "gpt-3.5" else conversational_agent_gpt_4
            response = agent.run(question)

            if response['answers']:
                first_answer = response['answers'][0]
                answer = first_answer.answer if first_answer.answer else 'No answer provided'

                # Append the transcript, one line per paragraph, below the answer
                parts = response.get('transcript', '').split('\n')
                formatted_parts = '<br><br>'.join(parts)

                message_content = f"{answer}<br><br>{formatted_parts}"
            else:
                message_content = "No answer provided"

        elif mode == "websearch":
            # Web search processing
            pipeline = websearch_pipeline if model == "gpt-3.5" else websearch_pipeline_gpt_4
            response = pipeline.run(query=question, params={})
            message_content = response["answers"][0].answer  # plain text, no HTML tags

        else:
            # Index-backed RAG modes share one call shape and differ only in
            # which pipeline is run.
            if mode == "inmemory":
                pipeline = rag_pipeline_inmemory if model == "gpt-3.5" else rag_pipeline_gpt_4o_inmemory
            elif mode == "research":
                pipeline = rag_pipeline_research if model == "gpt-3.5" else rag_pipeline_gpt_4o_reseach
            else:  # mode == "presscorner"
                pipeline = rag_pipeline_pc if model == "gpt-3.5" else rag_pipeline_gpt_4o_pc

            response = pipeline.run(query=question)
            message_content = response['answers'][0].answer  # plain text, no HTML tags

    except Exception as e:
        # Best effort: show the failure in the chat rather than returning a 500
        logging.error(f"An error occurred: {str(e)}")
        message_content = f"An error occurred: {e}"

    # Append user Q&A to the conversation history
    conversation.append({"class": "user-message", "content": question, "format": "html"})
    conversation.append({"class": "convo-message", "content": message_content, "format": "html"})

    # Render the response using the "main.html" template
    return templates.TemplateResponse("main.html", {"request": request, "messages": conversation, "last_mode": mode, "last_model": model})

@app.post("/upload_and_preview")
async def upload_and_preview(file: UploadFile = File(...)):
    """
    Upload a file and preview its content.

    Validates the declared content type, writes the upload into a temporary
    directory, extracts its text with extract_text_from_file and returns the
    text for previewing.

    Parameters:
      file: An UploadFile object representing the file to be uploaded.
            It supports the following content types: text/plain, application/pdf,
            application/msword, and application/vnd.openxmlformats-officedocument.wordprocessingml.document.

    Returns:
      A JSON-serialisable dict containing:
          message: A confirmation message that the document is ready for review.
          fileName: The name of the uploaded file, as sent by the client.
          content: The extracted text content of the file.

    Raises:
      HTTPException 400: If the file format is not supported.
      HTTPException 500: If there is an internal server error during file processing or text extraction.
    """
    # Validate the uploaded file's content type
    supported_types = (
        "text/plain",
        "application/pdf",
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    if file.content_type not in supported_types:
        raise HTTPException(status_code=400, detail="File format not supported")

    try:
        # Create a temporary directory to store the uploaded file
        with tempfile.TemporaryDirectory() as temp_dir:
            # The filename is chosen by the client. Keep only its final path
            # component so an absolute path or "../" segments cannot make the
            # write below land outside temp_dir.
            safe_name = os.path.basename(file.filename or "")
            temp_file_path = os.path.join(temp_dir, safe_name)

            # Write the uploaded file to the temporary directory
            with open(temp_file_path, "wb") as temp_file:
                temp_file.write(await file.read())

            # Extract text while the temporary file still exists
            content_str = extract_text_from_file(temp_file_path, file.content_type)

        return {
            "message": "Document ready for review.",
            "fileName": file.filename,
            "content": content_str
        }
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        raise HTTPException(status_code=500, detail="An internal server error occurred.")

@app.post("/index_file")
async def index_file(file: UploadFile = File(...)):
    """
    Upload a file and index its content.

    Validates the declared content type, writes the upload into a temporary
    directory, converts that directory into documents with
    convert_files_to_docs and runs them through indexing_pipeline_research.

    Parameters:
      file: An UploadFile object representing the file to be uploaded.
            Supported content types are text/plain, application/pdf,
            application/msword, and application/vnd.openxmlformats-officedocument.wordprocessingml.document.

    Returns:
      A JSON-serialisable dict containing:
          message: A confirmation message indicating successful upload and indexing.

    Raises:
      HTTPException 400: If the file format is not supported.
      HTTPException 500: If there is an internal server error during file processing or indexing.
    """
    # Validate the uploaded file's content type
    supported_types = (
        "text/plain",
        "application/pdf",
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    if file.content_type not in supported_types:
        raise HTTPException(status_code=400, detail="File format not supported")

    try:
        # Create a temporary directory to store the uploaded file
        with tempfile.TemporaryDirectory() as temp_dir:
            # The filename is chosen by the client. Keep only its final path
            # component so an absolute path or "../" segments cannot make the
            # write below land outside temp_dir. The extension is preserved.
            safe_name = os.path.basename(file.filename or "")
            temp_file_path = os.path.join(temp_dir, safe_name)

            # Write the uploaded file to the temporary directory
            with open(temp_file_path, "wb") as temp_file:
                temp_file.write(await file.read())

            # Convert everything in the temporary directory to documents
            all_docs_research = convert_files_to_docs(dir_path=temp_dir, split_paragraphs=True)

            # Index the documents using the predefined Research indexing pipeline
            indexing_pipeline_research.run(documents=all_docs_research)

        return {"message": "Document uploaded and indexed successfully."}
    except Exception as e:
        logging.error(f"An error occurred during indexing: {str(e)}")
        raise HTTPException(status_code=500, detail="An internal server error occurred.")

@app.post("/upload_and_index_inmemory")
async def upload_and_index_inmemory(file: UploadFile = File(...)):
    """
    Upload a file and index its content in memory.

    Validates the declared content type, writes the upload into a temporary
    directory, converts that directory into documents with
    convert_files_to_docs and runs them through indexing_pipeline_inmemory.

    Parameters:
      file: An UploadFile object representing the file to be uploaded.
            Supported content types include text/plain, application/pdf,
            application/msword, and application/vnd.openxmlformats-officedocument.wordprocessingml.document.

    Returns:
      JSONResponse: A 200 response with a message indicating successful upload and indexing.

    Raises:
      HTTPException 400: If the file format is not supported.
      HTTPException 500: If there is an internal server error during file processing or indexing.
    """
    # Validate the uploaded file's content type
    supported_types = (
        "text/plain",
        "application/pdf",
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    if file.content_type not in supported_types:
        raise HTTPException(status_code=400, detail="File format not supported")

    try:
        # Create a temporary directory to store the uploaded file
        with tempfile.TemporaryDirectory() as temp_dir:
            # The filename is chosen by the client. Keep only its final path
            # component so an absolute path or "../" segments cannot make the
            # write below land outside temp_dir. The extension is preserved.
            safe_name = os.path.basename(file.filename or "")
            temp_file_path = os.path.join(temp_dir, safe_name)

            # Write the uploaded file to the temporary directory
            with open(temp_file_path, "wb") as temp_file:
                temp_file.write(await file.read())

            # Convert everything in the temporary directory to documents
            all_docs_inmemory = convert_files_to_docs(dir_path=temp_dir, split_paragraphs=True)

            # Index the documents using the In Memory indexing pipeline
            indexing_pipeline_inmemory.run(documents=all_docs_inmemory)

        return JSONResponse(status_code=200, content={"message": "Document uploaded and indexed successfully."})
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error occurred.")

@app.post("/clear_chat", response_class=HTMLResponse)
async def clear_chat(request: Request):
    """
    Reset the chat session.

    Empties the shared conversation history, clears the `memory` object and
    deletes every document held in the In Memory document store, so the next
    question starts without previous context.

    Parameters:
      request: The incoming HTTP request object (not used by the handler).

    Returns:
      Nothing is returned explicitly; the handler ends without a return
      statement. Presumably the framework sends an empty HTML body. Confirm
      against the client code that calls this endpoint.

    Raises:
      HTTPException 500: If any of the clearing steps fails.
    """
    try:
        logging.info("Clear chat endpoint called.")

        # The steps run in this fixed order; a failure stops the remaining ones
        conversation.clear()                        # rendered chat history
        memory.clear()                              # conversation memory object
        document_store_inmemory.delete_documents()  # documents indexed in memory

        logging.info("Conversation cleared.")
    except Exception as exc:
        logging.error("Error in clear_chat endpoint: " + str(exc))
        raise HTTPException(status_code=500, detail="Internal server error occurred.")

In [18]:
# Run the app and click on the URL
def run_app(host: str = '0.0.0.0', port: int = 8002, timeout_keep_alive: int = 600):
    """
    Run the FastAPI application with Uvicorn.

    This function does not start a thread itself. It calls uvicorn.run
    directly, so it is meant to be used as the target of a Thread when the
    notebook cell has to return while the server keeps running.

    Parameters:
      host: Interface to bind to. The default '0.0.0.0' listens on all interfaces.
      port: TCP port to listen on. The default 8002 is the port the notebook
            asks the Colab proxy for.
      timeout_keep_alive: Keep-alive timeout passed to Uvicorn (600 by default).

    Notes:
      Calling run_app() with no arguments behaves exactly as before the
      parameters were introduced.
    """
    uvicorn.run(app, host=host, port=port, timeout_keep_alive=timeout_keep_alive)

# Launch run_app on its own thread so this cell can finish while the server runs
thread = Thread(target=run_app)
thread.start()

# Ask Colab for the proxied URL of port 8002 and show it so it can be clicked
proxy_url = eval_js("google.colab.kernel.proxyPort(8002)")
print(proxy_url)

INFO:     Started server process [1303]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)


https://2ucmwkgzu35-496ff2e9c6d22116-8002-colab.googleusercontent.com/
