In [1]:
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain

In [3]:
def clear_screen():

    ''' Wipes the terminal by running the clear command that matches the host OS. '''

    # os.name is 'nt' on Windows, where the command is 'cls'; everything else gets 'clear'.
    shell_command = 'cls' if os.name == 'nt' else 'clear'
    os.system(shell_command)

In [4]:

def rag_llm(doc_loader, query):

    ''' 
        Function that answers a query with a retrieval-augmented (RAG) chain built
        over the documents provided by the loader.

        The loaded documents are split into chunks, embedded with OpenAIEmbeddings and
        indexed in a FAISS vector store. A retrieval chain then passes the retrieved
        chunks as context to the chat model, which is instructed to answer based only
        on that context.
    
        Parameters:
        doc_loader: DocumentLoader
            Any object exposing a load() method that returns the documents used as
            context (main() passes loaders for .eml and .pdf files).
        query: str
            The query to be answered based on the context.

        Returns:
            The value returned by the retrieval chain's invoke(); main() reads its
            'answer' entry.

        Raises:
        ValueError
            If the loader returns no documents, or splitting them produces no chunks.
    '''

    # Load and split first so an empty corpus fails fast with a clear message,
    # before any OpenAI client is created or any embedding request is attempted.
    doc = doc_loader.load()
    if not doc:
        raise ValueError(
            "No documents were loaded; check that the source directory contains matching files."
        )

    text_splitter = RecursiveCharacterTextSplitter()
    documents = text_splitter.split_documents(doc)
    if not documents:
        raise ValueError(
            "The loaded documents produced no text chunks; there is no context to search."
        )

    model = "gpt-3.5-turbo"
    llm = ChatOpenAI(model=model, api_key=os.environ.get('OPENAI_API_KEY'))

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_documents(documents, embeddings)

    prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:
    <context>
    {context}
    </context>
    Question: {input}""")

    # The document chain fills {context} in the prompt; the retrieval chain
    # supplies that context from the vector store for the given 'input'.
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = vector_store.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    response_query = retrieval_chain.invoke({
        'input': query
    })

    return response_query


In [5]:

def main():

    ''' 
        Interactive entry point: asks which corpus to query (email texts or PDF
        attachments), reads the user's question, runs it through rag_llm and
        prints the resulting answer. Any other menu choice prints "Invalid query".
    '''

    choice_menu = input("Choose the query you want to make:\n 1 - Query based on email texts \n 2 - Query based on PDF attached to emails\n")
    clear_screen()

    # Menu option -> (question prompt shown to the user, glob selecting files under 'emails').
    options = {
        "1": ("Enter your question, or query, based on the email messages received: ", '**/*.eml'),
        "2": ("Enter your question, or query, based on the PDF attachment files: ", '**/*.pdf'),
    }

    selected = options.get(choice_menu)
    if selected is None:
        print("Invalid query")
        return

    question_prompt, file_glob = selected
    user_query = input(question_prompt)
    clear_screen()

    loader = DirectoryLoader(path='emails', glob=file_glob)
    response_query = rag_llm(loader, user_query)
    print(response_query['answer'])

In [8]:
# Entry point: start the interactive query prompt only when this code runs as the
# main module, not when it is imported from elsewhere.
if __name__ == '__main__':
    main()
    

[H[2J[H[2JThe evaluation email for SECOMP 2023 was sent by PET Computação.
