In [9]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
from ipywidgets import Layout
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import os
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain


# Define constants
# Names of the prompt-template variables shared by the prompt, the memory
# object, and the chain input.
CONTEXT = "context"
HUMAN_INPUT = "human_input"
CHAT_HISTORY_INDICATOR = "chat_history_indicator"
OPENAI_CHAT_MODEL = "gpt-3.5-turbo"
TOP_DOC_NUM = 3
SUPPORT_DOC_PATH = None
# None when the variable is not set in the environment.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')



# Use the home directory as the base path for your writable directory.
# expanduser('~') honours HOME when it is set, and still resolves a home
# directory on platforms where HOME is not defined (e.g. Windows), instead of
# raising KeyError at import time.
home_directory = os.path.expanduser('~')
# Specify a subdirectory within the home directory
SUPPORT_DOC_FOLDER_PATH = os.path.join(home_directory, 'support_docs')
# SUPPORT_DOC_FOLDER_PATH = '../support_docs'

# Lazily-built question-answering chain; created on first use and then reused
# so that its conversation memory persists across calls.
script_chain = None

def return_generate_ai_script(template_text: str, user_query: str) -> str:
    """
    Answer a user query with the chat model, using the most similar support doc as context.

    On every call the text files under SUPPORT_DOC_FOLDER_PATH are re-read and
    embedded into a Chroma vector store, and the single document most similar
    to ``user_query`` is retrieved. The question-answering chain itself is
    built once and cached in the module-level ``script_chain`` so that its
    conversation memory carries over between calls.

    Args:
        template_text (str): Prompt template for the LLM. It is declared with
            the input variables named by CHAT_HISTORY_INDICATOR, HUMAN_INPUT
            and CONTEXT. Only the template passed on the first call is used;
            later calls reuse the cached chain and ignore this argument.
        user_query (str): User question.

    Returns:
        str: AI output string.
    """

    global script_chain

    all_docs = list(process_doc_files(SUPPORT_DOC_FOLDER_PATH).values())

    if all_docs:
        # Load an embedding model from OpenAI
        embeddings = OpenAIEmbeddings()

        # Transform the docs into vectors and store them in a database (Chroma)
        # for managing and querying the embeddings
        docsearch = Chroma.from_texts(all_docs, embeddings)

        # Retrieve the document most similar to the user query.
        # NOTE(review): k is hard-coded to 1 while TOP_DOC_NUM is defined at
        # module level and not used here -- confirm which is intended.
        similar_docs = docsearch.similarity_search(user_query, k=1)
    else:
        # No support documents were found: there is nothing to index, so skip
        # the vector store (building one from zero texts may raise) and let
        # the chain answer with an empty context.
        similar_docs = []

    # Initialize script_chain if it doesn't exist
    if script_chain is None:

        # Input for the prompt
        prompt = PromptTemplate(
            input_variables=[CHAT_HISTORY_INDICATOR, HUMAN_INPUT, CONTEXT],
            template=template_text,
        )

        # Input for the Memory class
        memory = ConversationBufferMemory(memory_key=CHAT_HISTORY_INDICATOR, input_key=HUMAN_INPUT)

        # Load LLM model
        llm = ChatOpenAI(model_name=OPENAI_CHAT_MODEL, temperature=0)

        # Feed LLM model, memory object, and prompt to the Q and A chain function
        script_chain = load_qa_chain(llm=llm, chain_type="stuff", memory=memory, prompt=prompt)

    gen_ai_output = script_chain(
        {"input_documents": similar_docs, HUMAN_INPUT: user_query},
        return_only_outputs=True,
    )

    # Debug output: show the accumulated conversation history.
    print('Chain memory: ', script_chain.memory.buffer)

    return gen_ai_output['output_text']


def process_doc_files(SUPPORT_DOC_FOLDER_PATH) -> dict:
    """
    Read every ``.txt`` file directly inside a folder into a dictionary.

    Only regular files whose names end with ``.txt`` are read (as UTF-8);
    sub-directories are not descended into. Files are visited in sorted name
    order so the resulting dictionary has a deterministic key order.

    :param SUPPORT_DOC_FOLDER_PATH: The path to the folder containing the text files.
    :return: A dictionary with file names as keys and content as values.
             Empty if the path does not exist or is not a directory.
    """
    # Initialize an empty dictionary to store the results
    doc_dict = {}

    # isdir (rather than exists) also rejects a path that points at a regular
    # file, which os.listdir would otherwise fail on.
    if not os.path.isdir(SUPPORT_DOC_FOLDER_PATH):
        return doc_dict

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(SUPPORT_DOC_FOLDER_PATH)):
        if not filename.endswith(".txt"):
            continue

        # Create the full path to the file
        file_path = os.path.join(SUPPORT_DOC_FOLDER_PATH, filename)

        # Skip directories (or other non-regular entries) that merely happen
        # to be named like a text file.
        if not os.path.isfile(file_path):
            continue

        # Store the content in the dictionary with the filename as the key
        with open(file_path, 'r', encoding='utf-8') as file:
            doc_dict[filename] = file.read()

    return doc_dict



In [None]:

# def download_s3_folder(bucket_name, folder_name, local_directory):
        
#     session = boto3.Session(
#         aws_access_key_id=AWS_ACCESS_KEY_ID,
#         aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
#         region_name=AWS_DEFAULT_REGION
#     )
    
#     s3 = session.resource('s3')
#     my_bucket = s3.Bucket(bucket_name)
    
#     # Ensure the local directory exists
#     if not os.path.exists(local_directory):
#         os.makedirs(local_directory)

#     for s3_object in my_bucket.objects.filter(Prefix=folder_name):
#         local_file_path = os.path.join(local_directory, os.path.basename(s3_object.key))
#         local_file_path = local_file_path.rstrip('/')  # Remove trailing slashes
#         print(s3_object.key, local_file_path)        
#         try:
#             my_bucket.download_file(s3_object.key, local_file_path)
#         except Exception as e:
#             pass
