# 0. Imports

In [1]:
import datetime
import os  # for interaction with the files

from langchain import LLMChain, PromptTemplate
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# document loaders: PDF loaders to be able to load the PDF files,
# and TextLoader for loading plain TXT files
from langchain.document_loaders import (
    DirectoryLoader,
    PyPDFLoader,
    TextLoader,
    UnstructuredPDFLoader,
)

# LLamaCpp embeddings from the Alpaca model
from langchain.embeddings import LlamaCppEmbeddings

# Vector store index creator, used to build the database of our knowledge
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import GPT4All

# text splitter for creating chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# FAISS library for similarity search
from langchain.vectorstores.faiss import FAISS

# TEST FOR SIMILARITY SEARCH

In [2]:
# Paths to the two local model files: the GPT4All model used as the LLM and
# the model used for the embeddings (the Alpaca model, per the import notes).
gpt4all_path = "./models/gpt4all-converted.bin"
llama_path = "./models/ggml-model-q4_0.bin"

In [3]:
# Callback manager for handling the calls with the model. It wraps a single
# StreamingStdOutCallbackHandler and is passed to the GPT4All object;
# presumably this is what streams the generated text to stdout.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [4]:
# Create the embedding object from the model file at `llama_path`.
# It is used by `create_index` and when loading the saved FAISS index.
embeddings = LlamaCppEmbeddings(model_path=llama_path)
# Create the GPT4All LLM object from the model file at `gpt4all_path`,
# attaching the callback manager defined above.
llm = GPT4All(model=gpt4all_path, callback_manager=callback_manager, verbose=True)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 


In [5]:
# Functions for splitting text into chunks, building the index, and searching it


def split_chunks(sources, chunk_size=256, chunk_overlap=32):
    """Split loaded documents into overlapping chunks.

    Args:
        sources: The documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 256,
            the value that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            32, the value that was previously hard-coded.

    Returns:
        list: The chunks produced by the splitter, in the order it yields them.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    # list(...) keeps the original contract of returning a fresh list
    # without the element-by-element append loop.
    return list(splitter.split_documents(sources))


def create_index(chunks):
    """Build a FAISS index from document chunks.

    The chunks are embedded with the module-level ``embeddings`` object.

    Args:
        chunks: Iterable of objects exposing ``page_content`` and
            ``metadata`` attributes (e.g. the output of ``split_chunks``).

    Returns:
        The object returned by ``FAISS.from_texts`` for the collected texts
        and metadatas.
    """
    texts = []
    metadatas = []
    # Collect both lists in a single pass so that a one-shot iterable
    # (e.g. a generator) is not exhausted before the metadata is read.
    for doc in chunks:
        texts.append(doc.page_content)
        metadatas.append(doc.metadata)

    return FAISS.from_texts(texts, embeddings, metadatas=metadatas)


def similarity_search(query, index, k=3):
    """Look up the chunks most similar to ``query`` in a vector index.

    Args:
        query: The text to search for.
        index: Any object exposing ``similarity_search(query, k=...)`` whose
            results have ``page_content`` and ``metadata`` attributes
            (e.g. the FAISS store loaded in this notebook).
        k: Number of matches to request from the index. Defaults to 3, the
            value that was previously hard-coded here.

    Returns:
        tuple: ``(matched_docs, sources)`` where ``matched_docs`` is exactly
        what the index returned and ``sources`` is a list of
        ``{"page_content": ..., "metadata": ...}`` dicts, one per match and
        in the same order.
    """
    matched_docs = index.similarity_search(query, k=k)
    # Plain-dict view of the matches, convenient for display or serialization.
    sources = [
        {
            "page_content": doc.page_content,
            "metadata": doc.metadata,
        }
        for doc in matched_docs
    ]

    return matched_docs, sources

In [6]:
# Load our local vector index from the "my_faiss_index" directory, using the
# same embeddings object for queries.
# NOTE(review): FAISS.load_local reportedly unpickles part of the saved index;
# confirm against the installed langchain version and only load index
# directories you created yourself.
index = FAISS.load_local("my_faiss_index", embeddings)

[2023-05-21 15:55:01,237] {loader.py:54} INFO - Loading faiss with AVX2 support.
[2023-05-21 15:55:01,238] {loader.py:58} INFO - Could not load library with AVX2 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx2'")
[2023-05-21 15:55:01,239] {loader.py:64} INFO - Loading faiss.
[2023-05-21 15:55:01,298] {loader.py:66} INFO - Successfully loaded faiss.


# 1. Template for Answer

In [7]:
# Prompt template text with two placeholders: {context} receives the retrieved
# chunks and {question} the user's question. The answer is primed with
# "Let's think step by step." so the model continues from there.
template = """
Please use the following context to answer questions.
Context: {context}
---
Question: {question}
Answer: Let's think step by step."""

# 2. Template for Question

In [8]:
# Hardcoded question, used both for the similarity search and as chain input.
# Alternative question: "What is a PLC and what is the difference with a PC"
question = "How can you become a good manager?"

In [9]:
# Retrieve the chunks closest to the question; `sources` holds the same
# matches as plain dicts.
matched_docs, sources = similarity_search(question, index)
# Build the context: the text of every matched chunk, one per line.
context = "\n".join(doc.page_content for doc in matched_docs)
# Build the prompt template, then pre-fill its context so that the question
# is the only input still to be supplied.
prompt = PromptTemplate(input_variables=["context", "question"], template=template)
prompt = prompt.partial(context=context)
# Wire the prompt to the GPT4All model.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [10]:
# Show the first matched document on its own, i.e. the raw result of the
# similarity search before the LLM is involved.
print("Here the result of the semantic search on the index, without GPT4All..\n")
print(matched_docs[0])

Here the result of the semantic search on the index, without GPT4All..

page_content='and continuously improving performance.The need for ethical and responsible management has never been\ngreater. One-minute managers can lead by example and promote a\nculture of integrity and respect.' metadata={'source': './docs\\OneMinuteManagers.pdf', 'page': 5}


# 3. Execution of LangChain

In [11]:
# Run the chain on the question. The streaming stdout handler attached to the
# model prints the text as it is generated, and the notebook then displays the
# string returned by run() as the cell's output.
llm_chain.run(question)

 
Please use the following context to answer questions.
Context: and continuously improving performance.The need for ethical and responsible management has never been
greater. One-minute managers can lead by example and promote a
culture of integrity and respect.
managers: government officials, military officers, corporation executives,
shop foremen, and foundation directors, among others.
people in a way that benefits both?
Despite the passage of time, the principles of the “One-Minute Manager” are
still relevant and useful in modern management, and they can help us solve
the riddle (how to manage the organization’s and the people's needs).
---
Question: How can you become a good manager?
Answer: Let's think step by step. Firstly, a successful manager should be honest with employees regarding their performance levels so that they are aware of any improvements or weaknesses in an attempt to continuously improve themselves and others around them (One-Minute Managers). Secondly, it is im

" \nPlease use the following context to answer questions.\nContext: and continuously improving performance.The need for ethical and responsible management has never been\ngreater. One-minute managers can lead by example and promote a\nculture of integrity and respect.\nmanagers: government officials, military officers, corporation executives,\nshop foremen, and foundation directors, among others.\npeople in a way that benefits both?\nDespite the passage of time, the principles of the “One-Minute Manager” are\nstill relevant and useful in modern management, and they can help us solve\nthe riddle (how to manage the organization’s and the people's needs).\n---\nQuestion: How can you become a good manager?\nAnswer: Let's think step by step. Firstly, a successful manager should be honest with employees regarding their performance levels so that they are aware of any improvements or weaknesses in an attempt to continuously improve themselves and others around them (One-Minute Managers). Seco