In [2]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from pdf2image import convert_from_path




loader = WebBaseLoader("https://github.com/SAP/styleguides/blob/main/clean-abap/CleanABAP.md")
data = loader.load()


text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=3)
texts = text_splitter.split_documents(data)

##PDF
# loader = PyPDFLoader("C://Users/pogawal/WorkFolder/Documents/Python Scripts/langchain_experiments/2023_Annual_Report.pdf")
# loader = PyPDFLoader("C://Users/pogawal/WorkFolder/Documents/Python Scripts/langchain_experiments/2023_Annual_Report.pdf")
# documents = loader.load_and_split()



# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1024,
#     chunk_overlap=64
# )
# texts = text_splitter.split_documents(documents)


In [3]:
len(texts)

8

In [6]:
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
llm_path = "C://Users/pogawal/AppData/Local/nomic.ai/GPT4All/gpt4all-falcon-newbpe-q4_0.gguf"

llm = GPT4All(
    model=llm_path,
    backend="gptj",
    verbose=False
)

In [7]:
db = Chroma.from_documents(texts, embeddings, persist_directory="db")

In [8]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 8}),
    return_source_documents=True,
    verbose=False,
)

In [None]:
res = qa(f"""
    what are the rules for clean ABAP?.
""")
print(res["result"])

In [None]:
res = qa(f"""
    What should I do to have good robust ABAP code?
""")
print(res["result"])

In [None]:
res = qa(f"""
    What is a good method name in ABAP?
""")
print(res["result"])

In [None]:
res = qa(f"""
    What is clean ABAP?
    Describe it in 5 sentences.
""")
print(res["result"])