In [10]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from pdf2image import convert_from_path


loader = WebBaseLoader("https://github.com/SAP/styleguides/blob/main/clean-abap/CleanABAP.md")
data = loader.load()


text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=3)
texts = text_splitter.split_documents(data)

##PDF
# loader = PyPDFLoader("C://Users/pogawal/WorkFolder/Documents/Python Scripts/langchain_experiments/2023_Annual_Report.pdf")
# loader = PyPDFLoader("C://Users/pogawal/WorkFolder/Documents/Python Scripts/langchain_experiments/2023_Annual_Report.pdf")
# documents = loader.load_and_split()



# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1024,
#     chunk_overlap=64
# )
# texts = text_splitter.split_documents(documents)


In [3]:
len(texts)

8

In [4]:
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
llm_path = "C://Users/pogawal/AppData/Local/nomic.ai/GPT4All/gpt4all-falcon-newbpe-q4_0.gguf"

llm = GPT4All(
    model=llm_path,
    backend="gptj",
    verbose=False
)

  from .autonotebook import tqdm as notebook_tqdm


In [22]:
sentence1 = "tigers"
sentence2 = "lions"
sentence3 = "dogs"
embedding1 = embeddings.embed_query(sentence1)
embedding2 = embeddings.embed_query(sentence2)
embedding3 = embeddings.embed_query(sentence3)
import numpy as np

print(np.dot(embedding1,embedding2))
print(np.dot(embedding1,embedding3))
print(np.dot(embedding2,embedding3))

0.6565375536484352
0.4976050388323227
0.46362199656619735


In [5]:
texts = [
    """The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).""",
    """A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.""",
    """A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.""",
]
smalldb = Chroma.from_texts(texts, embedding=embeddings)

In [7]:
question = "Tell me about all-white mushrooms with large fruiting bodies"
smalldb.similarity_search(question, k=2)

[Document(page_content='A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.'),
 Document(page_content='A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.')]

In [8]:
smalldb.max_marginal_relevance_search(question,k=2, fetch_k=3)

[Document(page_content='A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.'),
 Document(page_content='A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.')]

In [11]:
db = Chroma.from_documents(texts, embeddings, persist_directory="db")

In [29]:
question = 'are there dogs?'
docs = db.similarity_search(question,k=3)

Document(page_content='available evidence.', metadata={'page': 50, 'source': 'C://Users/pogawal/WorkFolder/Documents/Python Scripts/langchain_experiments/2023_Annual_Report.pdf'})

In [30]:
docs

[Document(page_content='available evidence.', metadata={'page': 50, 'source': 'C://Users/pogawal/WorkFolder/Documents/Python Scripts/langchain_experiments/2023_Annual_Report.pdf'}),
 Document(page_content='available evidence.', metadata={'page': 50, 'source': 'C://Users/pogawal/WorkFolder/Documents/Python Scripts/langchain_experiments/2023_Annual_Report.pdf'}),
 Document(page_content='available evidence.', metadata={'page': 50, 'source': 'C://Users/pogawal/WorkFolder/Documents/Python Scripts/langchain_experiments/2023_Annual_Report.pdf'})]

In [12]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 8}),
    return_source_documents=True,
    verbose=False,
)

In [13]:
res = qa(f"""
    what are the rules for clean ABAP?.
""")
print(res["result"])

  warn_deprecated(



The Clean ABAP coding standard is a set of guidelines that aim to improve code quality and maintainability. It includes several rules, such as:

1. Use consistent naming conventions: Use consistent naming conventions throughout your codebase to make it easier to understand and maintain.
2. Keep comments brief: Write concise and meaningful comments to explain the purpose and behavior of each line of code.
3. Avoid unnecessary complexity: Avoid adding unnecessary complexity to your code, as this can lead to maintenance issues in the future.
4. Use consistent formatting: Use consistent formatting throughout your codebase to make it easier to read and understand.
5. Test thoroughly: Thoroughly test your code before releasing it to production to ensure that it is reliable and bug-free.
6. Follow coding conventions: Follow coding conventions such as indentation, spacing, and naming conventions to make your code more readable and maintainable.
7. Use version control: Use version control syst

In [None]:
res = qa(f"""
    What should I do to have good robust ABAP code?
""")
print(res["result"])

In [None]:
res = qa(f"""
    What is a good method name in ABAP?
""")
print(res["result"])

In [None]:
res = qa(f"""
    What is clean ABAP?
    Describe it in 5 sentences.
""")
print(res["result"])