In [1]:
# Smoke test: confirms the kernel is up and executing cells.
print("OK")

OK


# Q&A over the Code Base to Understand How it Works

In [2]:
from git import Repo
import os

from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

In [3]:
%pwd

'/home/fahad/Documents/Projects/SPL Excessories/SPL3/others/Test v2'

In [4]:
# Directory of the repository to index. The path is relative, so it is resolved
# against the notebook's current working directory.
repo_path = "pytagfix-master"

## Load Project (Documents)

In [5]:
# Recursively scan the repository, keep only *.py files and parse them as Python source.
# parser_threshold is the minimum file length (in lines) at which the language parser
# is activated; shorter files are loaded without being segmented.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],
    parser=LanguageParser(
        language=Language.PYTHON,
        parser_threshold=500,
    ),
)

In [6]:
# Read every matching file from disk and turn it into LangChain Document objects.
documents = loader.load()

In [7]:
documents

[Document(metadata={'source': 'pytagfix-master/test.py', 'language': <Language.PYTHON: 'python'>}, page_content='# custom type\nclass Student:\n    def __init__(self, name: str, age: int, school: str):\n        self.name = name\n        self.age = age\n        self.school = school\n\n\ndef print_student_details(student: Student) -> None:\n    print(student.name, student.age)\n\n\n# newtype\nfrom typing import NewType\n\nStudentID = NewType("StudentID", int)\nCourseID = NewType("CourseID", int)\n\n\ndef get_student(student_id: StudentID) -> None:\n    pass\n\n\ndef enroll_in_course(student_id: StudentID, course_id: CourseID) -> None:\n    pass\n\n\n# generics\nfrom typing import List, TypeVar\n\nT = TypeVar("T")\n\n\ndef first_element(lst: List[T]) -> T:\n    return lst[0]\n\n\nfrom typing import Generic, TypeVar\n\nT = TypeVar("T")\n\n\nclass Box(Generic[T]):\n    def __init__(self, value: T):\n        self.value = value\n\n    def get(self) -> T:\n        return self.value\n\n\ndef ca

In [8]:
len(documents[0].page_content)

1946

In [9]:
documents[0]

Document(metadata={'source': 'pytagfix-master/test.py', 'language': <Language.PYTHON: 'python'>}, page_content='# custom type\nclass Student:\n    def __init__(self, name: str, age: int, school: str):\n        self.name = name\n        self.age = age\n        self.school = school\n\n\ndef print_student_details(student: Student) -> None:\n    print(student.name, student.age)\n\n\n# newtype\nfrom typing import NewType\n\nStudentID = NewType("StudentID", int)\nCourseID = NewType("CourseID", int)\n\n\ndef get_student(student_id: StudentID) -> None:\n    pass\n\n\ndef enroll_in_course(student_id: StudentID, course_id: CourseID) -> None:\n    pass\n\n\n# generics\nfrom typing import List, TypeVar\n\nT = TypeVar("T")\n\n\ndef first_element(lst: List[T]) -> T:\n    return lst[0]\n\n\nfrom typing import Generic, TypeVar\n\nT = TypeVar("T")\n\n\nclass Box(Generic[T]):\n    def __init__(self, value: T):\n        self.value = value\n\n    def get(self) -> T:\n        return self.value\n\n\ndef cal

## Split Documents to Store in the Vector DB

In [10]:
# Python-aware recursive splitter: chunks are capped at 500 characters and
# neighbouring chunks share up to 20 characters of overlap.
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=500,
    chunk_overlap=20,
)

In [11]:
# Break the loaded documents into chunks; these chunks are what gets embedded and indexed.
texts = documents_splitter.split_documents(documents)

In [12]:
# Size of the first chunk, in characters.
# NOTE(review): the result (13) matches the length of the file's leading `# custom type`
# comment, so the splitter presumably emitted that comment as a chunk of its own — confirm.
len(texts[0].page_content)

13

In [13]:
len(texts)

40

## Generate Embeddings

In [14]:
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Embedding models: https://python.langchain.com/v0.1/docs/integrations/text_embedding/
# The Gemini key is read from GEMINI_API_KEY and passed explicitly to the Google clients.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")

# os.getenv returns None for a missing variable. Stop here with an actionable message
# instead of letting a later cell fail with an unrelated-looking error.
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Define it in the environment or in a .env file "
        "next to this notebook before running the remaining cells."
    )

In [15]:
# Writes the key back into the process environment.
# NOTE(review): GOOGLE_API_KEY was read from this very variable (GEMINI_API_KEY), so this
# is a no-op when the key exists and raises TypeError when it is None (os.environ only
# accepts strings). If the goal was to expose the key to the Google client libraries,
# the target was presumably meant to be "GOOGLE_API_KEY" — confirm intent.
os.environ["GEMINI_API_KEY"] = GOOGLE_API_KEY

In [16]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Gemini embedding model used to vectorise the code chunks.
# Alternative kept for reference: OpenAIEmbeddings(disallowed_special=())
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
# NOTE: this import rebinds `Chroma`, shadowing the `langchain.vectorstores.Chroma`
# imported in the first import cell; from here on the langchain_chroma class is used.
from langchain_chroma import Chroma

# Embed every chunk and index it in a Chroma collection.
# NOTE(review): no persist_directory is passed, so the index is presumably in-memory
# and rebuilt (re-embedding all chunks) on every run — confirm.
vectordb = Chroma.from_documents(documents=texts, embedding=embeddings)

In [18]:
# Retriever over the vector store with default search settings (no search_type or k
# given here, unlike the MMR / k=8 retriever built for the conversational chain later).
retriever = vectordb.as_retriever()
# Display the retriever's repr to confirm which store and embedding model it wraps.
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7078dc1aeed0>)

## Query Vector DB & Create QA Agent

## Testing Path 1️⃣

In [19]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instructions for the QA chain. Adjacent string literals are concatenated
# verbatim, so every sentence carries its own terminating period and trailing space.
# `{context}` is the slot the stuff-documents chain fills with the retrieved chunks.
system_prompt = (
    "You are an assistant for answering queries related to coding. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Keep answers to the point and in a relevant format."
    "\n\n"
    "{context}"
)


# Message layout: system instructions, any earlier turns, then the user's question.
# The history slot is optional, so the prompt still formats when no `chat_history`
# is supplied; when it is supplied (as the invoke cell does) it is no longer dropped.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("human", "{input}"),
    ]
)

In [20]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model shared by both QA chains built below.
# NOTE(review): 4018 is an unusual output cap — possibly a typo for 4096; confirm.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.5,
    max_tokens=4018,
    google_api_key=GOOGLE_API_KEY,
)

In [21]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# "Stuff" strategy: the retrieved documents are placed into the prompt's {context}
# slot and sent to the model in a single call.
question_answering_chain = create_stuff_documents_chain(llm=llm, prompt=chat_prompt)

In [22]:
from langchain.chains import create_retrieval_chain

# Wire retrieval to generation: documents are fetched for the input and then
# handed to the question-answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answering_chain,
)

In [23]:
from langchain_core.messages import HumanMessage, AIMessage

In [24]:
# Conversation history passed to rag_chain.invoke; starts empty.
# NOTE(review): no visible cell appends to this list, so every call is effectively
# single-turn — confirm whether turns were meant to be recorded here.
chat_history = []

In [25]:
question1 = "which one is the main python script in pytagfix-master?\n answer in one line"

In [26]:
# Run the retrieval chain once and print only the model's reply.
message1 = rag_chain.invoke(
    {
        "input": question1,
        "chat_history": chat_history,
    }
)
print(message1["answer"])

The main Python script in pytagfix-master is not shown in the provided code.



## Current Best Implementation 🔽

In [27]:
# Conversation memory backed by the chat model. It is exposed to the chain under
# "chat_history" and records the chain's "answer" output.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    output_key="answer",
    return_messages=True,
)

In [28]:
# Conversational RAG chain: MMR search returning 8 chunks per query, with the
# summary memory supplying the chat history between turns.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectordb.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 8},
    ),
    memory=memory,
    return_source_documents=True,  # results also carry the retrieved chunks
    output_key="answer",  # name the output key explicitly to resolve ambiguity
)

In [None]:
question = ''' 
what are the python scripts in this project? mention the names.
'''

In [34]:
# Use the Runnable `invoke` API (as the rag_chain cell does) instead of calling the
# chain object directly, which is deprecated. The chain's input key is "question";
# chat history is supplied by the attached memory.
result = qa.invoke({"question": question})
print(result['answer'])

This code snippet doesn't define any complete Python scripts. It shows parts of functions and other code fragments, but there's no indication of a top-level script name or how these pieces fit together into a complete program.



## Ask your LLM

In [None]:
# Prompt for a Pyre type-error repair request. The embedded object carries the error
# message, the rule id, a source excerpt and the flagged line; the numbered steps tell
# the model what to return. The excerpt is duplicated in the commented line below.
# source_code = 'def walk_metas(app):\n # Iterates from deepest to shallowest meta-apps meta_list = [app]  # shallowest to deepest meta = app while (meta := meta._meta) and meta.default_command:'
 
question = '''
This is an object containing pyre error message and a warning line
{
    message = 'In call `ResolvedCommand.__init__`, for 4th positional argument, expected `Optional[Group]` but got `Union[None, Group, str]`.'
    rule_id = 'Incompatible parameter type [6]'
    source_code = 'def walk_metas(app):\n # Iterates from deepest to shallowest meta-apps meta_list = [app]  # shallowest to deepest meta = app while (meta := meta._meta) and meta.default_command:',
    warning_line = 'apps[-1].group_parameters,'
}

Now do as follows:
1. find the method where the `warning_line` is present
2. inspect error for that method
3. provide ONLY the correct code snippets
4. short explanation
'''

In [None]:
# Use the Runnable `invoke` API instead of calling the chain object directly,
# which is deprecated; the input mapping is unchanged.
result = qa.invoke({"question": question})
print(result['answer'])