In [1]:
print("OK")

OK


# Q&A over the Code Base to Understand How it Works

In [2]:
from git import Repo
import os

from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

In [3]:
%pwd

'/home/fahad/Documents/Projects/SPL Excessories/SPL3/Code Analyzer LLM/research'

In [4]:
# !mkdir test_repo

In [5]:
# Local checkout that the loader reads from.
repo_path = "test/python-type-hinting-main"
# NOTE(review): the disabled clone below targets a different repository
# (End-to-end-Medical-Chatbot-Generative-AI) than the directory named in
# repo_path — confirm the URL before re-enabling it.
# repo = Repo.clone_from("https://github.com/entbappy/End-to-end-Medical-Chatbot-Generative-AI", to_path=repo_path)

In [6]:
# Loader over the checked-out repository: the "**/*" glob matches files at any
# depth, and the suffix filter keeps only Python sources. Each file is handed
# to a LanguageParser configured for Python.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],
    parser=LanguageParser(language=Language.PYTHON, parser_threshold=500),
)

In [7]:
# Execute the loader and keep the resulting Document objects for splitting.
documents = loader.load()

In [8]:
documents

[Document(metadata={'source': 'test/python-type-hinting-main/test.py', 'language': <Language.PYTHON: 'python'>}, page_content='from dataclasses import dataclass\nfrom typing import TypedDict\n\n\n# Define a TypedDict to explicitly type the car_data dictionary\nclass CarData(TypedDict):\n    model: str\n    year: int\n    trim_package: str\n    mass: float\n    engine_type: str\n    engine_size: str\n    engine_location: str\n    cylinders: int\n    turbo: bool\n    unit_price: float\n    colour: str\n    wheels: int\n    seats: int\n\n\nclass EngineAttributes(TypedDict):\n    engine_type: str\n    engine_size: str\n    cylinders: float\n    turbo: bool\n    engine_location: str\n\n\n@dataclass\nclass Car:\n    """Class for a particular car item in inventory."""\n\n    model: str\n    year: int\n    trim_package: str\n    mass: float\n    engine_type: str\n    engine_size: str\n    engine_location: str\n    cylinders: int\n    turbo: bool\n    unit_price: float\n    colour: str\n    whe

In [9]:
len(documents[0].page_content)

1777

In [10]:
documents[0]

Document(metadata={'source': 'test/python-type-hinting-main/test.py', 'language': <Language.PYTHON: 'python'>}, page_content='from dataclasses import dataclass\nfrom typing import TypedDict\n\n\n# Define a TypedDict to explicitly type the car_data dictionary\nclass CarData(TypedDict):\n    model: str\n    year: int\n    trim_package: str\n    mass: float\n    engine_type: str\n    engine_size: str\n    engine_location: str\n    cylinders: int\n    turbo: bool\n    unit_price: float\n    colour: str\n    wheels: int\n    seats: int\n\n\nclass EngineAttributes(TypedDict):\n    engine_type: str\n    engine_size: str\n    cylinders: float\n    turbo: bool\n    engine_location: str\n\n\n@dataclass\nclass Car:\n    """Class for a particular car item in inventory."""\n\n    model: str\n    year: int\n    trim_package: str\n    mass: float\n    engine_type: str\n    engine_size: str\n    engine_location: str\n    cylinders: int\n    turbo: bool\n    unit_price: float\n    colour: str\n    whee

In [11]:
# Python-aware splitter: chunks of at most 500 characters, with 20 characters
# of overlap between neighbouring chunks.
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=500,
    chunk_overlap=20,
)

In [12]:
# Break the loaded documents into chunks with the configured splitter.
texts = documents_splitter.split_documents(documents)

In [13]:
len(texts[0].page_content)

392

In [14]:
len(texts)

33

In [15]:
from dotenv import load_dotenv

# Read a local .env file (if any) into the process environment.
load_dotenv()

# Embedding models: https://python.langchain.com/v0.1/docs/integrations/text_embedding/
# The Gemini key lives in GEMINI_API_KEY; os.getenv yields None when it is unset.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")

In [16]:
# Fail fast with an actionable message when the key is missing: os.environ only
# accepts str values, so assigning None would otherwise surface as an opaque
# "str expected, not NoneType" TypeError.
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; export it or add it to your .env file before running this notebook."
    )
os.environ["GEMINI_API_KEY"] = GOOGLE_API_KEY

In [17]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Gemini embedding client, authenticated with the key loaded earlier.
# (OpenAI alternative, currently disabled: OpenAIEmbeddings(disallowed_special=()))
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

  from .autonotebook import tqdm as notebook_tqdm


In [18]:
import hashlib

from langchain_chroma import Chroma

# Give every chunk a deterministic id (source path + position + content).
# Without explicit ids each run stores the chunks under fresh random ids, so
# re-running this cell against the persisted ./db keeps appending duplicate
# copies; with stable ids the same input is upserted onto the existing records.
# NOTE: chunks left over from an older version of the repository are not
# removed — delete ./db to rebuild the index from scratch.
chunk_ids = [
    hashlib.sha256(
        f"{chunk.metadata.get('source', '')}:{position}:{chunk.page_content}".encode("utf-8")
    ).hexdigest()
    for position, chunk in enumerate(texts)
]

vectordb = Chroma.from_documents(
    texts,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory='./db',
)

In [19]:
# vectordb.persist()  # left disabled: with persist_directory set, langchain_chroma writes to disk automatically

In [20]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model shared by the retrieval chain and the conversation memory.
# (OpenAI alternatives, currently disabled: ChatOpenAI(model_name="gpt-4"), ChatOpenAI())
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.5,
    max_tokens=4018,
    google_api_key=GOOGLE_API_KEY,
)

In [21]:
# Conversation memory backed by the same LLM; its content is exposed to the
# chain under the "chat_history" key and returned as message objects.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

In [22]:
# Conversational retrieval chain: the vector store is queried with MMR search
# (k=8 results per question) and the memory object carries the chat history.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 8}),
    memory=memory,
)

In [23]:
# Prompt sent to the chain: it embeds one type-checker finding (rule id,
# message, offending line) and restricts the reply to three numbered sections.
question = ''' 
The following object contains type related issue.
{rule_id: 'Undefined or invalid type [11]', message: 'Annotation `dict` is not defined as a type.', warning_line: 'def obtain_price_list(self, price_list: dict[str, float]):', source_code: 'No function definition found'}

You should reply as the following format only. Don't engage in conversation and add any extra explanation.
1. Cause
2. Solution in Code
3. Explain solution in bullet point
'''

In [24]:
# Calling the chain object directly (qa(question)) is deprecated in LangChain
# and emits a deprecation warning; invoke() is the supported entry point.
# The chain's only free input is "question" — chat history comes from memory.
result = qa.invoke({"question": question})
print(result['answer'])

  result = qa(question)


1. **Cause:** The `dict` type annotation needs to be imported from the `typing` module when used in this way.  Before Python 3.9, using `dict[str, float]` directly was not valid syntax.

2. **Solution in Code:**

```python
from typing import dict

def obtain_price_list(self, price_list: dict[str, float]):
    # ... function body ...
```

3. **Explanation:**

*   The `from typing import dict` line imports the `dict` type hint. This allows you to use `dict[str, float]` to specify a dictionary where keys are strings and values are floats.
*   This import is required for type hinting to work correctly, especially in earlier Python versions (before 3.9).  In Python 3.9 and later, the built-in `dict` can be used for type hints without importing from `typing`.



In [25]:
# Follow-up question; it relies on the chain's memory for what "the previous
# types" refers to.
question = "As newbie from other programming background don't have a clear understing of python types. So explain the previous types with example from different coding language so that they can be familiar with python type hints"
# invoke() replaces the deprecated direct call qa(question).
result = qa.invoke({"question": question})
print(result['answer'])

Let's break down Python's type hinting and specifically `dict[str, float]` in function signatures.

**Python Type Hinting**

Type hinting in Python is a way to add static type information to your code.  It's important to understand that Python remains *dynamically typed*.  Type hints are primarily used for:

1. **Readability:** They make code easier to understand by explicitly stating the expected types of variables, function arguments, and return values.

2. **Static Analysis:** Tools like `mypy` can use type hints to catch type errors *before* runtime, improving code reliability.  Your IDE can also leverage type hints for better code completion and error detection.

3. **Documentation:** Type hints serve as documentation, clarifying the intended use of functions and variables.

**`dict[str, float]` in Function Signatures**

The annotation `dict[str, float]` specifies that a function argument or return value should be a dictionary where:

* Keys are strings (`str`).
* Values are float

In [None]:
# Ask the chain to recover the full source behind a pyre finding whose
# source_code field is empty.
question = '''
This is an object containing pyre error message and a warning line
{rule_id: 'Undefined or invalid type [11]', message: 'Annotation `dict` is not defined as a type.', warning_line: 'def obtain_price_list(self, price_list: dict[str, float]):', source_code: Null}
To get the full solution we need the source code as well.

Extract the complete source code and print it
'''
# invoke() replaces the deprecated direct call qa(question).
result = qa.invoke({"question": question})
print(result['answer'])