# Context-Aware Source Code Analysis

In [1]:
from git import Repo
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_ollama import ChatOllama
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

import os

In [None]:
# Create the checkout directory for the repository analysed below.
# os.makedirs with exist_ok=True behaves the same on every platform and does not
# fail when the directory is already there, so the cell can be re-run safely.
os.makedirs("test_repo", exist_ok=True)

In [3]:
# Directory holding the cloned repository; passed to the file-system loader below.
repo_path = "test_repo/"

In [None]:
# Fetch the sample project that the rest of the notebook analyses.
# `Repo` and `os` come from the notebook's import cell, so nothing is re-imported here.
REPO_URL = "https://github.com/ZikGitHub/Medical-Chatbot-GenerativeAI.git"

# Cloning into a directory that already holds a checkout raises an error, so reuse
# the existing working copy when one is present. This keeps the cell re-runnable.
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(REPO_URL, repo_path)

In [4]:
# Parser applied to each matched file; language and threshold are unchanged.
python_parser = LanguageParser(language="python", parser_threshold=500)

# Walk everything under repo_path ("**/*") but keep only files ending in ".py".
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],
    parser=python_parser,
)

In [5]:
# Load and parse every file matched by the loader into Document objects.
documents = loader.load()

# Show only a short preview; the full list is large, and the following cells
# already report its length and its first element.
documents[:3]

[Document(metadata={'source': 'test_repo\\app.py', 'language': 'python'}, page_content='from flask import Flask, render_template, request, jsonify\nfrom src.helper import download_embedding\nfrom langchain_pinecone import PineconeVectorStore\nfrom langchain.llms import OpenAI'),
 Document(metadata={'source': 'test_repo\\setup.py', 'language': 'python'}, page_content='from setuptools import find_packages, setup\n\nsetup(\n    name = "Generative AI Project",\n    version = "0.0.0",\n    author = "Zishan Khan",\n    author_email = "zishankhan@pm.me",\n    packages = find_packages(),\n    install_requires = []\n)'),
 Document(metadata={'source': 'test_repo\\template.py', 'language': 'python'}, page_content='import os\nfrom pathlib import Path\nimport logging\n\nlogging.basicConfig(\n    level=logging.INFO,\n    format="%(asctime)s [%(levelname)s] %(message)s"\n)\n\nlist_of_files = [\n    "src/__init__.py",\n    "src/helper.py",\n    ".env",\n    "requirements.txt",\n    "setup.py",\n    "a

In [6]:
# Number of Document objects produced by the loader.
len(documents)

7

In [7]:
# Inspect the first loaded Document (its metadata and page_content).
documents[0]

Document(metadata={'source': 'test_repo\\app.py', 'language': 'python'}, page_content='from flask import Flask, render_template, request, jsonify\nfrom src.helper import download_embedding\nfrom langchain_pinecone import PineconeVectorStore\nfrom langchain.llms import OpenAI')

In [8]:
# Splitter configured for Python source, producing chunks of up to 500 characters
# with a 20-character overlap between neighbouring chunks.
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    language="python",
    chunk_size=500,
    chunk_overlap=20,
)

In [9]:
# Break the loaded documents into chunks suitable for embedding.
texts = documents_splitter.split_documents(documents)

# Show only a short preview rather than the whole chunk list; the next cell
# reports how many chunks were produced.
texts[:3]

[Document(metadata={'source': 'test_repo\\app.py', 'language': 'python'}, page_content='from flask import Flask, render_template, request, jsonify\nfrom src.helper import download_embedding\nfrom langchain_pinecone import PineconeVectorStore\nfrom langchain.llms import OpenAI'),
 Document(metadata={'source': 'test_repo\\setup.py', 'language': 'python'}, page_content='from setuptools import find_packages, setup\n\nsetup(\n    name = "Generative AI Project",\n    version = "0.0.0",\n    author = "Zishan Khan",\n    author_email = "zishankhan@pm.me",\n    packages = find_packages(),\n    install_requires = []\n)'),
 Document(metadata={'source': 'test_repo\\template.py', 'language': 'python'}, page_content='import os\nfrom pathlib import Path\nimport logging\n\nlogging.basicConfig(\n    level=logging.INFO,\n    format="%(asctime)s [%(levelname)s] %(message)s"\n)\n\nlist_of_files = [\n    "src/__init__.py",\n    "src/helper.py",\n    ".env",\n    "requirements.txt",\n    "setup.py",\n    "a

In [10]:
# Number of chunks produced by the splitter.
len(texts)

9

In [11]:
# Sentence-transformers model used to embed both the code chunks and the queries.
# HuggingFaceEmbeddings is already imported in the notebook's import cell, so it is
# not re-imported here.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# Sanity-check the embedding model by embedding one sample sentence; the cell
# displays the length of the returned vector.
query_result = embeddings.embed_query("What is a heart disease?")
len(query_result)

384

In [None]:
!pip install chromadb 



In [14]:
# Build the on-disk vector store from the code chunks.
# Chroma is already imported in the notebook's import cell, so it is not re-imported.
#
# Each chunk gets a deterministic id ("<source path>#<position>"). Without explicit
# ids a fresh random id is generated on every run, so re-running this cell against
# the existing "db" directory would store the same chunks again.
# NOTE(review): chunks left over from an earlier, longer run are not removed;
# delete the "db" directory to rebuild from scratch.
chunk_ids = [
    f"{doc.metadata.get('source', 'unknown')}#{idx}"
    for idx, doc in enumerate(texts)
]
vectordb = Chroma.from_documents(
    texts,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="db",
)

In [15]:
# Ask the vector store to write its contents to the persist_directory.
# NOTE(review): the recorded output shows a warning pointing at this line —
# presumably the persist() deprecation in newer Chroma/LangChain releases, where
# data is saved automatically. Confirm against the installed versions before
# removing the call.
vectordb.persist()

  vectordb.persist()


In [16]:
# Local chat model served by Ollama.
# ChatOllama limits generation length through `num_predict`; `max_tokens` is not
# one of its fields, so the intended 600-token cap was not taking effect.
llm = ChatOllama(
    model="deepseek-r1:1.5b",
    temperature=0.4,
    num_predict=600,
)

In [17]:
# Conversation memory that uses the same LLM to summarise the dialogue; the summary
# is exposed to the chain under the "chat_history" key as message objects.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

  memory = ConversationSummaryMemory(llm=llm, memory_key = "chat_history", return_messages=True)


In [18]:
# Retriever over the vector store: MMR search returning 8 chunks per query.
retriever = vectordb.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 8},
)

# Question-answering chain that combines the LLM, the retriever and the
# summarising conversation memory.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [21]:
# Question put to the retrieval chain in the next cell.
question = "What is load_pdf_file function?"

In [22]:
# Run the chain through `invoke`, the supported entry point; calling the chain
# object directly is the deprecated `Chain.__call__` path. The chat history is
# supplied by the chain's memory, so only the question is passed in.
result = qa.invoke({"question": question})
print(result["answer"])

Number of requested results 20 is greater than number of elements in index 9, updating n_results = 9


<think>
