# Trying out "Talking to your code base"

See the video at https://www.youtube.com/watch?v=AAMJZTEH_h4


In [None]:
# Install the third-party packages this notebook depends on into the kernel's environment.
# NOTE(review): versions are unpinned, so a fresh install may pull releases whose
# import paths differ from the ones used in the cells below -- TODO pin versions.
%pip install openai tiktoken chromadb langchain langchain-community

In [26]:
import os

import dotenv
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser


In [31]:
# Read key/value pairs from a .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from -- confirm.
dotenv.load_dotenv()

True

In [7]:
# Root of the repository to index. Set CORELLIA_REPO_PATH (for example in the
# .env file) to point at your own checkout; when it is unset or empty, the
# original author's local checkout path is used so existing behavior is kept.
repo_path = os.environ.get("CORELLIA_REPO_PATH") or "/Users/kup/git/corellia"

In [10]:
# Java-aware parser used for every matched file.
# NOTE(review): parser_threshold=500 is, per the LangChain docs, the minimum
# number of lines a file needs before it is split by syntax -- confirm
# against the installed version.
java_parser = LanguageParser(language=Language.JAVA, parser_threshold=500)

# Walk the whole tree under repo_path, keeping only files with a .java suffix.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".java"],
    parser=java_parser,
)

documents = loader.load()
len(documents)

31

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters handed to the splitter: each chunk is at most 2000 units
# long, and consecutive chunks share 200 units of overlap.
splitter_options = {"chunk_size": 2000, "chunk_overlap": 200}

# Build a splitter configured for Java source and cut the loaded documents
# into chunks for embedding.
java_splitter = RecursiveCharacterTextSplitter.from_language(
    Language.JAVA,
    **splitter_options,
)
texts = java_splitter.split_documents(documents)
len(texts)

58

## Embed into a vector store

In [25]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

In [32]:
# Embedding model used to vectorize every chunk.
# NOTE(review): disallowed_special=() presumably stops the tokenizer from
# rejecting chunks that happen to contain special-token text -- confirm.
embeddings = OpenAIEmbeddings(disallowed_special=())

# Embed all chunks and index them in a Chroma store (no persist directory is
# passed here).
db = Chroma.from_documents(documents=texts, embedding=embeddings)

In [35]:
# Retrieve 8 chunks per query using the "mmr" search type.
mmr_search_kwargs = {"k": 8}
retriever = db.as_retriever(search_type="mmr", search_kwargs=mmr_search_kwargs)

In [36]:
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

# The same gpt-4 chat model is used by the memory object and by the QA chain.
llm = ChatOpenAI(model_name="gpt-4")

# Conversation memory exposed to the chain under the "chat_history" key.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

# Conversational retrieval chain wired to the retriever and the memory above.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [38]:
# Ask a single question through the conversational retrieval chain.
question = "What is the class hierarchy?"

# Calling the chain object directly (qa(question)) goes through Chain.__call__,
# which LangChain has deprecated; invoke() is the supported entry point and
# returns the same result dict. Chat history is supplied by the chain's memory,
# so only the question needs to be passed.
result = qa.invoke({"question": question})
result['answer']


"Based on the provided context, the specific class hierarchy isn't clearly defined. However, we can see that there are several classes defined under the package `ch.baloise.corellia.api.entities` and `ch.baloise.corellia.api.doc`. \n\nHere are the classes mentioned:\n\n1. **MyTest**: This class has a method to print items of a list.\n\n2. **Role**: This class implements Serializable and seems to be an entity representing a role, with attributes such as `RoleType`, `Person`, and `Company`.\n\n3. **Coverage**: This class also implements Serializable and appears to represent an insurance coverage with various attributes related to insurance.\n\n4. **DocumentTypeTest**: This is a test class, likely used to test methods in some `Document` class. It's not entirely clear from the context provided, but this class appears to test the conversion of string values to a `DocumentType` enumeration.\n\n5. **OpenApi3DocGenerator**: This class generates an OpenAPI specification based on JAX-RS annotate