In [1]:
import os

In [2]:
from git import Repo
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

In [3]:
%pwd

'd:\\project-ruang-guru\\Debugging\\Code\\Custom-Debugging-Code\\research'

In [4]:
!mkdir test_repo

A subdirectory or file test_repo already exists.


In [5]:
repo_path = "D:\\project-ruang-guru\\Project_DS_Intern\\source\\rg-ds-chatbot-api\\ds_chatbot\\api"

# Repo.clone_from("https://github.com/entbappy/End-to-end-ML-Project-Implementation", to_path=repo_path)

In [6]:
loader = GenericLoader.from_filesystem(repo_path,
                                        glob = "**/*",
                                       suffixes=[".py"],
                                       parser = LanguageParser(language=Language.PYTHON, parser_threshold=500)
)

In [7]:
documents = loader.load()

In [8]:
documents

 Document(page_content='from ds_chatbot.client.openai.gpt_client import GPTClient\nfrom ds_chatbot.client.event_tracking.api import EventTrackingApi\nfrom ds_chatbot.client.timescale_tracking.api import TimescaleTrackingApi\nfrom ds_chatbot.client.rg_llm.client import RgLLM\nfrom ds_chatbot.api.routers.persona import db_conn\n\nfrom ds_chatbot.client.openai.prompt_gen import PromptChatGPT\nfrom ds_chatbot.core.llm.llm_connector import LLMConnector\nfrom ds_chatbot.core.llm.rg_llm_connector import RgLLMConnector\n\nfrom ds_chatbot.core.service.dto import LearningBotRequest\nfrom ds_chatbot.core.service.chatbot_agent import ChatbotAgent\nfrom ds_chatbot.core.service.chatgpt_discovery import ChatGPTDiscovery\nfrom ds_chatbot.core.service.doraemon import DoraemonBot\nfrom ds_chatbot.core.service.salesbot import SalesBotService\nfrom ds_chatbot.core.service.learning_bot import LearningBotService\nfrom ds_chatbot.core.service.onboarding_doraemon import OnboardingDoraemonBot\nfrom ds_chatbot.

### Chunkings

In [9]:
documents_splitter = RecursiveCharacterTextSplitter.from_language(language = Language.PYTHON,
                                                             chunk_size = 2000,
                                                             chunk_overlap = 200)

In [10]:
texts = documents_splitter.split_documents(documents)

In [11]:
len(texts)

36

In [12]:
import os

### Embedding model

In [13]:
os.environ["OPENAI_API_KEY"] = "********"

In [14]:
embeddings=OpenAIEmbeddings(disallowed_special=())

### Knowledge base (vector DB)

In [15]:
# vectordb = Chroma.from_documents(texts, embedding=embeddings, persist_directory='./data-gpt-3.5-1')
# vectordb.persist()

In [None]:
vectordb = Chroma.from_documents(
    documents=texts,
    collection_name="rag-chroma",
    embedding=embeddings,
)

### LLM Wrapper

In [16]:
# llm = ChatOpenAI(model_name="gpt-4")
llm = ChatOpenAI(model_name="gpt-3.5-turbo")

In [17]:
memory = ConversationSummaryMemory(llm=llm, memory_key = "chat_history", return_messages=True)

In [18]:
qa = ConversationalRetrievalChain.from_llm(llm, retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k":5}), memory=memory)

### Q&A

In [19]:
question = "Apakah ada bug pada kode yang diberikan"

In [20]:
result = qa(question)
print(result['answer'])

Tidak, berdasarkan potongan kode yang diberikan, tidak terlihat adanya bug yang jelas.


In [21]:
question = "Apakah ada kode yang perlu di optimisasi"

In [22]:
result = qa(question)
print(result['answer'])

Kode yang diberikan terlihat sudah cukup baik dan tidak memerlukan optimisasi yang signifikan. Kode tersebut sudah terstruktur dengan baik dan mudah dibaca. Jika ada bagian tertentu yang ingin dioptimalkan, itu mungkin lebih pada preferensi atau kebutuhan spesifik proyek yang bersangkutan.


In [23]:
from pprint import pprint
pprint(result['answer'])

('Kode yang diberikan terlihat sudah cukup baik dan tidak memerlukan '
 'optimisasi yang signifikan. Kode tersebut sudah terstruktur dengan baik dan '
 'mudah dibaca. Jika ada bagian tertentu yang ingin dioptimalkan, itu mungkin '
 'lebih pada preferensi atau kebutuhan spesifik proyek yang bersangkutan.')
