In [None]:
from typing import Any
from uuid import UUID

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain.document_loaders import DirectoryLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain_text_splitters import (
  Language, RecursiveCharacterTextSplitter
)
from langchain.vectorstores import FAISS

# Demonstration of retrieval-augmented C++ code completion with the pretrained Granite-3B-Code-Base-2K model

## Introduction
This notebook demonstrates how the [Granite-3B-Code-Base-2K](#Granite-3B-Code-Base-2K) model can be used for C++ code completion with [retrieval-augmented generation (RAG)](https://en.wikipedia.org/wiki/Retrieval-augmented_generation), based on the *.cc* and *.h* files in a folder. The demonstrated method utilizes:
- [LangChain](#LangChain) framework for development of applications powered by large language models;
- [Facebook AI Similarity Search (FAISS)](#Facebook-AI-Similarity-Search) vector-based text similarity search;
- [Hugging Face Transformers](#Hugging-Face-Transformers) deep learning library.

## Configuration

In [None]:
# Data: folder scanned for C++ files, and the chunk size given to the splitter.
DATA_DIR = "data"
CHUNK_SIZE = 3 * 80  # three 80-character lines

# Text generation: Hugging Face model id and the cap on newly generated tokens.
MODEL_NAME = "ibm-granite/granite-3b-code-base-2k"
MAX_GEN_TOKENS = 100

## Dataset retrieval

The dataset is a `FAISS` vector database built from code chunks that are extracted from the C++ source (*.cc*) and header (*.h*) files found under the `DATA_DIR` folder. The vectors are computed with the default `HuggingFaceEmbeddings` embedding model.

Only the best vector similarity match is retrieved from the database.

In [None]:
# Load every C++ source (*.cc) and header (*.h) file found under DATA_DIR.
doc_loader = DirectoryLoader(DATA_DIR, glob=["**/*.cc", "**/*.h"])
docs = doc_loader.load()

# Split the loaded files with the C++-specific separators, targeting chunks
# of CHUNK_SIZE characters with no overlap between neighbouring chunks.
cpp_splitter = RecursiveCharacterTextSplitter.from_language(
  chunk_overlap=0,
  chunk_size=CHUNK_SIZE,
  language=Language.CPP,
)
cpp_chunks = cpp_splitter.split_documents(docs)

# Embed the chunks with the default HuggingFaceEmbeddings model and index
# the resulting vectors in FAISS.
vectorstore = FAISS.from_documents(
  documents=cpp_chunks,
  embedding=HuggingFaceEmbeddings(),
)

# Similarity search limited to the single closest chunk (k=1).
retriever = vectorstore.as_retriever(
  search_kwargs=dict(k=1),
  search_type="similarity",
)

## Model

The [Granite-3B-Code-Base-2K](#Granite-3B-Code-Base-2K) model is used via a [Hugging Face Transformers](#Hugging-Face-Transformers) text generation pipeline.

In [None]:
# Wrap the model in a Transformers text-generation pipeline.
llm = HuggingFacePipeline.from_model_id(
  task="text-generation",
  model_id=MODEL_NAME,
  # Let the library decide on which available device(s) to place the model.
  device_map="auto",
  pipeline_kwargs=dict(
    # Upper bound on the number of tokens generated per completion.
    max_new_tokens=MAX_GEN_TOKENS,
    # Return only the generated continuation, without the prompt text.
    return_full_text=False,
  ),
)

## Testing

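As a comparison test, the same custom prompt is completed twice: once by the model alone (plain prompt, no retrieval) and once through the `RAG` chain, in which the best-matching code chunk retrieved for the prompt is passed to the model.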

In [None]:
class RagChainHandler(BaseCallbackHandler):
  """Callback handler that echoes the prompt handed to the LLM."""

  def on_llm_start(
    self,
    serialized: dict[str, Any],
    prompts: list[str],
    *,
    run_id: UUID,
    parent_run_id: UUID | None = None,
    tags: list[str] | None = None,
    metadata: dict[str, Any] | None = None,
    **kwargs: Any,
  ) -> Any:
    """Print the first prompt under a ``[ RAG PROMPT ]`` header and a rule.

    Only ``prompts`` is read; every other parameter is accepted to match
    the callback signature and is ignored. Nothing is returned.
    """
    banner = f"[ RAG PROMPT ]\n\n{prompts[0]}"
    # One call, two lines of output: the banner, then an 80-column rule.
    print(banner, "-" * 80, sep="\n")


def format_docs(docs: list[Document]) -> str:
  """Merge retrieved documents into a single prompt string.

  Args:
    docs: Documents returned by the retriever.

  Returns:
    The ``page_content`` of every document, in the given order, separated
    by a blank line. A single document yields its content unchanged; an
    empty list yields an empty string instead of raising ``IndexError``.
  """
  # Joining (rather than indexing the first element) keeps the function
  # correct when the retriever returns zero or more than one document.
  return "\n\n".join(doc.page_content for doc in docs)


prompt = "engine.StartGame"

# RAG chain: retrieve the closest chunk, turn it into the model input,
# generate, and parse the output as a string. The handler prints the prompt
# that actually reaches the model.
rag_chain = (
  retriever | format_docs | llm | StrOutputParser()
).with_config(callbacks=[RagChainHandler()])

# Plain completion: the model sees only the raw prompt.
print(f"[ PROMPT ]\n\n{prompt}")
print("-" * 80)
plain_completion = llm.invoke(prompt)
print(f"[ GENERATED ]\n\n{plain_completion}")
print("-" * 80)

# RAG completion: the chain is invoked first, so the handler's output
# appears before the generated text is printed.
rag_completion = rag_chain.invoke(prompt)
print(f"[ RAG GENERATED ]\n\n{rag_completion}")
print("-" * 80)

## References

<br><br>

### APA style for references
American Psychological Association. (2022). Creating an APA Style reference list guide. https://apastyle.apa.org/instructional-aids/creating-reference-list.pdf

American Psychological Association. (2024). APA Style common reference examples guide. https://apastyle.apa.org/instructional-aids/reference-examples.pdf

<br><br>

### Vector databases
#### Facebook AI Similarity Search
Douze, M., Guzhva, A., Deng, C., Johnson, J., Szilvasy, G., Mazaré, P. E., Lomeli, M., Hosseini, L., & Jégou, H. (2024). The Faiss library. ArXiv, abs/2401.08281. https://arxiv.org/abs/2401.08281

Johnson, J., Douze, M., & Jégou, H. (2019). Billion-scale similarity search with GPUs. IEEE Transactions on Big Data, 7(3), 535-547. https://arxiv.org/abs/1702.08734
- [FAISS - Wikipedia](https://en.wikipedia.org/wiki/FAISS)

<br><br>

### Machine learning models
#### Granite-3B-Code-Base-2K
Mishra, M., Stallone, M., Zhang, G., Shen, Y., Prasad, A., Soria, A.M., Merler, M., Selvam, P., Surendran, S., Singh, S., Sethi, M., Dang, X., Li, P., Wu, K., Zawad, S., Coleman, A., White, M., Lewis, M., Pavuluri, R., Koyfman, Y., Lublinsky, B., Bayser, M.D., Abdelaziz, I., Basu, K., Agarwal, M., Zhou, Y., Johnson, C., Goyal, A., Patel, H., Shah, Y., Zerfos, P., Ludwig, H., Munawar, A., Crouse, M., Kapanipathi, P., Salaria, S., Calio, B., Wen, S., Seelam, S.R., Belgodere, B.M., Fonseca, C., Singhee, A., Desai, N., Cox, D.D., Puri, R., & Panda, R. (2024). Granite Code Models: A Family of Open Foundation Models for Code Intelligence. ArXiv, abs/2405.04324. https://arxiv.org/abs/2405.04324

<br><br>

### Guides and tutorials
- [Introduction | 🦜️🔗 LangChain](https://python.langchain.com/docs/introduction/)
- [ibm-granite/granite-3b-code-base-2k · Hugging Face](https://huggingface.co/ibm-granite/granite-3b-code-base-2k)
- [Hugging Face - Documentation](https://huggingface.co/docs)

<br><br>

### Libraries and frameworks
#### Hugging Face Transformers
Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Ma, C., Jernite, Y., Plu, J., Xu, C., Le Scao, T., Gugger, S., Drame, M., Lhoest, Q., & Rush, A. M. (2020). Transformers: State-of-the-Art Natural Language Processing [Conference paper]. Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, 38–45. https://www.aclweb.org/anthology/2020.emnlp-demos.6
- [Transformers](https://huggingface.co/docs/transformers/index)

#### LangChain
Chase, H. (2022). LangChain [Computer software]. https://github.com/langchain-ai/langchain
- [Introduction | 🦜️🔗 LangChain](https://python.langchain.com/docs/introduction/)
