Skip to content

Commit

Permalink
Update doc chain documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
elpham6 committed Apr 23, 2024
1 parent 837e179 commit 99ca1f5
Show file tree
Hide file tree
Showing 60 changed files with 3,911 additions and 12 deletions.
120 changes: 120 additions & 0 deletions ci/branch_Jenkinsfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Declarative Jenkins pipeline: build a per-workspace virtualenv, install the
// package, then run lint (ruff), static type checks (mypy) and the pytest
// suite against a throwaway ChromaDB container.
pipeline {
    agent any

    options{
        // Checkout is performed explicitly in the 'Checkout' stage after cleanWs().
        skipDefaultCheckout(true)
    }
    environment {
        // Interpreter path handed to withPythonEnv() below: the venv's bin dir.
        PYTHONPATH = "${env.WORKSPACE}/.venv/bin"
        // CUDA toolchain so native deps build with cuBLAS during 'pip install -e .'.
        CUDACXX = '/usr/local/cuda-12/bin/nvcc'
        CMAKE_ARGS = "-DLLAMA_CUBLAS=on"
        PATH="/usr/local/cuda-12.3/bin:$PATH"
        LD_LIBRARY_PATH="/usr/local/cuda-12.3/lib64:$LD_LIBRARY_PATH"
        // Auto-accept unseen SSH host keys so git-over-ssh fetches don't hang on a prompt.
        GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=accept-new"
    }


    stages {
        stage('Checkout') {
            steps {
                // Start from an empty workspace, then fetch the triggering revision.
                cleanWs()
                checkout scm
            }
        }

        stage('Create venv'){
            steps {
                sh 'python3 -m venv .venv'
            }
        }

        stage('Install dependencies'){
            steps {
                withPythonEnv(PYTHONPATH){
                    // Editable install of the project plus its declared dependencies.
                    sh 'pip install -e .'
                }
            }

        }

        stage('Config'){
            steps{
                withPythonEnv(PYTHONPATH){
                    // Rewrite project config for CI, then restore a pristine
                    // vector-db fixture from the backup kept under $JENKINS_HOME.
                    sh 'python3 ci/modify_config.py'
                    sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test'
                    sh 'cp -r $JENKINS_HOME/ci_test_data/data/backup_vectordb/ci_test $JENKINS_HOME/ci_test_data/data/vectordb'
                }
            }
        }

        stage('Linting'){
            steps {
                withPythonEnv(PYTHONPATH){
                    sh 'pip install ruff'
                    // catchError: mark only this stage FAILURE, keep the build running.
                    catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){
                        // NOTE(review): *venv* is unquoted, so the shell may glob-expand
                        // it before ruff sees it — confirm the exclude pattern survives.
                        sh 'ruff check . --exclude *venv* --output-format junit -o ruff-report.xml'
                        sh 'ruff format .'
                    }
                }
            }
            post {
                always{
                    // Publish lint results as a check even when the stage failed.
                    withChecks('Lint Checks'){
                        junit 'ruff-report.xml'
                    }
                }
            }
        }

        stage('Static type check'){
            steps {
                withPythonEnv(PYTHONPATH){
                    sh 'pip install mypy'
                    catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){
                        sh 'python3 -m mypy -p src.grag --junit-xml mypy-report.xml'
                    }
                }
            }
            post {
                always{
                    withChecks('Static Type Checks'){
                        junit 'mypy-report.xml'
                    }
                }
            }
        }

        stage('Tests'){
            steps{
                // Integration tests expect a live Chroma server on localhost:8000.
                sh 'docker pull chromadb/chroma'
                sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma'
                withPythonEnv(PYTHONPATH){
                    sh 'pip install pytest'
                    // Clear any stale DeepLake dataset lock left by an aborted run.
                    sh 'python3 ci/unlock_deeplake.py'
                    sh 'pytest src -vvv --junitxml=pytest-report.xml'
                }
            }
            post {
                always{
                    // Tear down the container even when tests fail.
                    // NOTE(review): if 'docker run' itself failed, these stop/rm steps
                    // will error and mask the original failure — consider '|| true'.
                    sh 'docker stop jenkins-chroma'
                    sh 'docker rm jenkins-chroma'
                    withChecks('Integration Tests'){
                        junit 'pytest-report.xml'
                    }
                }
            }
        }
    }
    post {
        cleanup{
            // Final workspace scrub: delete files matching the INCLUDE pattern
            // ('.gitignore'), never those matching the EXCLUDE ('.propsfile').
            cleanWs(
                cleanWhenNotBuilt: false,
                deleteDirs: true,
                disableDeferredWipeout: true,
                notFailBuild: true,
                patterns: [[pattern: '.gitignore', type: 'INCLUDE'],
                           [pattern: '.propsfile', type: 'EXCLUDE']]
            )
        }
    }
}
3 changes: 3 additions & 0 deletions cookbook/Basic-RAG/BasicRAG_refine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Refine Chain
=======================
This cookbook demonstrates how to use the refine chain for BasicRAG.
.. image:: src/docs/_static/refine_chain_langchain_illustration.jpg
:width: 400
:alt: Refine Documents Chain Process
"""

from grag.components.multivec_retriever import Retriever
Expand Down
3 changes: 3 additions & 0 deletions cookbook/Basic-RAG/BasicRAG_stuff.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Stuff Chain
=======================
This cookbook demonstrates how to use the stuff chain for BasicRAG.
.. image:: src/docs/_static/stuff_chain_langchain_illustration.jpg
:width: 400
:alt: Stuff Documents Chain Process
"""

from grag.components.multivec_retriever import Retriever
Expand Down
178 changes: 178 additions & 0 deletions cookbook/RAG-GUI/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
"""A cookbook demonstrating how to run RAG app on streamlit."""

import os
import sys
from pathlib import Path

import streamlit as st
from grag.components.multivec_retriever import Retriever
from grag.components.utils import get_config
from grag.components.vectordb.deeplake_client import DeepLakeClient
from grag.rag.basic_rag import BasicRAG

# Make the repo root importable when the app is launched from this sub-directory.
# NOTE(review): this runs after the grag imports above, so it only affects
# imports performed later — confirm it is still needed.
sys.path.insert(1, str(Path(os.getcwd()).parents[1]))

# Page chrome; must run before any other Streamlit rendering call.
st.set_page_config(page_title="GRAG",
                   menu_items={
                       "Get Help": "https://github.com/arjbingly/Capstone_5",
                       "About": "This is a simple GUI for GRAG"
                   })


def spinner(text):
    """Decorator factory that shows a Streamlit spinner while a function runs.

    The returned decorator wraps any function so that ``st.spinner(text)`` is
    displayed for the duration of the call, indicating an operation is in
    progress.

    Args:
        text (str): The message to display next to the spinner.

    Returns:
        function: A decorator that wraps a function in a spinner context.
    """

    def _spinner(func):
        """Wrap *func* so a spinner is shown while it executes."""
        # Local import keeps the module's import block untouched.
        from functools import wraps

        @wraps(func)  # preserve the wrapped function's name and docstring
        def wrapper_func(*args, **kwargs):
            """Execute the wrapped function inside the spinner context."""
            with st.spinner(text=text):
                # Propagate the wrapped function's return value — the original
                # implementation silently discarded it.
                return func(*args, **kwargs)

        return wrapper_func

    return _spinner


@st.cache_data
def load_config():
    """Load the project configuration once; cached by Streamlit across reruns."""
    return get_config()


# Module-level config shared with RAGApp below.
conf = load_config()


class RAGApp:
    """Streamlit front-end for a Retrieval-Augmented Generation (RAG) model.

    Attributes:
        app: The Streamlit module/instance used to render the UI.
        conf: Configuration settings or parameters for the application.
    """

    def __init__(self, app, conf):
        """Initialize the app wrapper.

        Args:
            app: The main application or framework instance (here the
                ``streamlit`` module) that this class renders through.
            conf: A configuration object or dictionary with app settings.
        """
        self.app = app
        self.conf = conf

    def render_sidebar(self):
        """Render the sidebar with model selection and generation parameters.

        Every widget carries a ``key``, so its current value is persisted in
        ``st.session_state`` and read back by :meth:`load_rag` / :meth:`render_main`.
        """
        with st.sidebar:
            st.title('GRAG')
            st.subheader('Models and parameters')
            st.sidebar.selectbox('Choose a model',
                                 ['Llama-2-13b-chat', 'Llama-2-7b-chat',
                                  'Mixtral-8x7B-Instruct-v0.1', 'gemma-7b-it'],
                                 key='selected_model')
            st.sidebar.slider('Temperature',
                              min_value=0.1,
                              max_value=1.0,
                              value=0.1,
                              step=0.1,
                              key='temperature')
            st.sidebar.slider('Top-k',
                              min_value=1,
                              max_value=5,
                              value=3,
                              step=1,
                              key='top_k')
            # Loading is a button callback so it runs before the next rerun's render.
            st.button('Load Model', on_click=self.load_rag)
            st.checkbox('Show sources', key='show_sources')

    @spinner(text='Loading model...')
    def load_rag(self):
        """Load the selected RAG model using the sidebar settings.

        Drops any previously loaded model from the session, builds per-model
        LLM kwargs, and stores a new ``BasicRAG`` under ``st.session_state['rag']``.
        """
        # Discard the previous model so a fresh one is constructed below.
        if 'rag' in st.session_state:
            del st.session_state['rag']

        llm_kwargs = {"temperature": st.session_state['temperature'], }
        # Per-model GPU-offload layer counts and quantization levels.
        if st.session_state['selected_model'] == "Mixtral-8x7B-Instruct-v0.1":
            llm_kwargs['n_gpu_layers'] = 16
            llm_kwargs['quantization'] = 'Q4_K_M'
        elif st.session_state['selected_model'] == "gemma-7b-it":
            llm_kwargs['n_gpu_layers'] = 18
            llm_kwargs['quantization'] = 'f16'

        retriever_kwargs = {
            "client_kwargs": {"read_only": True, },
            "top_k": st.session_state['top_k']
        }
        # NOTE(review): a client/retriever pair is constructed directly AND
        # retriever_kwargs is passed alongside it — confirm BasicRAG does not
        # build a second retriever from the kwargs.
        client = DeepLakeClient(collection_name="usc", read_only=True)
        retriever = Retriever(vectordb=client)

        st.session_state['rag'] = BasicRAG(model_name=st.session_state['selected_model'], stream=True,
                                           llm_kwargs=llm_kwargs, retriever=retriever,
                                           retriever_kwargs=retriever_kwargs)
        st.success(
            f"""Model Loaded !!!
Model Name: {st.session_state['selected_model']}
Temperature: {st.session_state['temperature']}
Top-k : {st.session_state['top_k']}"""
        )

    def clear_cache(self):
        """Clear the data cached by ``st.cache_data`` (e.g. the loaded config)."""
        st.cache_data.clear()

    def render_main(self):
        """Render the main chat interface for interaction with the loaded model."""
        st.title(":us: US Constitution Expert! :mortar_board:")
        if 'rag' not in st.session_state:
            # No model loaded yet — direct the user to the sidebar button.
            st.warning("You have not loaded any model")
        else:
            user_input = st.chat_input("Ask me anything about the US Constitution.")

            if user_input:
                with st.chat_message("user"):
                    st.write(user_input)
                with st.chat_message("assistant"):
                    # Element 0 of the rag call's result is streamed to the UI.
                    _ = st.write_stream(
                        st.session_state['rag'](user_input)[0]
                    )
                # Optionally show which chunks the retriever matched.
                # NOTE(review): 'show_sources' is set by the sidebar checkbox,
                # which renders after this — verify the key exists on first run.
                if st.session_state['show_sources']:
                    retrieved_docs = st.session_state['rag'].retriever.get_chunk(user_input)
                    for index, doc in enumerate(retrieved_docs):
                        with st.expander(f"Source {index + 1}"):
                            st.markdown(f"**{index + 1}. {doc.metadata['source']}**")
                            # if st.session_state['show_content']:
                            st.text(f"**{doc.page_content}**")

    def render(self):
        """Render both the main chat area and the sidebar."""
        self.render_main()
        self.render_sidebar()


if __name__ == "__main__":
    # Streamlit re-executes this script top-to-bottom on every interaction;
    # RAGApp reads persistent state from st.session_state.
    app = RAGApp(st, conf)
    app.render()
Binary file removed full_report/Capstone5 Presentation v1.pptx
Binary file not shown.
Binary file added full_report/~$Capstone5 Presentation v1.pptx
Binary file not shown.
14 changes: 14 additions & 0 deletions llm_quantize/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## Model Quantization

This module provides an interactive way to quantize your model.
To quantize a model, run:
`python -m grag.quantize.quantize`

After running the above command, the user will be prompted with the following:

- Path where the user wants to clone the [llama.cpp](https://github.com/ggerganov/llama.cpp) repo
- Whether the user wants the model downloaded from [HuggingFace](https://huggingface.co/models) or already has the model downloaded
locally
  - For the former, the user will be prompted to provide the repo path from HuggingFace
  - For the latter, the user will be instructed to copy the model and input the name of the model directory
- Finally, the user will be prompted to enter the quantization level (recommended Q5_K_M or Q4_K_M, etc.). For more details, check [here](https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19).
16 changes: 16 additions & 0 deletions projects/Basic-RAG/BasicRAG_ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""A cookbook demonstrating how to ingest pdf files for use with BasicRAG."""

from pathlib import Path

from grag.components.multivec_retriever import Retriever
from grag.components.vectordb.deeplake_client import DeepLakeClient

# from grag.rag.basic_rag import BasicRAG

client = DeepLakeClient(collection_name="test")
retriever = Retriever(vectordb=client)

dir_path = Path(__file__).parent / "some_dir"

retriever.ingest(dir_path)
# rag = BasicRAG(doc_chain="refine")
Binary file not shown.
Binary file not shown.
Binary file modified src/docs/_build/doctrees/environment.pickle
Binary file not shown.
2 changes: 1 addition & 1 deletion src/docs/_build/html/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 6b07e347145cdddb9512d0a4e5895210
config: d4c21b77045124631c4c68a22a4bc3b5
tags: 645f666f9bcd5a90fca523b33c5a78b7
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Stuff Chain
=======================
This cookbook demonstrates how to use the stuff chain for BasicRAG.
.. image:: src/docs/_static/stuff_chain_langchain_illustration.jpg
:width: 400
:alt: Stuff Documents Chain Process
"""

from grag.components.multivec_retriever import Retriever
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stuff Chain\nThis cookbook demonstrates how to use the stuff chain for BasicRAG.\n"
"# Stuff Chain\nThis cookbook demonstrates how to use the stuff chain for BasicRAG.\n<img src=\"file://src/docs/_static/stuff_chain_langchain_illustration.jpg\" width=\"400\" alt=\"Stuff Documents Chain Process\">\n"
]
},
{
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Refine Chain\nThis cookbook demonstrates how to use the refine chain for BasicRAG.\n"
"# Refine Chain\nThis cookbook demonstrates how to use the refine chain for BasicRAG.\n<img src=\"file://src/docs/_static/refine_chain_langchain_illustration.jpg\" width=\"400\" alt=\"Refine Documents Chain Process\">\n"
]
},
{
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Refine Chain
=======================
This cookbook demonstrates how to use the refine chain for BasicRAG.
.. image:: src/docs/_static/refine_chain_langchain_illustration.jpg
:width: 400
:alt: Refine Documents Chain Process
"""

from grag.components.multivec_retriever import Retriever
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 99ca1f5

Please sign in to comment.