![Img](https://app.theheadstarter.com/static/hs-logo-opengraph.png)

# Headstarter Codebase RAG Project

![Screenshot 2024-11-25 at 7 12 58 PM](https://github.com/user-attachments/assets/0bd67cf0-43d5-46d2-879c-a752cae4c8e3)

# Install Necessary Libraries

In [1]:
! pip install pygithub langchain langchain-community openai tiktoken pinecone-client langchain_pinecone sentence-transformers

Collecting pygithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting pinecone-client
  Downloading pinecone_client-5.0.1-py3-none-any.whl.metadata (19 kB)
Collecting langchain_pinecone
  Downloading langchain_pinecone-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting pynacl>=1.4.0 (from pygithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain
  Downloading langchain-0.3.9-py

In [2]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from langchain_pinecone import PineconeVectorStore
from langchain.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from google.colab import userdata
from pinecone import Pinecone
import os
import tempfile
from github import Github, Repository
from git import Repo
from openai import OpenAI
from pathlib import Path
from langchain.schema import Document

  from tqdm.autonotebook import tqdm, trange


# Clone a GitHub Repo locally

In [3]:
def clone_repository(repo_url):
    """Clone a GitHub repository into ``/content/<repo name>``.

    The directory name is taken from the last path segment of the URL. A
    trailing slash or a ``.git`` suffix is ignored, so
    ``https://github.com/org/repo``, ``.../repo/`` and ``.../repo.git`` all
    map to ``/content/repo``.

    If that directory already holds a git checkout (for example because the
    cell was run before), it is reused instead of cloning again.

    Args:
        repo_url: The URL of the GitHub repository.

    Returns:
        The path to the cloned repository, as a string.

    Raises:
        ValueError: If no repository name can be derived from ``repo_url``.
    """
    repo_name = repo_url.rstrip("/").split("/")[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[:-len(".git")]
    if not repo_name:
        raise ValueError(f"Could not determine a repository name from URL: {repo_url!r}")

    repo_path = f"/content/{repo_name}"

    # Cloning into an existing checkout would fail; reuse it so the cell can
    # be re-run safely.
    if os.path.isdir(os.path.join(repo_path, ".git")):
        return repo_path

    Repo.clone_from(repo_url, repo_path)
    return repo_path

In [4]:
path = clone_repository("https://github.com/CoderAgent/SecureAgent")

In [5]:
print(path)

/content/SecureAgent


In [6]:
# File extensions that are treated as source code and indexed.
SUPPORTED_EXTENSIONS = {
    '.py', '.ipynb',
    '.js', '.jsx', '.ts', '.tsx', '.vue',
    '.java', '.go', '.rs', '.swift',
    '.c', '.h', '.cpp',
}

# Directory names (dependencies, build output, tooling) that are not indexed.
IGNORED_DIRS = {
    '.git', '.next', '.vscode', '__pycache__',
    'node_modules', 'vendor',
    'venv', 'env',
    'dist', 'build',
}

In [7]:
def get_file_content(file_path, repo_path):
    """
    Read one file as UTF-8 text and label it with its repo-relative path.

    Args:
        file_path (str): Path to the file to read
        repo_path (str): Root of the repository; the returned name is
            ``file_path`` expressed relative to this directory

    Returns:
        Optional[Dict[str, str]]: ``{"name": <relative path>, "content": <text>}``,
        or None if anything goes wrong (the error is printed, not raised)
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()

        return {"name": os.path.relpath(file_path, repo_path), "content": text}
    except Exception as e:
        # Best effort: report the unreadable file and let the caller skip it.
        print(f"Error processing file {file_path}: {str(e)}")
        return None


def get_main_files_content(repo_path: str):
    """
    Get content of supported code files from the local repository.

    Walks ``repo_path`` and collects every file whose extension is in
    SUPPORTED_EXTENSIONS. A directory is skipped, together with everything
    below it, only when its own name is in IGNORED_DIRS. Names are compared
    exactly, so ``environment/`` is not mistaken for ``env`` and the location
    of the repository itself never causes files to be skipped.

    Args:
        repo_path: Path to the local repository

    Returns:
        List of dictionaries containing file names and contents, in a
        deterministic (sorted) traversal order. Files that cannot be read
        are left out.
    """
    files_content = []

    try:
        for root, dirs, files in os.walk(repo_path):
            # Editing `dirs` in place tells os.walk not to descend into the
            # removed directories at all.
            dirs[:] = sorted(d for d in dirs if d not in IGNORED_DIRS)

            for file in sorted(files):
                if os.path.splitext(file)[1] in SUPPORTED_EXTENSIONS:
                    file_content = get_file_content(os.path.join(root, file), repo_path)
                    if file_content:
                        files_content.append(file_content)

    except Exception as e:
        # Best effort: report the problem and return whatever was collected.
        print(f"Error reading repository: {str(e)}")

    return files_content

In [8]:
file_content = get_main_files_content(path)

In [9]:
file_content

[{'name': 'src/env.ts',
  'content': 'import * as dotenv from "dotenv";\nimport { createPrivateKey } from "crypto";\nimport chalk from "chalk";\n\ndotenv.config();\n\nexport const env = {\n  GITHUB_APP_ID: process.env.GITHUB_APP_ID,\n  GITHUB_PRIVATE_KEY: process.env.GITHUB_PRIVATE_KEY,\n  GITHUB_WEBHOOK_SECRET: process.env.GITHUB_WEBHOOK_SECRET,\n  GROQ_API_KEY: process.env.GROQ_API_KEY,\n} as const;\n\nlet valid = true;\n\nfor (const key in env) {\n  if (!env[key as keyof typeof env]) {\n    console.log(\n      chalk.red("✖") +\n        chalk.gray(" Missing required env var: ") +\n        chalk.bold(`process.env.${key}`)\n    );\n    valid = false;\n  }\n}\n\ntry {\n  createPrivateKey(env.GITHUB_PRIVATE_KEY);\n} catch (error) {\n  console.log(\n    chalk.red(\n      "\\n✖ Invalid GitHub private key format for " +\n        chalk.bold(`process.env.GITHUB_PRIVATE_KEY`) +\n        "\\n"\n    ) +\n      chalk.gray("  • Must start with: ") +\n      chalk.bold("-----BEGIN RSA PRIVATE KEY---

# Embeddings

In [10]:
# Loaded SentenceTransformer models, keyed by model name, so each model is
# only loaded once per kernel session.
_EMBEDDING_MODELS = {}


def get_huggingface_embeddings(text, model_name="sentence-transformers/all-mpnet-base-v2"):
    """Embed ``text`` with a SentenceTransformer model.

    The model is loaded the first time a given ``model_name`` is requested
    and reused on later calls, instead of being re-created for every query.

    Args:
        text: The input passed to the model's ``encode`` method.
        model_name: Name of the SentenceTransformer model to use.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``text``.
    """
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = _EMBEDDING_MODELS[model_name] = SentenceTransformer(model_name)
    return model.encode(text)

In [11]:
text = "I am a programmer"

embeddings = get_huggingface_embeddings(text)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [12]:
embeddings

array([ 1.81737654e-02, -3.02657508e-03, -4.77465875e-02,  1.86379403e-02,
        3.14537995e-02,  1.87255293e-02, -1.52534274e-02, -6.77293688e-02,
       -1.26903653e-02,  1.28427576e-02,  5.80701306e-02,  4.00234833e-02,
        3.27073298e-02,  7.12998435e-02,  5.56373373e-02,  1.68628506e-02,
        6.97603747e-02, -5.02619930e-02,  6.13140827e-03, -1.46559235e-02,
       -4.51957993e-03,  4.82934639e-02, -2.53051296e-02, -1.97862904e-03,
       -4.36902530e-02, -2.41507161e-02,  1.29505759e-02, -3.78611824e-03,
       -2.05718316e-02,  1.09819308e-01,  3.07672890e-03, -2.80443169e-02,
       -1.55807249e-02, -1.24789868e-02,  1.75239131e-06, -2.93756695e-03,
       -1.43048428e-02,  4.88386713e-02, -6.21114224e-02,  2.95061413e-02,
       -1.40470508e-02,  2.20708270e-02,  1.13067888e-02,  4.70893271e-02,
        7.58305984e-03, -8.30314530e-05,  6.67821169e-02, -1.21320095e-02,
        4.39386303e-03,  2.47453637e-02,  1.02529004e-02, -6.54432410e-03,
       -5.53147821e-03, -

# Setting up Pinecone
**1. Create an account on [Pinecone.io](https://app.pinecone.io/)**

**2. Create a new index called "codebase-rag" and set the dimensions to 768. Leave the rest of the settings as they are.**

![Screenshot 2024-11-24 at 10 58 50 PM](https://github.com/user-attachments/assets/f5fda046-4087-432a-a8c2-86e061005238)



**3. Create an API Key for Pinecone**

![Screenshot 2024-11-24 at 10 44 37 PM](https://github.com/user-attachments/assets/e7feacc6-2bd1-472a-82e5-659f65624a88)


**4. Store your Pinecone API Key within Google Colab's secrets section, and then enable access to it (see the blue checkmark)**

![Screenshot 2024-11-24 at 10 45 25 PM](https://github.com/user-attachments/assets/eaf73083-0b5f-4d17-9e0c-eab84f91b0bc)



In [13]:
# Read the Pinecone API key from Colab secrets once, and export it as an
# environment variable; later cells build PineconeVectorStore without passing
# a key explicitly (presumably it is picked up from this variable).
pinecone_api_key = userdata.get("PINECONE_API_KEY")
os.environ['PINECONE_API_KEY'] = pinecone_api_key

# Initialize Pinecone with the key fetched above
pc = Pinecone(api_key=pinecone_api_key)

# Connect to your Pinecone index
pinecone_index = pc.Index("codebase-rag")

In [14]:
# Wrap the existing "codebase-rag" index as a LangChain vector store.
# NOTE(review): HuggingFaceEmbeddings() is used with its default model; its
# output size has to match the 768 dimensions the index was created with — confirm.
vectorstore = PineconeVectorStore(index_name="codebase-rag", embedding=HuggingFaceEmbeddings())

  vectorstore = PineconeVectorStore(index_name="codebase-rag", embedding=HuggingFaceEmbeddings())
  vectorstore = PineconeVectorStore(index_name="codebase-rag", embedding=HuggingFaceEmbeddings())


In [15]:
# Build one LangChain Document per source file. The relative path is put at
# the top of the text so the file name itself is part of what gets embedded.
documents = []
document_ids = []

for file in file_content:
    doc = Document(
        page_content=f"{file['name']}\n{file['content']}",
        metadata={"source": file['name']}
    )

    documents.append(doc)
    # Deterministic ID derived from the file path (escaped to plain ASCII).
    # With stable IDs, re-running this cell overwrites the existing vectors
    # instead of inserting a second copy of every file.
    document_ids.append(file['name'].encode("ascii", "backslashreplace").decode("ascii"))


# Embed the documents and upsert them into the index, under a namespace
# named after the repository they came from.
vectorstore = PineconeVectorStore.from_documents(
    documents=documents,
    embedding=HuggingFaceEmbeddings(),
    index_name="codebase-rag",
    namespace="https://github.com/CoderAgent/SecureAgent",
    ids=document_ids,
)

  embedding=HuggingFaceEmbeddings(),


# Perform RAG

1. Get your Groq API Key [here](https://console.groq.com/keys)

2. Paste your Groq API Key into your Google Colab secrets, and make sure to enable permissions for it

![Screenshot 2024-11-25 at 12 00 16 AM](https://github.com/user-attachments/assets/e5525d29-bca6-4dbd-892b-cc770a6b281d)


In [16]:
# The OpenAI client is pointed at Groq's OpenAI-compatible endpoint and
# authenticated with the Groq key stored in Colab secrets.
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=userdata.get("GROQ_API_KEY")
)

In [17]:
query = "How are python files parsed?"

In [18]:
raw_query_embedding = get_huggingface_embeddings(query)

raw_query_embedding

array([ 5.29357232e-02, -6.24647178e-02, -2.87437718e-02,  1.83179416e-02,
       -4.33840672e-04,  4.03239094e-02, -7.76652806e-03, -2.74394872e-03,
        2.53445264e-02, -8.10819939e-02, -8.44583288e-03, -6.59269514e-03,
        4.16187495e-02,  3.98627296e-02,  2.82911733e-02,  2.84344628e-02,
        2.65303329e-02, -2.60126498e-02,  4.16299142e-02,  3.92820686e-02,
       -5.15580364e-02,  5.83349541e-02,  5.88829117e-03,  3.46064568e-02,
       -2.46872660e-03,  2.72809248e-02,  1.07212560e-02,  4.55760621e-02,
       -1.69188846e-02, -4.85301390e-02, -3.02424375e-02, -3.29698175e-02,
        2.46010050e-02,  3.23601812e-02,  1.16030503e-06,  9.71379410e-03,
       -3.70800160e-02,  1.84200946e-02, -1.39834182e-02,  4.25723344e-02,
        6.78140819e-02, -6.66246563e-02,  2.11651716e-02, -1.11712900e-03,
       -1.80115458e-02, -7.90139660e-02,  5.93152717e-02, -5.23733571e-02,
        5.63013554e-02,  4.31280173e-02,  7.77091645e-03, -2.30586994e-02,
       -2.94572674e-02,  

In [19]:
# Feel free to change the "top_k" parameter to be a higher or lower number
# The namespace is the same one the documents were uploaded under.
top_matches = pinecone_index.query(vector=raw_query_embedding.tolist(), top_k=5, include_metadata=True, namespace="https://github.com/CoderAgent/SecureAgent")

In [20]:
top_matches

{'matches': [{'id': '7efb1a08-7c42-45f7-81b1-09477f1db1c1',
              'metadata': {'source': 'src/context/language/python-parser.ts',
                           'text': 'src/context/language/python-parser.ts\n'
                                   'import { AbstractParser, EnclosingContext '
                                   '} from "../../constants";\n'
                                   'export class PythonParser implements '
                                   'AbstractParser {\n'
                                   '  findEnclosingContext(\n'
                                   '    file: string,\n'
                                   '    lineStart: number,\n'
                                   '    lineEnd: number\n'
                                   '  ): EnclosingContext {\n'
                                   '    // TODO: Implement this method for '
                                   'Python\n'
                                   '    return null;\n'
                          

In [21]:
contexts = [item['metadata']['text'] for item in top_matches['matches']]

In [22]:
contexts

['src/context/language/python-parser.ts\nimport { AbstractParser, EnclosingContext } from "../../constants";\nexport class PythonParser implements AbstractParser {\n  findEnclosingContext(\n    file: string,\n    lineStart: number,\n    lineEnd: number\n  ): EnclosingContext {\n    // TODO: Implement this method for Python\n    return null;\n  }\n  dryRun(file: string): { valid: boolean; error: string } {\n    // TODO: Implement this method for Python\n    return { valid: false, error: "Not implemented yet" };\n  }\n}\n',
 'src/context/language/python-parser.ts\nimport { AbstractParser, EnclosingContext } from "../../constants";\nexport class PythonParser implements AbstractParser {\n  findEnclosingContext(\n    file: string,\n    lineStart: number,\n    lineEnd: number\n  ): EnclosingContext {\n    // TODO: Implement this method for Python\n    return null;\n  }\n  dryRun(file: string): { valid: boolean; error: string } {\n    // TODO: Implement this method for Python\n    return { va

In [23]:
# Join the retrieved snippets inside <CONTEXT> tags and append the question.
# The [:10] slice caps the number of snippets; with top_k=5 it keeps them all.
augmented_query = "<CONTEXT>\n" + "\n\n-------\n\n".join(contexts[ : 10]) + "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" + query

In [24]:
print(augmented_query)

<CONTEXT>
src/context/language/python-parser.ts
import { AbstractParser, EnclosingContext } from "../../constants";
export class PythonParser implements AbstractParser {
  findEnclosingContext(
    file: string,
    lineStart: number,
    lineEnd: number
  ): EnclosingContext {
    // TODO: Implement this method for Python
    return null;
  }
  dryRun(file: string): { valid: boolean; error: string } {
    // TODO: Implement this method for Python
    return { valid: false, error: "Not implemented yet" };
  }
}


-------

src/context/language/python-parser.ts
import { AbstractParser, EnclosingContext } from "../../constants";
export class PythonParser implements AbstractParser {
  findEnclosingContext(
    file: string,
    lineStart: number,
    lineEnd: number
  ): EnclosingContext {
    // TODO: Implement this method for Python
    return null;
  }
  dryRun(file: string): { valid: boolean; error: string } {
    // TODO: Implement this method for Python
    return { valid: false, err

In [25]:
# System prompt: sets the assistant's role and tells it to answer from the
# supplied context.
system_prompt = (
    "You are a Senior Software Engineer, specializing in TypeScript.\n"
    "\n"
    "Answer any questions I have about the codebase, based on the code provided. "
    "Always consider all of the context provided when forming a response.\n"
)

# Send the system prompt plus the context-augmented question to the model.
llm_response = client.chat.completions.create(
    model="llama-3.1-70b-versatile",
    messages=[
        dict(role="system", content=system_prompt),
        dict(role="user", content=augmented_query),
    ],
)

# Keep only the text of the first completion choice.
response = llm_response.choices[0].message.content

In [26]:
response

'Python files are not parsed currently in the codebase. The `PythonParser` class in `src/context/language/python-parser.ts` has two methods, `findEnclosingContext` and `dryRun`, but neither of them are implemented and simply return placeholder values.\n\nTo parse Python files, the `PythonParser` class would need to be modified to use a Python parser library (such as `ast.parse` from the Python standard library or a third-party library like `esprima`), similar to how the `JavascriptParser` class uses the `@babel/parser` library.\n\nHere is a hypothetical example of how the `findEnclosingContext` method might be implemented:\n\n```typescript\nimport * as ast from "@typescript-eslint/experimental-utils/dist/ast";\n\nexport class PythonParser implements AbstractParser {\n  findEnclosingContext(\n    file: string,\n    lineStart: number,\n    lineEnd: number\n  ): EnclosingContext {\n    const tree = ast.parse(file);\n    let largestEnclosingContext: ast.ASTNode = null;\n    let largestSize

# Putting it all together

In [27]:
from openai import OpenAIError

In [28]:
def perform_rag(query, top_k=5, namespace="https://github.com/CoderAgent/SecureAgent"):
    """Answer a question about the indexed codebase with retrieval-augmented generation.

    The query is embedded, the ``top_k`` closest documents are fetched from
    the Pinecone index, and their text is sent together with the question to
    the LLM. If the primary model answers with HTTP 429 (rate limit / quota),
    the request is retried once with the fallback model.

    Args:
        query: The question to answer.
        top_k: How many documents to retrieve from Pinecone.
        namespace: Pinecone namespace to search.

    Returns:
        The text content of the model's first completion choice.

    Raises:
        OpenAIError: If the primary model fails with anything other than a
            429, or if the fallback model fails as well.
    """
    primary_model = "llama-3.1-8b-instant"
    fallback_model = "llama-3.1-70b-versatile"

    raw_query_embedding = get_huggingface_embeddings(query)

    top_matches = pinecone_index.query(
        vector=raw_query_embedding.tolist(),
        top_k=top_k,
        include_metadata=True,
        namespace=namespace,
    )

    # Get the list of retrieved texts
    contexts = [item['metadata']['text'] for item in top_matches['matches']]

    augmented_query = "<CONTEXT>\n" + "\n\n-------\n\n".join(contexts) + "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" + query

    # Modify the prompt below as needed to improve the response quality
    system_prompt = """You are a Senior Software Engineer, specializing in TypeScript.

    Answer any questions I have about the codebase, based on the code provided. Always consider all of the context provided when forming a response.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": augmented_query},
    ]

    try:
        llm_response = client.chat.completions.create(model=primary_model, messages=messages)
        return llm_response.choices[0].message.content
    except OpenAIError as e:
        # The v1 openai client reports the HTTP status as `status_code`, and
        # only on errors that carry an HTTP response; getattr keeps
        # connection-level errors from raising AttributeError here.
        if getattr(e, "status_code", None) != 429:
            print(f"Error with primary model {primary_model}: {e}")
            raise  # Propagate other errors

        print(f"Quota exceeded for {primary_model}. Falling back to {fallback_model}.")
        try:
            # Attempt to use the fallback model
            llm_response = client.chat.completions.create(model=fallback_model, messages=messages)
            return llm_response.choices[0].message.content
        except OpenAIError as fallback_error:
            print(f"Error using fallback model {fallback_model}: {fallback_error}")
            raise  # Propagate the fallback error



In [29]:
response = perform_rag("How is the javascript parser used?")

print(response)

The JavaScript parser, `JavascriptParser`, appears to be a part of a larger system that is responsible for analyzing and processing JavaScript code. 

Based on the provided code, there are two main ways the `JavascriptParser` can be used:

1. **`findEnclosingContext` method**: This method takes a file path, a start line number, and an end line number as input, and returns the enclosing context of the code that spans the specified line range. The enclosing context is assumed to be the largest enclosing node in the Abstract Syntax Tree (AST) of the JavaScript code that covers the specified range.

2. **`dryRun` method**: This method takes a file path as input and returns information about whether the parsing of the file is valid or not. If the parsing is valid, it returns an empty error string, otherwise, it returns the error that occurred during parsing.

The usage of the `JavascriptParser` would typically involve the following steps:

- Create an instance of the `JavascriptParser` clas

# Web App


In [30]:
!pip install streamlit pyngrok python-dotenv langchain_openai

Collecting streamlit
  Downloading streamlit-1.40.2-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Collecting langchain_openai
  Downloading langchain_openai-0.2.11-py3-none-any.whl.metadata (2.7 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.40.2-py2.py3-none-any.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m76.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Downloading langchain_openai-0.2.11-py3-none-any.whl (50 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 

In [31]:
from threading import Thread #We need the server to run in a separate thread
from pyngrok import ngrok #Allows us to create secure tunnels to localhost
from google.colab import userdata
import streamlit as st
from dotenv import load_dotenv
import google.generativeai as genai

In [32]:
ngrok_token = userdata.get('NGROK_AUTH_TOKEN')
ngrok.set_auth_token(ngrok_token)



In [33]:
def run_streamlit():
    """Run the Streamlit app in /content/app.py on port 8501.

    The command is executed through ``os.system``, so this call does not
    return until the ``streamlit`` process exits.
    """
    command = "streamlit run /content/app.py --server.port 8501"
    os.system(command)

In [34]:
load_dotenv()

genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))

In [73]:
%%writefile app.py
import streamlit as st
from langchain_openai.chat_models import ChatOpenAI
from google.colab import userdata
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from openai import OpenAIError
import os
from openai import OpenAI
from dotenv import load_dotenv

# Load variables from a local .env file (GROQ_API_KEY is read from it below).
load_dotenv()

# The OpenAI client is pointed at Groq's OpenAI-compatible endpoint.
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.getenv("GROQ_API_KEY")
)

st.title("Codebase Chat App")
openai_api_key = st.sidebar.text_input("OpenAI API Key", type="password")

# The Pinecone API key is entered by the user in the sidebar
pinecone_api_key = st.sidebar.text_input("Pinecone API Key", type="password")

# The Pinecone client is only created once a key has been entered: on the
# first render the field is empty, and building the client with an empty key
# would fail before the user gets a chance to type one.
pc = None

# Connect to your Pinecone index
pinecone_index = None
if pinecone_api_key:
  pc = Pinecone(api_key=pinecone_api_key)
  st.text("Enter Your Pinecone codebase:")
  user_input = st.text_input("")
  if user_input:
      pinecone_index = pc.Index(user_input)
      st.session_state.submitted = True
      st.write(f"Your Codebase: {user_input}")

@st.cache_resource(show_spinner=False)
def _load_embedding_model(model_name):
    """Load a SentenceTransformer model once and share it across script reruns."""
    return SentenceTransformer(model_name)


def get_huggingface_embeddings(text, model_name="sentence-transformers/all-mpnet-base-v2"):
    """Embed ``text`` with a SentenceTransformer model.

    Streamlit re-executes this script on every interaction, so the model is
    obtained through a cached loader instead of being re-created per query.

    Args:
        text: The input passed to the model's ``encode`` method.
        model_name: Name of the SentenceTransformer model to use.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``text``.
    """
    return _load_embedding_model(model_name).encode(text)

def perform_rag(query, top_k=5, namespace="https://github.com/CoderAgent/SecureAgent"):
    """Answer a question about the indexed codebase with retrieval-augmented generation.

    The query is embedded, the ``top_k`` closest documents are fetched from
    the connected Pinecone index, and their text is sent together with the
    question to the LLM. If the primary model answers with HTTP 429 (rate
    limit / quota), the request is retried once with the fallback model.

    Args:
        query: The question to answer.
        top_k: How many documents to retrieve from Pinecone.
        namespace: Pinecone namespace to search.

    Returns:
        The text content of the model's first completion choice.

    Raises:
        RuntimeError: If no Pinecone index has been connected yet.
        OpenAIError: If the primary model fails with anything other than a
            429, or if the fallback model fails as well.
    """
    if pinecone_index is None:
        raise RuntimeError("No Pinecone index is connected; enter the API key and index name first.")

    primary_model = "llama-3.1-8b-instant"
    fallback_model = "llama-3.1-70b-versatile"

    raw_query_embedding = get_huggingface_embeddings(query)
    top_matches = pinecone_index.query(
        vector=raw_query_embedding.tolist(),
        top_k=top_k,
        include_metadata=True,
        namespace=namespace,
    )

    # Get the list of retrieved texts
    contexts = [item['metadata']['text'] for item in top_matches['matches']]

    augmented_query = "<CONTEXT>\n" + "\n\n-------\n\n".join(contexts) + "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" + query

    # Modify the prompt below as needed to improve the response quality
    system_prompt = """You are a Senior Software Engineer, specializing in TypeScript.

    Answer any questions I have about the codebase, based on the code provided. Always consider all of the context provided when forming a response.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": augmented_query},
    ]

    try:
        llm_response = client.chat.completions.create(model=primary_model, messages=messages)
        return llm_response.choices[0].message.content
    except OpenAIError as e:
        # The v1 openai client reports the HTTP status as `status_code`, and
        # only on errors that carry an HTTP response; getattr keeps
        # connection-level errors from raising AttributeError here.
        if getattr(e, "status_code", None) != 429:
            print(f"Error with primary model {primary_model}: {e}")
            raise  # Propagate other errors

        print(f"Quota exceeded for {primary_model}. Falling back to {fallback_model}.")
        try:
            # Attempt to use the fallback model. The call is not streamed:
            # this function returns the complete answer as one string.
            llm_response = client.chat.completions.create(model=fallback_model, messages=messages)
            return llm_response.choices[0].message.content
        except OpenAIError as fallback_error:
            print(f"Error using fallback model {fallback_model}: {fallback_error}")
            raise  # Propagate the fallback error



# Chat history is kept in the session state under "messages".
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the stored conversation on every run of the script.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if text := st.chat_input(""):
    if pinecone_index is None:
        # Without a connected index there is nothing to retrieve from, so
        # ask for the missing configuration instead of failing inside
        # perform_rag.
        st.warning("Enter your Pinecone API key and index name before asking a question.")
    else:
        st.session_state.messages.append({"role": "user", "content": text})
        with st.chat_message("user"):
            st.markdown(text)

        with st.chat_message("assistant"):
            # perform_rag returns the complete answer as a single string, so
            # it is rendered directly rather than through a one-item stream.
            response = perform_rag(text)
            st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})



Overwriting app.py


In [36]:
# Run the Streamlit server on a separate thread, so this cell returns while
# the server keeps running.
thread = Thread(target=run_streamlit)
thread.start()

In [37]:
# Open an ngrok tunnel to local port 8501, the port run_streamlit serves the
# app on, and print the tunnel that was created.
public_url = ngrok.connect(addr='8501', proto='http', bind_tls=True)
print("Public URL:", public_url)

Public URL: NgrokTunnel: "https://eec7-35-245-57-84.ngrok-free.app" -> "http://localhost:8501"


In [67]:
# Disconnect every ngrok tunnel that is currently open, printing each one
# as it is closed.
tunnels = ngrok.get_tunnels()
for tunnel in tunnels:
  print(f"Closing tunnel: {tunnel.public_url}->{tunnel.config['addr']}")
  ngrok.disconnect(tunnel.public_url)



Closing tunnel: https://d4e9-104-199-170-121.ngrok-free.app->http://localhost:8501
Closing tunnel: https://74a5-104-199-170-121.ngrok-free.app->http://localhost:8501




In [None]:
%%writefile .env
GOOGLE_API_KEY
GROQ_API_KEY

Overwriting .env
