In [None]:
!pip install smolagents==1.4.1

In [1]:
from smolagents.models import OpenAIServerModel
import os
from dotenv import load_dotenv
load_dotenv()

model_id="meta/llama-3.1-405b-instruct"

base_url = 'https://integrate.api.nvidia.com/v1'
api_key=os.environ["NVIDIA_API_KEY"]
model=OpenAIServerModel(model_id=model_id,
        api_base=base_url,
        api_key=os.environ["NVIDIA_API_KEY"],
        temperature=0.1)

In [2]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain import prompts, chat_models, hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from typing import Optional, List

## construct the system prompt 
llm= ChatNVIDIA(model=model_id, api_key=api_key, temperature=0.1)
def extract_github_issues(user_query):
    prompt_template = """
    ### [INST]

    You are an expert on extracting information to search on github issues per given user input query
    user query
    ------
    {user_query}
    ------

    The output promotion message MUST use the following format :

    '''
    status: whether the github issue is open or closed, if not specify, then use all
    keywords: extracting keywords which could potentailly be used to search on github issues
    '''
    Begin!
    [/INST]
     """
    prompt = PromptTemplate(
    input_variables=['user_query'],
    template=prompt_template)

    ## structural output using LMFE 
    class ExtractIssues(BaseModel):     
        status: str = Field(description="extract the github issue status, whether it is open or closed from input user query")
        keywords : str = Field(description="extract the keywords which users could potentially used to search on github issues")

    extract_github_issues=llm.with_structured_output(ExtractIssues)     

    ## construct the content_creator agent
    content_creator = ( prompt | extract_github_issues )
    out=content_creator.invoke({"user_query":user_query})
    return out


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [5]:
for repo in token.get_user().get_repos():
    print(repo.name)

gpubootcamp
megatron2
A100TF2
ActiveLearning_exp
ADAS
Agentless
Agent_and_Graph
AITrials
apex_amp_DALI
Applied-Machine-Learning
ASR_covid
autoKeras
Azure
azure-quickstart-templates
azure-rm-vm
BERT_QnA_maker
bloomfilter-rb
capsule-networks
char-rnn
Coswara-Data
CycleGans
datasciencecoursera
datasharing
deepsort_modified
demo_customNMT
dlib
DockerExp
dreamgaussian
dspy
example-models
fastai
genai_langserve_apple
GenerativeAIExamples
gpubootcamp
hackathon
HF_KBbert_Classification
HF_TRL_LORA
HybridAGI
ImageAI
Intro
Janus
jupyter-ai-agent
keras
Keras_bbox_regressor
kivycrashcourse
langchain
langgraphstudio_zcharpy
langserve
LearnAI-Bootcamp
learnopencv
learn_Cuda
machine_learning_examples
MakeYourOwnCoCoDataset
Megatron-LM
MelanomaWebApp
mem0
MinHash
mini_projects
MLOps_LengthOfStay
MLOps_Pneumonia
MMDS
moatless-tree-search
MOE
nativePytorch_NMT
NeMo
NeMo-Aligner
NeMo-Megatron-Launcher
NeMo_ASR_TransferLearning_Swedish
neural-networks
neural-networks-and-deep-learning
NGC_exp
NLP_Bert_Pyt

In [27]:
from github import Github
from langchain.docstore.document import Document
token = Github(os.environ["GITHUB_TOKEN"])

repo = token.get_repo('NVIDIA/NeMo-Guardrails')
user_query="how to install annoy on windows"
#out=extract_github_issues(user_query)
#status=out.status
#query=out.keywords

open_issues=[]
issue_cnt=0
            

for issue in repo.get_issues(state='open'):
    comment_in_same_issue=[]
    metadata={'issue_nr':issue_cnt, "status":"open", "repo":"NVIDIA/NeMo-Guardrails"}
    for comment in issue.get_comments():
        
        comment_text=comment.body.encode("utf-8")
        if len(comment_text)>=1:
            doc=Document(page_content=comment_text, metadata=metadata)
            open_issues.append(doc)
    issue_cnt+=1

In [28]:
open_issues[3]

Document(metadata={'issue_nr': 6, 'status': 'open', 'repo': 'NVIDIA/NeMo-Guardrails'}, page_content="Hi @Pouyanpi, thanks a lot for your response. I've created a gist [here](https://gist.github.com/ta-dr0aid/1fc2e2f7469ec1452d962cf6244f962a) and included the setup for the local ollama environment. Outputs and steps to reproduce are added as well. ")

In [30]:
import os

import datasets
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma

# from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from tqdm import tqdm
from transformers import AutoTokenizer

# from langchain_openai import OpenAIEmbeddings
from smolagents import LiteLLMModel, Tool
from smolagents.agents import CodeAgent
from smolagents.agents import ToolCallingAgent



knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train[:5%]",)



## For your own PDFs, you can use the following code to load them into source_docs
# pdf_directory = "pdfs"
# pdf_files = [
#     os.path.join(pdf_directory, f)
#     for f in os.listdir(pdf_directory)
#     if f.endswith(".pdf")
# ]
# source_docs = []

# for file_path in pdf_files:
#     loader = PyPDFLoader(file_path)
#     docs.extend(loader.load())

text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    AutoTokenizer.from_pretrained("thenlper/gte-small"),
    chunk_size=200,
    chunk_overlap=20,
    add_start_index=True,
    strip_whitespace=True,
    separators=["\n\n", "\n", ".", " ", ""],
)

# Split docs and keep only unique ones
print("Splitting documents...")
docs_processed = []
unique_texts = {}
for doc in tqdm(open_issues):
    new_docs = text_splitter.split_documents([doc])
    for new_doc in new_docs:
        if new_doc.page_content not in unique_texts:
            unique_texts[new_doc.page_content] = True
            docs_processed.append(new_doc)


print("Embedding documents... This should take a few minutes (5 minutes on MacBook with M1 Pro)")
# Initialize embeddings and ChromaDB vector store
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


# embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

vector_store = Chroma.from_documents(docs_processed, embeddings, persist_directory="./chroma_db")


class RetrieverTool(Tool):
    name = "retriever"
    description = (
        "Uses semantic search to retrieve the parts of documentation that could be most relevant to answer your query."
    )
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "string"

    def __init__(self, vector_store, **kwargs):
        super().__init__(**kwargs)
        self.vector_store = vector_store

    def forward(self, query: str) -> str:
        assert isinstance(query, str), "Your search query must be a string"
        docs = self.vector_store.similarity_search(query, k=3)
        return "\nRetrieved documents:\n" + "".join(
            [f"\n\n===== Document {str(i)} =====\n" + doc.page_content for i, doc in enumerate(docs)]
        )


retriever_tool = RetrieverTool(vector_store)


Splitting documents...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 599.76it/s]


Embedding documents... This should take a few minutes (5 minutes on MacBook with M1 Pro)


In [31]:
out=retriever_tool.forward("how do i install annoy on windows ")
[out.split(f"\n\n===== Document {str(i)} =====\n")[1] for i in range(3)]

['Windows: Download [this executable](https://drive.google.com/file/d/1sqFxbEdTMubjVktnV4C6ICjp89wLhUcP/view?usp=sharing)\n\nLinux (Ubuntu): Download [this executable](https://drive.google.com/file/d/1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL/view?usp=sharing)\n\n===== Document 1 =====\nWindows: Download [this executable](https://drive.google.com/file/d/1sqFxbEdTMubjVktnV4C6ICjp89wLhUcP/view?usp=sharing)\n\nLinux (Ubuntu): Download [this executable](https://drive.google.com/file/d/1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL/view?usp=sharing)\n\n===== Document 2 =====\nWindows: Download [this executable](https://drive.google.com/file/d/1sqFxbEdTMubjVktnV4C6ICjp89wLhUcP/view?usp=sharing)\n\nLinux (Ubuntu): Download [this executable](https://drive.google.com/file/d/1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL/view?usp=sharing)',
 'Windows: Download [this executable](https://drive.google.com/file/d/1sqFxbEdTMubjVktnV4C6ICjp89wLhUcP/view?usp=sharing)\n\nLinux (Ubuntu): Download [this executable](https://drive.google.com

In [32]:
from  smolagents import Tool
from smolagents import ToolCallingAgent
from smolagents.models import OpenAIServerModel

model=OpenAIServerModel(model_id=model_id,
        api_base=base_url,
        api_key=os.environ["NVIDIA_API_KEY"],
        temperature=0.1)

#agent = ToolCallingAgent( tools=[retriever_tool], model=model)
agent = CodeAgent(tools=[retriever_tool], model=model, add_base_tools=False)
 
agent_output = agent.run("how do i register a custom action? ")

In [34]:
agent_output

'You can register custom actions using LLMRails.register_action or the @action() decorator from nemoguardrails.actions.actions. Make sure to declare a parameter with the same name as the registered action.'