Hi guys, the following is my code for CrewAI to read from local documents using RAG.
# Gemini Pro chat model handle. The API key is blank here and must be filled
# in before running. NOTE(review): presumably this is handed to the agent
# defined elsewhere in the file -- confirm.
gemini = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key="",
    temperature=0.2,
    max_output_tokens=2048,
    verbose=True,
)
# Build the retrieval corpus: load every file found in the PDF folder, split
# each one into overlapping chunks, then embed the chunks and persist them
# in a Chroma store that the search tool queries.
pdf_folder_path = "./pdf"
# List the folder once so the printed listing and the loaders always agree.
pdf_file_names = os.listdir(pdf_folder_path)
print(pdf_file_names)
loaders = [
    UnstructuredPDFLoader(os.path.join(pdf_folder_path, fn))
    for fn in pdf_file_names
]
print(loaders)

# Same settings for every file, so build the splitter once, outside the loop.
# Chunks are measured with len(): at most 1000 with an overlap of 150.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=150,
    length_function=len,
)

all_documents = []
for loader in loaders:
    print("Loading raw documents ..." + loader.file_path)
    raw_documents = loader.load()
    print("splitting text")
    documents = text_splitter.split_documents(raw_documents)
    all_documents.extend(documents)

print("Creating vectorstore...")
client = chromadb.Client()
# The original test was inverted: it created the collection only when some
# collection already existed and reported "already exists" when none did,
# leaving `consent_collection` undefined. Report by name instead, then use
# get_or_create_collection so the name is always bound and a re-run cannot
# fail on a duplicate collection.
# list_collections() yields Collection objects in some chromadb releases and
# plain names in others; getattr() handles both.
existing_names = {getattr(c, "name", c) for c in client.list_collections()}
if "consent_collection" in existing_names:
    print("Collection already exists")
consent_collection = client.get_or_create_collection("consent_collection")

# NOTE(review): neither `client` nor `consent_collection` is passed to
# Chroma.from_documents below, so the store it builds does not use that
# collection -- confirm whether that is intended.
vectordb = Chroma.from_documents(
    documents=all_documents,
    embedding=OpenAIEmbeddings(openai_api_key=""),
    persist_directory="C:\\Users\\CS01\\Documents\\crewAI-examples-main\\crewAI-examples-main\\stock_analysis\\tools\\chroma_store\\",
)
vectordb.persist()
@tool("Get Local Documents")
def search_trend(query):
    """
    Useful to search for a given query from the local documents.
    The input to this tool should be a query to ask.
    """
    # The docstring above is left word-for-word: the @tool decorator may
    # expose it to the agent as the tool description (assumption -- confirm).
    # Echo the incoming query so tool calls are visible in the run log.
    print("ASK....")
    print(query)
    # Delegate the lookup to the vector-store helper and return its result.
    return __embedding_search(query)
def __embedding_search(ask):
    """Run a similarity search for `ask` against the module-level `vectordb`.

    Prints a progress message and the raw search result, then returns that
    result unchanged.
    """
    # The search runs before the progress message, as in the original.
    matches = vectordb.similarity_search(ask)
    print("Retrieving info from vector database")
    print(matches)
    return matches
# Topics to research; one writing task is created per topic.
trend_list = ['Data and Machine Learning', 'Generative AI', 'Blockchain Technology']


def _build_trend_task(trend):
    """Return the writing task for one topic, limited to the local-documents tool."""
    prompt = dedent(f"""
        Go step by step.
        You should ALWAYS use the Get Local Documents Tool to search for CRIMES information on {trend} and only use the information retrieved from the tool and write an in-depth elaboration of search.
        You DON'T NEED TO have access to the internet to get local documents.
        Your final answer MUST be at least 500 words.
        """)
    return Task(description=prompt, agent=writer_agent, tools=[search_trend])


func_list = [_build_trend_task(trend) for trend in trend_list]

# Assemble the crew from the single writer agent and its tasks. No `process`
# argument is passed, so the crew runs with its default process.
project_crew = Crew(
    tasks=func_list,
    agents=[writer_agent],
)
result = project_crew.kickoff()
print(result)
I encountered the following error:
IndexError Traceback (most recent call last)
Cell In[13], [line 1](vscode-notebook-cell:?execution_count=13&line=1)
----> [1](vscode-notebook-cell:?execution_count=13&line=1) result = project_crew.kickoff()
[2](vscode-notebook-cell:?execution_count=13&line=2) print(result)
File [c:\Users\CS01\miniconda3\lib\site-packages\crewai\crew.py:192](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:192), in Crew.kickoff(self)
[189](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:189) agent.create_agent_executor()
[191](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:191) if self.process == Process.sequential:
--> [192](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:192) return self._run_sequential_process()
[193](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:193) if self.process == Process.hierarchical:
[194](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:194) return self._run_hierarchical_process()
File [c:\Users\CS01\miniconda3\lib\site-packages\crewai\crew.py:214](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:214), in Crew._run_sequential_process(self)
[211](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:211) self._logger.log("debug", f"Working Agent: {role}")
[212](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:212) self._logger.log("info", f"Starting Task: {task.description}")
--> [214](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:214) output = task.execute(context=task_output)
[215](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:215) if not task.async_execution:
[216](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:216) task_output = output
File [c:\Users\CS01\miniconda3\lib\site-packages\crewai\task.py:104](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:104), in Task.execute(self, agent, context, tools)
[102](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:102) self.thread.start()
[103](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:103) else:
--> [104](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:104) result = self._execute(
[105](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:105) task=self,
...
File [c:\Users\CS01\miniconda3\lib\site-packages\proto\marshal\collections\repeated.py:125](file:///C:/Users/CS01/miniconda3/lib/site-packages/proto/marshal/collections/repeated.py:125), in RepeatedComposite.__getitem__(self, key)
[124](file:///C:/Users/CS01/miniconda3/lib/site-packages/proto/marshal/collections/repeated.py:124) def __getitem__(self, key):
--> [125](file:///C:/Users/CS01/miniconda3/lib/site-packages/proto/marshal/collections/repeated.py:125) return self._marshal.to_python(self._pb_type, self.pb[key])
IndexError: list index out of range
May I know how I can resolve this issue? Is it caused by my code, or by CrewAI, Gemini Pro, or LangChain?
Appreciate any help I can get. Thank you!
Hi guys, the following is my code for CrewAI to read from local documents using RAG.
I encountered the following error:
May I know how I can resolve this issue? Is it caused by my code, or by CrewAI, Gemini Pro, or LangChain?
Appreciate any help I can get. Thank you!