Skip to content

IndexError: list index out of range with CrewAI #262

@Tttjjjlll

Description

@Tttjjjlll

Hi all, the following is my code for having CrewAI answer from local documents using RAG.

# Chat model handle for Google's "gemini-pro", capped at 2048 output tokens.
gemini = ChatGoogleGenerativeAI(
    model="gemini-pro",
    verbose=True,
    temperature=0.2,
    # NOTE(review): the key is blank here — presumably redacted; confirm a
    # real key is supplied before running.
    google_api_key="",
    max_output_tokens=2048,
)


# Load every file found in the PDF folder and split it into text chunks.
pdf_folder_path = "./pdf"

# List the directory once so the printout and the loaders see the same files.
pdf_file_names = os.listdir(pdf_folder_path)
print(pdf_file_names)
loaders = [
    UnstructuredPDFLoader(os.path.join(pdf_folder_path, fn))
    for fn in pdf_file_names
]
print(loaders)

# The splitter settings are constant, so build it once rather than per file.
# Chunks are split on newlines; sizes are measured with len().
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=150,
    length_function=len,
)

# Accumulates the chunks from all files, in loader order.
all_documents = []

for loader in loaders:
    print("Loading raw documents ..." + loader.file_path)
    raw_documents = loader.load()
    print("splitting text")
    documents = text_splitter.split_documents(raw_documents)
    all_documents.extend(documents)

print("Creating vectorstore...")

#vectorstore = FAISS.from_documents(all_documents,embeddings).as_retriever()

client = chromadb.Client()

# Fetch the collection if it is already there, otherwise create it. A plain
# truthiness test on list_collections() cannot tell whether *this* collection
# exists, and create_collection must not be called for a name already in use.
consent_collection = client.get_or_create_collection("consent_collection")

# NOTE(review): neither `client` nor `consent_collection` is passed to Chroma
# below, so the store built here appears to be independent of them — confirm
# whether the collection above is still needed.
vectordb = Chroma.from_documents(
    documents=all_documents,
    embedding=OpenAIEmbeddings(openai_api_key=""),
    persist_directory="C:\\Users\\CS01\\Documents\\crewAI-examples-main\\crewAI-examples-main\\stock_analysis\\tools\\chroma_store\\",
)

# Flush the embedded documents to the persist directory.
vectordb.persist()

@tool("Get Local Documents")
def search_trend(query):
  """
  Useful to search for a given query from the local documents.
  The input to this tool should be a query to ask.
  
  """
  # NOTE(review): the docstring above is presumably what @tool exposes to the
  # agent as the tool description, so it is kept verbatim — confirm before
  # editing it.
  # Echo the incoming query, then hand it to the vector-search helper.
  print("ASK....")
  print(query)
  return __embedding_search(query)

def __embedding_search(ask):
  """Run a similarity search for `ask` on the module-level `vectordb`.

  Prints a progress message followed by the raw search result, then returns
  that result unchanged.
  """
  # The search runs before the progress message is printed.
  matches = vectordb.similarity_search(ask)
  print("Retrieving info from vector database")
  print(matches)
  return matches

# Topics to research; one Task is built per entry, in this order.
trend_list = [
    'Data and Machine Learning',
    'Generative AI',
    'Blockchain Technology',
]

func_list = []
for trend in trend_list:
    # Prompt for this topic; dedent() strips the shared source indentation.
    task_description = dedent(f"""
        Go step by step.

        You should ALWAYS use the Get Local Documents Tool to search for CRIMES information on {trend} and only use the information retrieved from the tool and write an in-depth elaboration of search.
        You DON'T NEED TO have access to the internet to get local documents.
        Your final answer MUST be at least 500 words.
        """)
    func_1 = Task(
        description=task_description,
        agent=writer_agent,
        tools=[search_trend],
    )
    func_list.append(func_1)


# Assemble the crew: a single writer agent working through every trend task.
# No `process` argument is given; the traceback in this report shows the
# sequential process being run, not a hierarchical one.
project_crew = Crew(
    agents=[writer_agent],
    tasks=func_list,  # one task per entry in trend_list
)

# Run the crew and print whatever kickoff() returns.
result = project_crew.kickoff()
print(result)

I encountered the following error:

IndexError                                Traceback (most recent call last)
Cell In[13], [line 1](vscode-notebook-cell:?execution_count=13&line=1)
----> [1](vscode-notebook-cell:?execution_count=13&line=1) result = project_crew.kickoff()
      [2](vscode-notebook-cell:?execution_count=13&line=2) print(result)

File [c:\Users\CS01\miniconda3\lib\site-packages\crewai\crew.py:192](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:192), in Crew.kickoff(self)
    [189](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:189)         agent.create_agent_executor()
    [191](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:191) if self.process == Process.sequential:
--> [192](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:192)     return self._run_sequential_process()
    [193](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:193) if self.process == Process.hierarchical:
    [194](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:194)     return self._run_hierarchical_process()

File [c:\Users\CS01\miniconda3\lib\site-packages\crewai\crew.py:214](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:214), in Crew._run_sequential_process(self)
    [211](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:211) self._logger.log("debug", f"Working Agent: {role}")
    [212](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:212) self._logger.log("info", f"Starting Task: {task.description}")
--> [214](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:214) output = task.execute(context=task_output)
    [215](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:215) if not task.async_execution:
    [216](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/crew.py:216)     task_output = output

File [c:\Users\CS01\miniconda3\lib\site-packages\crewai\task.py:104](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:104), in Task.execute(self, agent, context, tools)
    [102](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:102)     self.thread.start()
    [103](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:103) else:
--> [104](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:104)     result = self._execute(
    [105](file:///C:/Users/CS01/miniconda3/lib/site-packages/crewai/task.py:105)         task=self,
...
File [c:\Users\CS01\miniconda3\lib\site-packages\proto\marshal\collections\repeated.py:125](file:///C:/Users/CS01/miniconda3/lib/site-packages/proto/marshal/collections/repeated.py:125), in RepeatedComposite.__getitem__(self, key)
    [124](file:///C:/Users/CS01/miniconda3/lib/site-packages/proto/marshal/collections/repeated.py:124) def __getitem__(self, key):
--> [125](file:///C:/Users/CS01/miniconda3/lib/site-packages/proto/marshal/collections/repeated.py:125)     return self._marshal.to_python(self._pb_type, self.pb[key])

IndexError: list index out of range

How can I resolve this issue? Is it caused by my code, or by CrewAI, Gemini Pro, or LangChain?
I would appreciate any help. Thank you!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions