- Pinecone — Vector database for embeddings
- LangChain — Document ingestion, chunking, embedding, retrieval
- LangGraph — Stateful workflow graph with routing, grading, fallback
- CrewAI — Multi-agent collaboration (Researcher, Writer, Fact-Checker)
rag-project/
├── config/
│ ├── __init__.py
│ └── settings.py # All env vars & configuration
├── ingestion/
│ ├── __init__.py
│ ├── loader.py # Document loaders (PDF, TXT, CSV, Web)
│ ├── chunker.py # Text splitting strategies
│ └── embedder.py # Embed & store into Pinecone
├── retriever/
│ ├── __init__.py
│ └── pinecone_retriever.py # Retriever wrapper
├── graph/
│ ├── __init__.py
│ ├── state.py # Graph state definition
│ ├── nodes.py # All graph nodes (retrieve, grade, generate, etc.)
│ └── workflow.py # LangGraph workflow compilation
├── agents/
│ ├── __init__.py
│ ├── researcher.py # Research agent
│ ├── writer.py # Writer agent
│ ├── fact_checker.py # Fact-checker agent
│ └── crew.py # CrewAI crew assembly
├── tools/
│ ├── __init__.py
│ └── rag_tool.py # Custom CrewAI tool wrapping LangGraph
├── api/
│ ├── __init__.py
│ └── server.py # FastAPI server
├── tests/
│ ├── __init__.py
│ ├── test_ingestion.py
│ ├── test_retriever.py
│ ├── test_graph.py
│ └── test_crew.py
├── docs/ # Place your source documents here
├── .env.example
├── requirements.txt
├── setup.py
├── ingest.py # CLI: Ingest documents
├── query.py # CLI: Query the pipeline
└── main.py # CLI: Full pipeline with CrewAI
cd rag-project
python -m venv venv
source venv/bin/activate # Linux/Mac
# venv\Scripts\activate # Windows
pip install -r requirements.txt
cp .env.example .env
# Edit .env with your API keys
python -c "from config.settings import settings; from ingestion.embedder import create_pinecone_index; create_pinecone_index()"
Place your PDF, TXT, CSV, or MD files into the docs/ folder.
python ingest.py --source ./docs
python query.py "What is the refund policy?"
python main.py "Summarize the key findings from the quarterly report"
python -m api.server
# Visit http://localhost:8000/docs for Swagger UI