In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Getting Started with MAPS\n",
    "\n",
    "Multi-Agent Processing System (MAPS) is a framework for building conversational multi-agent systems for data cleaning, processing, and visualization. This notebook will guide you through the basics of setting up and using MAPS.\n",
    "\n",
    "## Setup\n",
    "\n",
    "First, let's import the necessary modules, make the project root importable, and create the data directories the system needs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# Make the project root (the parent of the current working directory) importable.\n",
    "# abspath() collapses the trailing '..' so sys.path receives one canonical entry\n",
    "# and module paths derived from it do not carry a literal '..' segment.\n",
    "notebook_path = os.path.abspath('')\n",
    "project_root = os.path.abspath(os.path.join(notebook_path, os.pardir))\n",
    "if project_root not in sys.path:\n",
    "    sys.path.append(project_root)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import MAPS components\n",
    "from core.memory_system import MemorySystem\n",
    "from core.function_registry import FunctionRegistry\n",
    "from core.conversation_manager import ConversationManager\n",
    "\n",
    "from agents.data_engineer import DataEngineerAgent\n",
    "from agents.viz_specialist import VizSpecialistAgent\n",
    "from agents.memory_agent import MemoryAgent\n",
    "from agents.orchestrator import OrchestratorAgent\n",
    "\n",
    "from config.system_config import DATA_DIRS\n",
    "from utils.sample_data import create_sales_data\n",
    "\n",
    "# Make sure every directory listed in DATA_DIRS exists before it is used\n",
    "for data_dir in DATA_DIRS.values():\n",
    "    os.makedirs(data_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initialize System Components\n",
    "\n",
    "Now let's initialize all the components of our system."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Core services: these instances are handed to the agents created below\n",
    "memory_system = MemorySystem(persistence_path=DATA_DIRS[\"memory\"])\n",
    "function_registry = FunctionRegistry()\n",
    "conversation_manager = ConversationManager(\n",
    "    memory_system,\n",
    "    persistence_path=DATA_DIRS[\"conversations\"],\n",
    ")\n",
    "\n",
    "# Specialist agents, each wired to the shared core services it needs\n",
    "data_engineer = DataEngineerAgent(memory_system, function_registry)\n",
    "viz_specialist = VizSpecialistAgent(memory_system, function_registry, output_dir=DATA_DIRS[\"visualizations\"])\n",
    "memory_agent = MemoryAgent(memory_system, conversation_manager)\n",
    "\n",
    "# The orchestrator receives all three agents plus the conversation manager;\n",
    "# every query in this notebook is sent through orchestrator.process_query()\n",
    "orchestrator = OrchestratorAgent(data_engineer, viz_specialist, memory_agent, conversation_manager)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Sample Data\n",
    "\n",
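    "Let's create some sample data to work with. The first time the cell below runs, it generates a 500-row synthetic sales dataset, with roughly 5% of the `price`, `quantity`, and `region` values deliberately left missing, and saves it as a CSV file; on later runs it simply reloads that file."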
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the sample sales data on the first run; reuse the saved CSV afterwards\n",
    "sample_file = os.path.join(DATA_DIRS[\"sample_datasets\"], \"notebook_sales_data.csv\")\n",
    "\n",
    "if not os.path.exists(sample_file) or os.path.getsize(sample_file) == 0:\n",
    "    # Seed the global NumPy RNG so the generated values are reproducible\n",
    "    np.random.seed(42)\n",
    "\n",
    "    n_rows = 500\n",
    "\n",
    "    regions = ['North', 'South', 'East', 'West', 'Central']\n",
    "    product_categories = ['Electronics', 'Clothing', 'Food', 'Home', 'Office']\n",
    "\n",
    "    data = {\n",
    "        'date': pd.date_range(start='2023-01-01', periods=n_rows).astype(str),\n",
    "        'product_id': np.random.randint(1000, 9999, size=n_rows),\n",
    "        'product_category': np.random.choice(product_categories, size=n_rows),\n",
    "        'region': np.random.choice(regions, size=n_rows),\n",
    "        'price': np.round(np.random.uniform(10, 1000, size=n_rows), 2),\n",
    "        'quantity': np.random.randint(1, 50, size=n_rows),\n",
    "        'customer_id': np.random.randint(10000, 99999, size=n_rows)\n",
    "    }\n",
    "\n",
    "    # Blank out roughly 5% of the values in each of these columns.\n",
    "    # Series.mask() returns a new Series with NaN wherever the mask is True and\n",
    "    # upcasts the dtype as needed, so None is never assigned in place into the\n",
    "    # integer 'quantity' column (an upcasting assignment that pandas >= 2.1\n",
    "    # reports as deprecated).\n",
    "    for col in ['price', 'quantity', 'region']:\n",
    "        mask = np.random.choice([True, False], size=n_rows, p=[0.05, 0.95])\n",
    "        data[col] = pd.Series(data[col]).mask(mask)\n",
    "\n",
    "    df = pd.DataFrame(data)\n",
    "\n",
    "    # Save to CSV so later runs take the 'else' branch and reload this file\n",
    "    os.makedirs(os.path.dirname(sample_file), exist_ok=True)\n",
    "    df.to_csv(sample_file, index=False)\n",
    "\n",
    "    print(f\"Sample data created and saved to {sample_file}\")\n",
    "else:\n",
    "    df = pd.read_csv(sample_file)\n",
    "    print(f\"Loaded existing sample data from {sample_file}\")\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using the System\n",
    "\n",
    "Now let's interact with our system by sending queries to the orchestrator."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helper function to display outputs nicely\n",
    "def process_and_display(query):\n",
    "    print(f\"\\nQuery: {query}\")\n",
    "    result = orchestrator.process_query(query)\n",
    "    print(f\"Response: {result['message']}\")\n",
    "    \n",
    "    # If there's a visualization, display it\n",
    "    if result['success'] and 'result' in result and result['result']:\n",
    "        if 'filepath' in result['result']:\n",
    "            from IPython.display import Image\n",
    "            display(Image(filename=result['result']['filepath']))\n",
    "            \n",
    "    return result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load Data\n",
    "\n",
    "First, let's load our sample data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(f\"Load data from '{sample_file}'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Explore the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"Show me the first 5 rows of data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"What are the missing values in this dataset?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Clean the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"Drop rows with missing values\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transform the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"Create a new column 'total_revenue' as 'price' times 'quantity'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Visualize the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"Show the distribution of total_revenue\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"Create a bar chart of average total_revenue by region\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"Show a scatter plot of price vs quantity\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check Memory and Context"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"Summarize what we've done so far\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"List all visualizations we've created\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save System State"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_and_display(\"Save a checkpoint of our current state\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Conclusion\n",
    "\n",
    "This notebook demonstrated the basic functionality of MAPS. With it, you can:\n",
    "\n",
    "1. Load and explore data\n",
    "2. Clean and transform data\n",
    "3. Create visualizations\n",
    "4. Keep track of operations and context\n",
    "5. Save system state for later use\n",
    "\n",
    "Feel free to experiment with other queries and explore the system's capabilities!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}