In [None]:
{
 "cells": [
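  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Privacy-Preserving NLP Analysis\n",
    "\n",
    "This notebook walks through our privacy-preserving NLP system end to end: preprocessing the raw dataset, training a text classifier with federated learning, and plotting the training metrics per round."
   ]
  },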
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from src.preprocessing.data_processor import DataPreprocessor\n",
    "from src.models.text_classifier import SimpleTextClassifier\n",
    "from src.federated.client import FederatedClient\n",
    "from src.federated.server import FederatedServer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw input CSV; the relative path is resolved against the kernel's working directory\n",
    "data_path = '../data/raw/cord19_dataset.csv'\n",
    "\n",
    "# process_data returns the processed documents (df) and a feature matrix (features);\n",
    "# features.shape[1] is reused in the training section to size the classifier input\n",
    "preprocessor = DataPreprocessor()\n",
    "df, features = preprocessor.process_data(data_path)\n",
    "\n",
    "# Quick sanity report on what preprocessing produced\n",
    "print(f\"Processed {len(df)} documents\")\n",
    "print(f\"Feature matrix shape: {features.shape}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Model Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Classifier whose input width matches the number of feature columns\n",
    "model = SimpleTextClassifier(\n",
    "    input_dim=features.shape[1],\n",
    "    hidden_dim=256,\n",
    "    output_dim=5,\n",
    ")\n",
    "\n",
    "# Federated server wrapping the model, configured with num_clients=5\n",
    "# NOTE(review): no training data is passed in this cell; presumably the\n",
    "# server/clients load their own data - confirm against FederatedServer\n",
    "server = FederatedServer(model, num_clients=5)\n",
    "\n",
    "# Train with num_rounds=10; the result is indexed by 'loss' and 'accuracy'\n",
    "# in the results section below\n",
    "metrics_history = server.train_federated_model(num_rounds=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Results Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot training metrics on a single explicit Axes (loss and accuracy share the y-axis)\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "ax.plot(metrics_history['loss'], label='Loss')\n",
    "ax.plot(metrics_history['accuracy'], label='Accuracy')\n",
    "ax.set(title='Training Metrics', xlabel='Round', ylabel='Value')\n",
    "ax.legend()\n",
    "ax.grid(True)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}