In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Preprocessing - TSLA Stock Data\n",
    "## Using Stage 06 cleaning patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from src import utils, cleaning\n",
    "\n",
    "# Ask utils for the raw/processed data locations and list the raw files named tsla_*.csv\n",
    "raw_dir, processed_dir = utils.get_data_paths()\n",
    "tsla_files = list(raw_dir.glob('tsla_*.csv'))\n",
    "\n",
    "if not tsla_files:\n",
    "    # No matching CSV on disk: fall back to the sample data produced by utils\n",
    "    print(\"No raw data found, using sample data...\")\n",
    "    raw_data = utils.generate_sample_tsla_data()\n",
    "else:\n",
    "    # Several files may match; load the one with the newest modification time\n",
    "    latest_file = max(tsla_files, key=lambda csv_path: csv_path.stat().st_mtime)\n",
    "    raw_data = pd.read_csv(latest_file)\n",
    "    print(f\"Loaded raw data: {latest_file.name}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report missing-value counts before and after the cleaning step\n",
    "print(\"Before cleaning - Missing values:\", raw_data.isna().sum(), sep=\"\\n\")\n",
    "\n",
    "cleaned_data = cleaning.clean_stock_data(raw_data)\n",
    "\n",
    "# The leading newline leaves a blank line between the two reports\n",
    "print(\"\\nAfter cleaning - Missing values:\", cleaned_data.isna().sum(), sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the feature step from `cleaning`, then preview the last five rows of selected columns\n",
    "feature_data = cleaning.calculate_technical_features(cleaned_data)\n",
    "\n",
    "print(\n",
    "    \"Technical features added:\",\n",
    "    feature_data[['Date', 'Close', 'SMA_10', 'Returns', 'Volatility']].tail(5),\n",
    "    sep=\"\\n\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the modelling table from the feature data\n",
    "model_data = cleaning.prepare_model_data(feature_data)\n",
    "print(\"Model data prepared:\")\n",
    "\n",
    "# How often each Target value occurs, as a plain dict\n",
    "target_counts = model_data['Target'].value_counts().to_dict()\n",
    "print(\"Target distribution:\", target_counts)\n",
    "\n",
    "print(\"Final shape:\", model_data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "# Stamp the file name with the current local time, down to the minute\n",
    "timestamp = format(datetime.now(), '%Y%m%d_%H%M')\n",
    "output_file = f'tsla_processed_{timestamp}.csv'\n",
    "\n",
    "# Hand the frame to utils for saving and report the location it returns\n",
    "output_path = utils.save_dataframe(model_data, output_file, 'processed')\n",
    "print(f\"✓ Processed data saved to: {output_path}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}