In [1]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Ingestion and Feature Pipeline Demo\n",
    "This notebook demonstrates how to fetch market data and generate features."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from core.data_ingest import DataIngestion\n",
    "from core.features import FeaturePipeline\n",
    "\n",
    "%matplotlib inline\n",
    "plt.style.use('ggplot')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Fetch Market Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize data ingestion\n",
    "ingestor = DataIngestion()\n",
    "\n",
    "# Fetch sample symbols\n",
    "test_symbols = ['AAPL', 'MSFT']\n",
    "print(f\"Fetching data for: {test_symbols}\")\n",
    "\n",
    "data = ingestor.fetch_multiple_symbols(test_symbols, source='yahoo')\n",
    "print(f\"\\nFetched {len(data)} symbols successfully\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Explore Raw Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look at AAPL data\n",
    "aapl_df = data['AAPL']\n",
    "print(\"AAPL Data Shape:\", aapl_df.shape)\n",
    "print(\"\\nFirst few rows:\")\n",
    "aapl_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot closing price\n",
    "# NOTE(review): the raw frame is read with 'Close' here, while the feature frame\n",
    "# is read with 'close' in Step 4 - confirm the column casing of each frame.\n",
    "plt.figure(figsize=(12, 6))\n",
    "plt.plot(aapl_df.index, aapl_df['Close'])\n",
    "plt.title('AAPL Closing Price')\n",
    "plt.xlabel('Date')\n",
    "plt.ylabel('Price ($)')\n",
    "plt.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Generate Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize feature pipeline\n",
    "pipeline = FeaturePipeline()\n",
    "\n",
    "# Process data\n",
    "feature_store = pipeline.process_universe(data, save=False)\n",
    "print(f\"\\nGenerated features for {len(feature_store)} symbols\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Examine features for AAPL\n",
    "aapl_features = feature_store['AAPL']\n",
    "print(\"Feature columns:\")\n",
    "print(aapl_features.columns.tolist()[:20])  # Show first 20\n",
    "print(f\"\\nTotal features: {len(aapl_features.columns)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4: Visualize Key Indicators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot price, RSI, and MACD as three stacked panels\n",
    "fig, axes = plt.subplots(3, 1, figsize=(14, 10))\n",
    "\n",
    "# Price\n",
    "axes[0].plot(aapl_features.index, aapl_features['close'])\n",
    "axes[0].set_title('AAPL Price')\n",
    "axes[0].set_ylabel('Price ($)')\n",
    "\n",
    "# RSI\n",
    "axes[1].plot(aapl_features.index, aapl_features['rsi_14'])\n",
    "axes[1].axhline(y=70, color='r', linestyle='--', label='Overbought')\n",
    "axes[1].axhline(y=30, color='g', linestyle='--', label='Oversold')\n",
    "axes[1].set_title('RSI (14)')\n",
    "axes[1].set_ylabel('RSI')\n",
    "axes[1].legend()\n",
    "\n",
    "# MACD\n",
    "axes[2].plot(aapl_features.index, aapl_features['macd'], label='MACD')\n",
    "axes[2].plot(aapl_features.index, aapl_features['macd_signal'], label='Signal')\n",
    "axes[2].bar(aapl_features.index, aapl_features['macd_hist'], label='Histogram', alpha=0.3)\n",
    "axes[2].set_title('MACD')\n",
    "axes[2].set_ylabel('Value')\n",
    "axes[2].set_xlabel('Date')\n",
    "axes[2].legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 5: Feature Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get feature columns (exclude OHLCV and metadata)\n",
    "exclude_cols = ['open', 'high', 'low', 'close', 'volume', 'adj_close',\n",
    "                'symbol', 'source', 'fetch_timestamp',\n",
    "                'target', 'target_return', 'target_direction', 'target_binary']\n",
    "feature_cols = [c for c in aapl_features.columns if c not in exclude_cols]\n",
    "\n",
    "print(f\"Total feature columns: {len(feature_cols)}\")\n",
    "print(\"\\nSample statistics:\")\n",
    "aapl_features[feature_cols].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summary\n",
    "This notebook demonstrated:\n",
    "1. Fetching data from Yahoo Finance\n",
    "2. Exploring raw OHLCV data\n",
    "3. Generating technical indicators\n",
    "4. Visualizing key features (RSI, MACD)\n",
    "5. Statistical overview of features"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

NameError: name 'null' is not defined