# Stage 04: Financial Data Acquisition

This notebook demonstrates data acquisition from financial APIs for the Portfolio Risk Management System.

## Objectives
- Set up API connections (Alpha Vantage, yfinance)
- Fetch historical stock data
- Implement error handling and rate limiting
- Save the raw data as timestamped CSV and JSON files

In [None]:
import sys
import os
sys.path.append('../src')

import pandas as pd
import numpy as np
from datetime import datetime
import utils
from dotenv import load_dotenv

# Read variables from the project-level .env file before any API call is made.
# NOTE(review): presumably this supplies the Alpha Vantage API key — confirm against utils.
dotenv_path = '../.env'
load_dotenv(dotenv_path)

print("📊 Financial Data Acquisition Setup Complete")

## 1. Test API Connections

In [None]:
# Smoke-test the yfinance path (no API key required) with a single ticker
# before requesting the whole portfolio.
test_symbol = "AAPL"
print(f"Testing yfinance connection with {test_symbol}...")

df_test = utils.fetch_yfinance(test_symbol, period="1mo")

# An empty frame is treated as a failed fetch.
if df_test.empty:
    print("❌ yfinance test failed")
else:
    print(f"✅ yfinance working: {len(df_test)} records fetched")
    print(df_test.head())

## 2. Fetch Multiple Stock Data

In [None]:
# Tickers that make up the portfolio used for risk analysis
portfolio_symbols = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"]

print(f"Fetching data for portfolio: {portfolio_symbols}")

# Request six months of history per ticker, going through yfinance
# (reliable and free) instead of Alpha Vantage.
portfolio_data = utils.fetch_multiple_stocks(
    symbols=portfolio_symbols,
    period="6mo",
    prefer_alphavantage=False,
)

# An empty frame means nothing could be fetched.
if portfolio_data.empty:
    print("❌ Failed to fetch portfolio data")
else:
    print("\n✅ Portfolio data fetched successfully")
    print(f"Shape: {portfolio_data.shape}")
    date_column = portfolio_data['date']
    print(f"Date range: {date_column.min()} to {date_column.max()}")
    print(f"Symbols: {portfolio_data['symbol'].unique()}")

## 3. Data Quality Assessment

In [None]:
if not portfolio_data.empty:
    # Build the quality summary for the fetched portfolio frame.
    quality_report = utils.data_quality_report(portfolio_data)

    print("📊 Data Quality Report:")
    print("Shape: {}".format(quality_report['shape']))
    print("Memory usage: {:.2f} MB".format(quality_report['memory_usage_mb']))
    print("Duplicate rows: {}".format(quality_report['duplicate_rows']))

    # Only columns that actually have gaps are listed under the header.
    print("\nMissing data:")
    columns_with_gaps = (
        (column, n_missing)
        for column, n_missing in quality_report['missing_data'].items()
        if n_missing > 0
    )
    for column, n_missing in columns_with_gaps:
        print(f"  {column}: {n_missing} missing values")

    # Check that every column the later stages rely on is present.
    # Any exception from the validator is reported instead of propagated,
    # so the notebook keeps running.
    required_cols = ['date', 'open', 'high', 'low', 'close', 'volume', 'symbol']
    try:
        utils.validate_dataframe(portfolio_data, required_cols=required_cols)
    except Exception as err:
        print(f"❌ Validation failed: {err}")

## 4. Save Raw Data

In [None]:
if not portfolio_data.empty:
    # Both exports share the same frame, file prefix and source tag;
    # only the extension differs.
    raw_save_args = {
        "df": portfolio_data,
        "prefix": "portfolio_raw",
        "source": "financial_apis",
    }

    # Primary copy: CSV
    saved_path = utils.save_with_timestamp(ext="csv", **raw_save_args)
    print(f"💾 Raw data saved to: {saved_path}")

    # Backup copy: JSON
    json_path = utils.save_with_timestamp(ext="json", **raw_save_args)
    print(f"💾 Backup JSON saved to: {json_path}")

## 5. Summary and Next Steps

In [None]:
def _has_rows(name):
    """Return True when the notebook global *name* is bound to a non-empty frame."""
    frame = globals().get(name)
    return frame is not None and not frame.empty


# Derive each status from what the earlier cells actually produced, so that a
# failed fetch or a skipped save is not reported as a success.
# quality_report, saved_path and json_path are only bound when their cells ran
# with non-empty portfolio data.
stage_checks = [
    ("API connections tested", _has_rows("df_test")),
    ("Portfolio data acquired", _has_rows("portfolio_data")),
    ("Data quality assessed", "quality_report" in globals()),
    (
        "Raw data saved with timestamps",
        "saved_path" in globals() and "json_path" in globals(),
    ),
]

print("\n🎯 Stage 04 Summary:")
for label, done in stage_checks:
    # When every step succeeded this prints exactly the original ✅ lines.
    print(f"{'✅' if done else '❌'} {label}")

print("\n📋 Next Steps:")
print("- Stage 05: Set up data storage infrastructure")
print("- Stage 06: Create preprocessing pipeline")
print("- Stage 07: Build risk analysis models")