# Stage 03: Python Fundamentals for Financial Engineering

This notebook demonstrates core Python utilities and functions specifically designed for financial data analysis and risk management.

## Objectives
- Demonstrate financial utility functions
- Test data cleaning capabilities
- Demonstrate risk metric calculations (returns, volatility, Sharpe ratio, VaR)
- Show modular code organization

In [None]:
import sys
import os
# Extend the module search path with '../src'. The entry is relative, so it is
# resolved against the current working directory when the imports below run.
sys.path.append('../src')

import pandas as pd
import numpy as np
# NOTE(review): `datetime` is not referenced in the cells visible here.
from datetime import datetime
# Project-local helper modules (presumably located in ../src — confirm).
import utils
import cleaning
import matplotlib.pyplot as plt

# Banner so the notebook output shows that setup completed.
print("🐍 Python Fundamentals for Financial Engineering")

## 1. Financial Data Utilities Demo

In [None]:
# Smoke-test the project's price download helper.
print("📊 Testing financial data utilities...")

# Request AAPL with period='1mo'; later cells reuse `sample_data`.
sample_data = utils.fetch_yfinance('AAPL', period='1mo')

if sample_data.empty:
    print("❌ Failed to fetch data")
else:
    print(f"✅ Data fetched successfully: {sample_data.shape}")
    date_column = sample_data['date']
    print(f"Date range: {date_column.min()} to {date_column.max()}")

    # Preview the first rows of the price/volume columns.
    preview_columns = ['date', 'open', 'high', 'low', 'close', 'volume']
    print("\nSample data:")
    print(sample_data[preview_columns].head())

## 2. Data Quality Assessment

In [None]:
if not sample_data.empty:
    # Build the report once, then print its fields one by one.
    print("🔍 Data Quality Assessment:")
    quality_report = utils.data_quality_report(sample_data)

    report_shape = quality_report['shape']
    print(f"\nShape: {report_shape}")
    memory_mb = quality_report['memory_usage_mb']
    print(f"Memory usage: {memory_mb:.2f} MB")
    duplicate_count = quality_report['duplicate_rows']
    print(f"Duplicate rows: {duplicate_count}")

    # Keep only the report entries whose missing count is positive.
    missing_data = {
        name: count
        for name, count in quality_report['missing_data'].items()
        if count > 0
    }
    if not missing_data:
        print("✅ No missing data found")
    else:
        print(f"Missing data: {missing_data}")

## 3. Data Cleaning Functions

In [None]:
# Exercise the cleaning helpers on a copy of the fetched data with a few
# 'close' values deliberately blanked out.
# Guarded like the other cells: on an empty frame np.random.choice would
# raise, so the demo is skipped when the fetch returned nothing.
if not sample_data.empty:
    test_data = sample_data.copy()

    # Introduce some missing values. The count is capped below the number of
    # rows so that sampling without replacement cannot fail on a short frame
    # and at least one observed 'close' value is left for the fill step.
    np.random.seed(42)  # fixed seed keeps the chosen rows reproducible
    n_missing = min(5, len(test_data) - 1)
    missing_indices = np.random.choice(test_data.index, size=n_missing, replace=False)
    test_data.loc[missing_indices, 'close'] = np.nan

    print("🧹 Testing data cleaning functions...")
    print(f"Introduced {test_data['close'].isna().sum()} missing values")

    # Fill the gaps in 'close' with the project's cleaning helper.
    cleaned_data = cleaning.fill_missing_median(test_data, columns=['close'])
    print(f"✅ Missing values filled: {cleaned_data['close'].isna().sum()} remaining")

    # Normalize 'volume'; the helper returns the transformed frame and a scaler.
    normalized_data, scaler = cleaning.normalize_data(cleaned_data, columns=['volume'], method='standard')
    print(f"✅ Volume normalized: mean={normalized_data['volume'].mean():.3f}, std={normalized_data['volume'].std():.3f}")

## 4. Financial Calculations

In [None]:
if not sample_data.empty:
    print("📈 Financial calculations demo:")

    # Row-over-row percentage change of the close; the first row has no
    # predecessor, so its NaN is dropped.
    prices = sample_data['close']
    returns = prices.pct_change().dropna()

    # Compute each statistic once, then report.
    periods_per_year = 252
    mean_return = returns.mean()
    volatility = returns.std()
    annualized_return = mean_return * periods_per_year
    annualized_volatility = volatility * np.sqrt(periods_per_year)

    print("\nReturns statistics:")
    print(f"  Mean daily return: {mean_return:.4f}")
    print(f"  Daily volatility: {volatility:.4f}")
    print(f"  Annualized return: {annualized_return:.4f}")
    print(f"  Annualized volatility: {annualized_volatility:.4f}")

    # Sharpe ratio: annualized mean excess return over annualized volatility,
    # using a 2% annual risk-free rate spread evenly across the periods.
    risk_free_rate = 0.02
    excess_returns = returns - (risk_free_rate / periods_per_year)
    sharpe_ratio = (excess_returns.mean() * periods_per_year) / annualized_volatility
    print(f"  Sharpe ratio: {sharpe_ratio:.4f}")

    # 95% VaR reported as the 5th-percentile return (a signed return, not a
    # positive loss figure).
    var_95 = returns.quantile(0.05)
    print(f"  VaR (95%): {var_95:.4f}")

## 5. Data Storage and Verification

In [None]:
if not sample_data.empty:
    print("💾 Testing data storage functions...")

    # Hand the frame to the project's timestamped-save helper and keep the
    # path it reports back.
    save_options = {
        "df": sample_data,
        "prefix": "python_fundamentals_test",
        "source": "testing",
        "ext": "csv",
    }
    saved_path = utils.save_with_timestamp(**save_options)

    print(f"✅ Data saved to: {saved_path}")

    # Confirm the reported path really exists on disk before sizing it.
    if not os.path.exists(saved_path):
        print("❌ File not found")
    else:
        file_size = os.path.getsize(saved_path) / 1024  # bytes -> KB
        print(f"✅ File verified: {file_size:.1f} KB")

## 6. Visualization Demo

In [None]:
if not sample_data.empty:
    print("📊 Creating visualizations...")

    # Two stacked panels: closing price on top, returns underneath.
    fig, axes = plt.subplots(2, 1, figsize=(12, 8))

    # Price chart
    axes[0].plot(sample_data['date'], sample_data['close'], 'b-', linewidth=2)
    axes[0].set_title('AAPL Stock Price')
    axes[0].set_ylabel('Price ($)')
    axes[0].grid(True, alpha=0.3)

    # Returns chart.
    # Pick the dates with the same not-null mask that selects the returns.
    # Slicing the dates with [1:] only lines up when the first row is the
    # sole NaN; any other gap in 'close' would make x and y differ in length
    # and plot() would raise.
    pct_change = sample_data['close'].pct_change()
    has_return = pct_change.notna()
    returns = pct_change[has_return]
    axes[1].plot(sample_data.loc[has_return, 'date'], returns, 'r-', alpha=0.7)
    axes[1].set_title('Daily Returns')
    axes[1].set_ylabel('Return')
    axes[1].set_xlabel('Date')
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print("✅ Visualizations created")

## 7. Module Organization Demo

In [None]:
# Demonstrate modular organization
import inspect


def _public_functions(module):
    """Return the sorted names of public functions defined in *module*.

    A plain ``dir(module)`` also reports imported modules, constants and
    names pulled in from other packages, which would be mislabelled as
    "functions" in the listing below. Only plain functions whose
    ``__module__`` matches the module's own name are kept, and names
    starting with an underscore are skipped.
    """
    return sorted(
        name
        for name, member in inspect.getmembers(module, inspect.isfunction)
        if not name.startswith('_') and member.__module__ == module.__name__
    )


print("🏗️ Module Organization:")

# List available functions in utils module
utils_functions = _public_functions(utils)
print(f"\nutils module functions ({len(utils_functions)}):")
for func in utils_functions[:10]:  # Show first 10
    print(f"  - {func}")
if len(utils_functions) > 10:
    print(f"  ... and {len(utils_functions) - 10} more")

# List available functions in cleaning module
cleaning_functions = _public_functions(cleaning)
print(f"\ncleaning module functions ({len(cleaning_functions)}):")
for func in cleaning_functions:
    print(f"  - {func}")

## 8. Summary

In [None]:
# Closing recap: the same lines as before, emitted from data instead of
# one print call per line.
summary_lines = (
    "\n🎯 Stage 03 Summary:",
    "✅ Financial data utilities tested",
    "✅ Data cleaning functions validated",
    "✅ Risk calculations demonstrated",
    "✅ Data storage/retrieval working",
    "✅ Visualization capabilities shown",
    "✅ Modular code organization confirmed",
)
for line in summary_lines:
    print(line)

readiness_lines = (
    "\n📋 Python Fundamentals Ready:",
    "  - Financial data fetching and processing",
    "  - Data quality assessment and cleaning",
    "  - Risk metric calculations",
    "  - Automated data storage with timestamps",
    "  - Comprehensive error handling",
)
for line in readiness_lines:
    print(line)

print("\n🚀 Ready to proceed to Stage 04: Data Acquisition")