# Data Exploration
**Market Intelligence ML - Project 1**

Load and explore market data across asset classes.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data.fetch_data import fetch_multiple_assets, get_close_prices
from src.utils.config import load_config, get_asset_list, get_date_range
from src.data.preprocess import clean_data, calculate_returns

# Notebook-wide plotting defaults: seaborn's 'darkgrid' theme and a
# 14x6 default figure size for every matplotlib figure created afterwards.
sns.set_style('darkgrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

# Reaching this line means every import above resolved.
print('✅ Imports successful!')

## TODO:
- Fetch data for all assets
- Plot price history
- Calculate basic statistics
- Analyze correlations

In [None]:
# Turn price levels into period-over-period percentage changes, then
# discard every row that still contains a missing value.
pct_moves = close_prices.pct_change()
returns = pct_moves.dropna()

# Per-asset summary statistics of the return series.
print("📊 Return Statistics:\n")
return_summary = returns.describe()
print(return_summary)

# Pairwise correlation between the assets' returns.
print("\n🔗 Correlation Matrix:")
corr = returns.corr()

# Heatmap appearance: annotated square cells with two decimals, and a
# diverging palette centred on zero correlation.
heatmap_style = dict(
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)

plt.figure(figsize=(12, 10))
sns.heatmap(corr, **heatmap_style)
plt.title('Asset Return Correlations', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

print("\n✅ Data exploration complete! Ready for feature engineering.")

In [None]:
# Plot every asset's price path rebased to a common starting level of 100,
# so instruments quoted at very different price levels share one axis.
fig, ax = plt.subplots(figsize=(16, 8))

# Rebase each column on its own first available price. Dividing by the raw
# first row (close_prices.iloc[0]) turns a whole column into NaN whenever
# that asset has no price on the first date, and the asset then silently
# disappears from the chart. Back-filling before taking the first row picks
# each column's first non-missing value instead; when the first row is
# complete the result is identical to the plain iloc[0] version.
base_prices = close_prices.bfill().iloc[0]
normalized = (close_prices / base_prices) * 100

# One line per asset, labelled with its column name.
for col in normalized.columns:
    ax.plot(normalized.index, normalized[col], label=col, linewidth=2, alpha=0.8)

ax.set_title('Normalized Asset Prices (Base = 100)', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Normalized Price', fontsize=12)
# Anchor the legend outside the axes so it does not cover the lines.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print("📈 Visualization complete!")

In [None]:
# Merge the per-class ticker lists into one list to download.
all_tickers = equities + fixed_income + alternatives + macro

# Fetch every ticker between start_date and end_date, then pull the
# close prices out of the fetched result.
data_dict = fetch_multiple_assets(all_tickers, start_date, end_date)
close_prices = get_close_prices(data_dict)

# Report how much data came back and the span of its index.
asset_count = len(close_prices.columns)
print(f"\n✅ Fetched {asset_count} assets")
print(f"Data shape: {close_prices.shape}")
first_stamp = close_prices.index.min()
last_stamp = close_prices.index.max()
print(f"Date range: {first_stamp} to {last_stamp}")

In [None]:
# Read the project configuration file and unpack the four date values
# that get_date_range returns for it.
config = load_config('../configs/config.yaml')
date_bounds = get_date_range(config)
start_date, end_date, train_end, val_end = date_bounds

print(f"Date Range: {start_date} to {end_date}")
print(f"Train End: {train_end}")
print(f"Val End: {val_end}")

# Pull the ticker list for each asset class out of the 'assets' section.
asset_groups = config['assets']
equities = asset_groups['equities']
fixed_income = asset_groups['fixed_income']
alternatives = asset_groups['alternatives']
macro = asset_groups['macro']

# Echo the configured tickers, grouped by asset class.
print(f"\n📊 Assets to fetch:")
print(f"  Equities: {equities}")
print(f"  Fixed Income: {fixed_income}")
print(f"  Alternatives: {alternatives}")
print(f"  Macro: {macro}")