# Factor Research

This notebook explores individual factors and their relationships with returns.

## Objectives
1. Compute individual factors (Value, Momentum, Quality, Size)
2. Analyze factor distributions and correlations
3. Examine factor-return relationships
4. Create factor decile portfolios


In [None]:
import sys
from pathlib import Path

# Make the `src` package importable: the project root is taken to be two
# directory levels above the notebook's working directory, and it is put at
# the front of sys.path so the `from src...` imports resolve against it.
notebook_dir = Path().resolve()
project_root = notebook_dir.parent.parent
sys.path.insert(0, str(project_root))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.backtester import get_rebalance_dates
from src.data_loader import load_universe, download_price_data, download_fundamental_data
from src.data_preprocess import preprocess_data
from src.factors import (
    compute_value_factor_panel,
    compute_momentum_factor_panel,
    compute_quality_factor_panel,
    compute_size_factor_panel,
)
from src.utils.config import BACKTEST_CONFIG, FACTOR_CONFIG

# Use the 'seaborn-v0_8' style sheet when this matplotlib install lists it as
# available; otherwise fall back to the 'default' style.
if 'seaborn-v0_8' in plt.style.available:
    plt.style.use('seaborn-v0_8')
else:
    plt.style.use('default')


In [None]:
# Load and preprocess data on a reduced universe so the notebook runs quickly.
SAMPLE_TICKER_COUNT = 50    # research subset: first N tickers of the universe
HISTORY_BUFFER_MONTHS = 15  # extra history requested ahead of the backtest start
                            # NOTE(review): presumably factor look-back warm-up — confirm

universe_df = load_universe()
sample_tickers = universe_df['ticker'].head(SAMPLE_TICKER_COUNT).tolist()

# The download window opens HISTORY_BUFFER_MONTHS before the backtest start date.
backtest_start_ts = pd.Timestamp(BACKTEST_CONFIG.start_date)
data_start_date = backtest_start_ts - pd.DateOffset(months=HISTORY_BUFFER_MONTHS)
data_start_date_str = data_start_date.strftime('%Y-%m-%d')

# Prices and fundamentals are requested for the same tickers and date window.
download_window = (data_start_date_str, BACKTEST_CONFIG.end_date)
price_data = download_price_data(sample_tickers, *download_window, cache=True)
fundamental_data = download_fundamental_data(sample_tickers, *download_window, cache=True)

(
    clean_prices,
    clean_fundamentals,
    market_cap,
    eligible_tickers,
) = preprocess_data(price_data, fundamental_data)

# Quick sanity summary of what survived preprocessing.
print(f"Eligible tickers: {len(eligible_tickers)}")
print(f"Price data shape: {clean_prices.shape}")


## Compute Factors


In [None]:
# Get rebalancing dates: take the unique, sorted values of the 'date' index
# level of the cleaned prices, keep only those on or after the backtest start,
# and let get_rebalance_dates pick the schedule at the configured frequency.
all_trading_dates = clean_prices.index.get_level_values('date').unique().sort_values()
backtest_start = pd.Timestamp(BACKTEST_CONFIG.start_date)
trading_dates = all_trading_dates[all_trading_dates >= backtest_start]
rebalance_dates = get_rebalance_dates(trading_dates, BACKTEST_CONFIG.rebalance_frequency)

# Fail here, with a clear message, rather than letting an empty schedule flow
# into the factor panels and the correlation analysis further down.
assert len(rebalance_dates) > 0, (
    "No rebalancing dates on or after BACKTEST_CONFIG.start_date; "
    "check the backtest window and the downloaded price history."
)

print(f"Computing factors for {len(rebalance_dates)} rebalancing dates")

# Compute all factors, each evaluated on the same rebalancing schedule.
value_factor = compute_value_factor_panel(clean_prices, clean_fundamentals, market_cap, rebalance_dates)
momentum_factor = compute_momentum_factor_panel(clean_prices, rebalance_dates)
quality_factor = compute_quality_factor_panel(clean_prices, clean_fundamentals, market_cap, rebalance_dates)
size_factor = compute_size_factor_panel(market_cap, rebalance_dates)

print("Factors computed successfully")


## Factor Correlations


In [None]:
# Combine factors and compute correlations.
# NOTE(review): only the first column of each panel is used (`iloc[:, 0]`).
# This assumes that column holds the factor score — confirm against the panel
# layout produced by the compute_*_factor_panel functions.
factor_panels = {
    'value': value_factor,
    'momentum': momentum_factor,
    'quality': quality_factor,
    'size': size_factor,
}
factors_df = pd.DataFrame(
    {factor_name: panel.iloc[:, 0] for factor_name, panel in factor_panels.items()}
)

# Pairwise correlation between the four factor columns.
correlation_matrix = factors_df.corr()
print("Factor Correlation Matrix:")
print(correlation_matrix)

# Plot correlation heatmap on an explicit figure/axes pair; the colour scale
# is centred on zero so positive and negative correlations diverge visually.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    ax=ax,
)
ax.set_title('Factor Correlation Matrix')
fig.tight_layout()
plt.show()
