# DE40 Trend and Range Analysis

## Objective
Analyze the trend characteristics and range behavior of the DAX Index (DE40) to understand market structure, volatility patterns, and mean-reversion vs. momentum tendencies.

## Key Metrics
- **Hurst Exponent**: Measure of long-term memory in the price series (H < 0.5 mean-reverting, H ≈ 0.5 random walk, H > 0.5 trending)
- **Autocorrelation**: Price momentum persistence
- **Range Metrics**: Intraday range, volatility clustering
- **Gap Analysis**: Data quality and market anomalies

## 1. Environment Setup

In [None]:
import sys
sys.path.insert(0, '../../')

from shared.database_connector import fetch_ohlcv, get_date_range
from shared.data_module import process_data
from shared.config import SYMBOLS
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Shared plotting defaults: seaborn grid theme and a wide default figure size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

# Confirmation that the setup cell ran to the end.
print('[OK] Environment setup complete')

## 2. Data Fetching

In [None]:
# Anchor the analysis window on the 'end' timestamp reported for the hourly
# 'deuidxeur' series and look back 365 days from there.
# NOTE(review): presumably 'end' is the newest stored candle - confirm in get_date_range.
lookback = timedelta(days=365)
date_range = get_date_range('deuidxeur', 'h1')
end_date = date_range['end']
start_date = end_date - lookback
print(f'Analysis period: {start_date.date()} to {end_date.date()}')

In [None]:
# Load the hourly candles for the analysis window chosen above.
fetch_kwargs = {
    'symbol': 'deuidxeur',
    'timeframe': 'h1',
    'start_date': start_date,
    'end_date': end_date,
}
df_raw = fetch_ohlcv(**fetch_kwargs)

# Quick sanity check of what came back.
print(f'Raw data shape: {df_raw.shape}')
print(f'First 5 rows (UTC):')
print(df_raw.head())

In [None]:
# Run the shared cleaning pipeline on the raw candles.
# NOTE(review): the labels printed below suggest it converts to local time and
# keeps market hours only - confirm against shared.data_module.process_data.
clean_kwargs = {
    'df': df_raw,
    'symbol': 'deuidxeur',
    'timeframe': 'h1',
    'local_time': True,
    'exclude_news': False,
}
df_clean = process_data(**clean_kwargs)

# Report the shape, index timezone and a preview of the cleaned frame.
print(f'Cleaned data shape: {df_clean.shape}')
print(f'Timezone: {df_clean.index.tz}')
print(f'First 5 rows (local time, market hours):')
print(df_clean.head())

## 3. Gap Analysis

In [None]:
# --- Raw data: every calendar hour between the first and last candle is a slot ---
actual_raw = len(df_raw)
if actual_raw > 0:
    # N hourly candles span N-1 hours, so add 1 to turn the span into a slot count.
    expected_raw = (df_raw.index[-1] - df_raw.index[0]).total_seconds() / 3600 + 1
else:
    expected_raw = 0
# Same zero guard as the clean-data branch, so a degenerate frame cannot divide by zero.
gap_raw = ((expected_raw - actual_raw) / expected_raw * 100) if expected_raw > 0 else 0

# --- Clean data: only trading-session slots should count as "expected" ---
# Counting wall-clock hours here would treat every night and weekend as missing
# data and drive the reported data quality far below 100% for a complete series.
actual_clean = len(df_clean)
if actual_clean > 0:
    # Candles observed per calendar day; the most common count is taken as the
    # length of a full session (data-driven, so no market hours are hard-coded).
    candles_per_day = df_clean.groupby(df_clean.index.date).size()
    session_length = int(candles_per_day.mode().max())
    # Mon-Fri days between the first and last session, inclusive. No holiday
    # calendar is applied, so exchange holidays and shortened sessions show up as gaps.
    trading_days = len(pd.bdate_range(start=candles_per_day.index[0],
                                      end=candles_per_day.index[-1]))
    expected_clean = trading_days * session_length
else:
    expected_clean = 0
gap_clean = ((expected_clean - actual_clean) / expected_clean * 100) if expected_clean > 0 else 0

print('='*80)
print('Gap Analysis - RAW DATA (all hours, nights, weekends)')
print('='*80)
print(f'Expected candles: {expected_raw:.0f}')
print(f'Actual candles: {actual_raw}')
print(f'Gap: {gap_raw:.2f}%')

print(f'\n' + '='*80)
print('Gap Analysis - CLEAN DATA (market hours: 09:00-17:30)')
print('='*80)
print(f'Expected candles: {expected_clean:.0f}')
print(f'Actual candles: {actual_clean}')
print(f'Gap: {gap_clean:.2f}%')
print(f'Data quality: {100 - gap_clean:.1f}%')
print('='*80)

## 4. Hurst Exponent

In [None]:
def calculate_hurst_exponent(price_series, max_lag=1000):
    """Estimate the Hurst exponent of a price series with rescaled-range (R/S) analysis.

    For each window length ``lag`` in ``range(10, max_lag, 10)`` the first
    ``lag`` log returns are taken, the range of their cumulative deviations
    from the window mean is divided by the window's sample standard deviation,
    and the slope of ``log(R/S)`` against ``log(lag)`` is returned as the
    exponent. Only the leading window of each length is used (the statistic is
    not averaged over sub-windows), so treat the result as a rough estimate.

    Args:
        price_series: pandas Series of prices in chronological order.
        max_lag: Exclusive upper bound for the window lengths. Lengths larger
            than the number of available returns are ignored.

    Returns:
        Tuple ``(hurst, lags, tau)``: the fitted slope, the window lengths that
        produced a usable R/S value, and those R/S values (aligned with ``lags``).

    Raises:
        ValueError: If fewer than two window lengths yield a usable R/S value,
            in which case no line can be fitted.
    """
    # The returns do not depend on the window length, so compute them once.
    returns = np.log(price_series / price_series.shift(1)).dropna().to_numpy()
    n_returns = len(returns)

    valid_lags = []
    tau = []
    for lag in range(10, max_lag, 10):
        if lag > n_returns:
            # A longer window would just reuse the full series again and add
            # duplicate points to the regression.
            break
        window = returns[:lag]
        # R/S is defined on deviations from the mean of the window itself.
        cumulative = np.cumsum(window - window.mean())
        range_val = cumulative.max() - cumulative.min()
        std = np.std(window, ddof=1)
        if std > 0 and range_val > 0:
            # Record the lag together with its value so the two stay aligned
            # even when earlier windows were skipped.
            valid_lags.append(lag)
            tau.append(range_val / std)

    if len(tau) < 2:
        raise ValueError(
            'Not enough data to estimate the Hurst exponent: need at least two '
            'window lengths with non-zero variance'
        )

    lags = np.array(valid_lags)
    tau = np.array(tau)
    poly = np.polyfit(np.log(lags), np.log(tau), 1)
    return poly[0], lags, tau

# Estimate the exponent on the cleaned closes, using window lengths below 500 candles.
hurst, lags, tau = calculate_hurst_exponent(df_clean['close'], max_lag=500)
print(f'Hurst Exponent: {hurst:.4f}')

# Values below 0.5 are reported as mean-reverting, everything else as trending.
print(
    'Interpretation: Mean-reverting market'
    if hurst < 0.5
    else 'Interpretation: Trending market'
)

## 5. Autocorrelation

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Candle-to-candle log returns of the cleaned close prices.
returns = np.log(df_clean['close'] / df_clean['close'].shift(1)).dropna()

print(f'Return Statistics:')
print(f'Mean: {returns.mean() * 100:.4f}%')
print(f'Std: {returns.std() * 100:.4f}%')

# Lags are counted in candles (rows), not in wall-clock hours.
lags_to_check = [1, 2, 4, 8, 24, 48]
autocorr_values = list(map(returns.autocorr, lags_to_check))

print(f'\nAutocorrelation at Specific Lags:')
for lag, ac in zip(lags_to_check, autocorr_values):
    print(f'  Lag {lag:2d}: {ac:7.4f}')

## 6. Range Analysis

In [None]:
# Add per-candle range columns to df_clean in place: the high-low distance in
# points, and the same distance as a percentage of the candle's open.
df_clean['range'] = df_clean['high'].sub(df_clean['low'])
df_clean['range_pct'] = df_clean['range'].div(df_clean['open']).mul(100)

# Summary statistics of the candle range.
print(f'Range Statistics:')
print(f'Mean: {df_clean["range"].mean():.2f} points')
print(f'Median: {df_clean["range"].median():.2f} points')
print(f'Std: {df_clean["range"].std():.2f} points')
print(f'Mean %: {df_clean["range_pct"].mean():.4f}%')

## 7. Summary

In [None]:
# Collect the headline figures computed in the earlier cells and print them as
# one banner-delimited report.
summary_lines = [
    '='*80,
    'DE40 ANALYSIS SUMMARY',
    '='*80,
    f'Period: {df_clean.index[0].date()} to {df_clean.index[-1].date()}',
    f'Candles: {len(df_clean)} (market hours only)',
    f'Data Quality: {100 - gap_clean:.1f}%',
    f'Hurst Exponent: {hurst:.4f}',
    f'Mean Range: {df_clean["range"].mean():.2f} points',
    '='*80,
]
print(*summary_lines, sep='\n')