## 📦 Kurulum Notu

**Gerekli Paketler:** Ana dizindeki `requirements.txt` dosyasından yüklü olmalı.

```bash
# Projenin ana dizininden çalıştırın:
pip install -r requirements.txt
```

Bu notebook, Hybrid-BTC-Prediction klasöründeki modülleri kullanır.

In [None]:
# LightGBM Improved Model - Bitcoin Price Prediction
# Required packages: must be installed from requirements.txt in the project root
# Install: pip install -r ../requirements.txt

# Import the required libraries
import os
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Make the Hybrid project's src/ modules importable.
# The path is relative, so it resolves against the current working directory.
hybrid_path = '../Hybrid-BTC-Prediction_22040101024_√ñmerAvcƒ±_and_22040101112_Barchƒ±noyKodƒ±rova'
sys.path.append(f'{hybrid_path}/src')

# NOTE(review): later cells call WalkForwardValidator and RecursiveForecaster,
# but neither name is imported here — confirm which module provides them and
# add the imports, otherwise those cells raise NameError.
from data_loader import DataLoader
from feature_engineering import FeatureEngineer
from preprocessing import FullPipeline
from models import LightGBMModel
from sentiment_api import SentimentAggregator

print("‚úÖ T√ºm mod√ºller y√ºklendi!")


# 🚀 LightGBM - IMPROVED VERSION
## Monte Carlo + Walk-Forward + Sentiment API + Support/Resistance

### İyileştirmeler:
- ✅ **Monte Carlo Simulation** (1000 senaryo)
- ✅ **Walk-Forward Validation** (temporal consistency)
- ✅ **Real Sentiment API** (Fear & Greed Index)
- ✅ **Support/Resistance Levels** (liquidity zones)
- ✅ **Log Returns** (data leakage prevention)
- ✅ **Feature Engineering** (180+ features)

**Eski Sistem R²:** 0.0602 (6%)  
**Hedef:** >0.90 (90%)

## 📊 1. VERİ YÜKLEME & SENTIMENT API

In [None]:
print("="*80)
print("üìä STEP 1: DATA LOADING & SENTIMENT INTEGRATION")
print("="*80)

# Load the feature table. The cached CSV is preferred because it is fast; when
# the file is missing the table is rebuilt from raw data, sentiment features
# and feature engineering.
try:
    data = pd.read_csv(f'{hybrid_path}/data/featured_data.csv')
    data['Date'] = pd.to_datetime(data['Date'])
    print(f"‚úÖ Cached data loaded: {data.shape}")
    print(f"   Date range: {data['Date'].min()} to {data['Date'].max()}")
except FileNotFoundError:
    print("‚ö†Ô∏è Cached data not found, loading fresh...")
    loader = DataLoader(start_date='2021-01-01', end_date='2024-12-31')
    raw_data = loader.merge_all_data()

    # Add the sentiment API features
    sentiment_agg = SentimentAggregator()
    raw_data = sentiment_agg.create_sentiment_features(raw_data)

    # Feature engineering
    engineer = FeatureEngineer(raw_data)
    data = engineer.create_all_features(n_lags=30)

    print(f"‚úÖ Data loaded and featured: {data.shape}")

# Show the current sentiment. This is best-effort: a failure here must not
# stop the notebook, but only ordinary errors are caught (a bare `except:`
# would also swallow KeyboardInterrupt/SystemExit) and the cause is reported.
print("\nüå°Ô∏è CURRENT MARKET SENTIMENT:")
sentiment_agg = SentimentAggregator()
try:
    sentiment_agg.print_current_sentiment()
except Exception as exc:
    print(f"‚ö†Ô∏è Could not fetch current sentiment: {exc}")

## 📋 2. PREPROCESSING (Log Returns)

In [None]:
# Step banner
print("\n" + "="*80, "üìã STEP 2: PREPROCESSING WITH LOG RETURNS", "="*80, sep="\n")

# Run the LightGBM preprocessing pipeline on the feature table, holding out
# 20% as the test split and using the 'minmax' scaler option.
pipeline = FullPipeline(featured_df=data, target_col='Close')
lgb_data = pipeline.run_lightgbm_pipeline(test_size=0.2, scaler_type='minmax')

# Report the shapes of the resulting splits and the feature count
print(
    f"\n‚úÖ Train set: {lgb_data['X_train'].shape}",
    f"‚úÖ Test set: {lgb_data['X_test'].shape}",
    f"‚úÖ Features: {len(lgb_data['feature_names'])}",
    sep="\n",
)

## 🤖 3. MODEL TRAINING

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Step banner
print("\n" + "="*80, "ü§ñ STEP 3: LIGHTGBM MODEL TRAINING", "="*80, sep="\n")

# Fit the model on the training split
lgb_model = LightGBMModel()
lgb_model.train(
    lgb_data['X_train'],
    lgb_data['y_train'],
    feature_names=lgb_data['feature_names'],
)

# Score the held-out test split
y_pred_test = lgb_model.predict(lgb_data['X_test'])
test_metrics = dict(
    rmse=np.sqrt(mean_squared_error(lgb_data['y_test'], y_pred_test)),
    mae=mean_absolute_error(lgb_data['y_test'], y_pred_test),
    r2=r2_score(lgb_data['y_test'], y_pred_test),
)

print(
    "\nüìä Test Performance:",
    f"   RMSE: {test_metrics['rmse']:.6f}",
    f"   MAE:  {test_metrics['mae']:.6f}",
    f"   R¬≤:   {test_metrics['r2']:.4f}",
    sep="\n",
)

# Feature importance: the top 20 are kept for later cells, the top 10 are shown here
feature_importance = lgb_model.get_feature_importance(top_n=20)
print(
    "\nüéØ TOP 10 Most Important Features:",
    feature_importance.head(10).to_string(index=False),
    sep="\n",
)

## 🔄 4. WALK-FORWARD VALIDATION

In [None]:
# Step banner
print("\n" + "="*80, "üîÑ STEP 4: WALK-FORWARD VALIDATION", "="*80, sep="\n")

# Prepare the validation inputs: the training features as a DataFrame plus a
# Date column that the validator uses to build its folds.
train_size = lgb_data['X_train'].shape[0]
# NOTE(review): this assumes the first `train_size` rows of `data` correspond
# row-for-row to X_train (i.e. the pipeline dropped no rows) — confirm.
dates_full = data['Date'].iloc[:train_size]

X_full_with_date = pd.DataFrame(lgb_data['X_train'], columns=lgb_data['feature_names'])
X_full_with_date['Date'] = dates_full.values
y_full = lgb_data['y_train']

# Walk-forward validator
# NOTE(review): WalkForwardValidator is not imported in the visible cells —
# confirm where it is defined.
wf_validator = WalkForwardValidator(
    train_window_months=12,
    test_window_months=1,
    step_months=1,
    min_train_size=200,
)

# Create the folds from the Date column
folds = wf_validator.create_folds(X_full_with_date, date_column='Date')

# Validate on the features only (Date removed); verbose=False keeps the output clean
wf_results = wf_validator.validate(
    model_class=LightGBMModel,
    X=X_full_with_date.drop(columns='Date'),
    y=y_full,
    folds=folds,
    model_params=None,
    feature_names=lgb_data['feature_names'],
    verbose=False,
)

wf_summary = wf_results['summary']
print(
    f"\n‚úÖ Walk-Forward Validation Completed!",
    f"   Folds: {wf_summary['n_folds']}",
    f"   Avg R¬≤: {wf_summary['avg_r2']:.4f} (¬±{wf_summary['std_r2']:.4f})",
    f"   Consistency: {wf_summary['consistency_score']:.4f}",
    sep="\n",
)

## 🎲 5. MONTE CARLO FORECASTING (1000 Scenarios)

In [None]:
print("\n" + "="*80)
print("üé≤ STEP 5: MONTE CARLO FORECASTING")
print("="*80)

# Get the scaler, falling back to None when the pipeline output has no
# 'preprocessor' entry or that object exposes no `.scaler` attribute.
try:
    scaler = lgb_data['preprocessor'].scaler
except (AttributeError, KeyError):
    scaler = None  # per the original note, the forecaster can handle None

# Find the last known price (needed for the inverse transform). Only the
# lookup/formatting errors are caught; a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit.
try:
    last_price = data['Close'].iloc[-1]
    print(f"üìå Last known BTC price: ${last_price:,.2f}")
except (KeyError, IndexError, TypeError, ValueError) as exc:
    last_price = None
    print(f"‚ö†Ô∏è Could not determine last price: {exc!r}")

# NOTE(review): RecursiveForecaster is not imported in the visible cells —
# confirm where it is defined.
forecaster = RecursiveForecaster(
    lgb_model,
    scaler,
    lgb_data['feature_names'],
    historical_returns=lgb_data['y_train']
)

# Take the last test row as a numpy array
X_last = np.array(lgb_data['X_test'][-1])  # 1D array, shape: (n_features,)
print(f"üìä X_last shape: {X_last.shape}")
print(f"   Features: {len(X_last)}")

# Monte Carlo forecast. A copy is passed so that both forecasts start from the
# same feature vector even if the forecaster modifies its input in place.
print("\n‚è≥ Running 1000 Monte Carlo simulations...")
mc_results = forecaster.forecast_monte_carlo(
    X_last=X_last.copy(),
    n_steps=30,
    last_price=last_price,
    n_simulations=1000
)

# Show the results (forecasting.py already prints a summary)
print("\nüìä MONTE CARLO RESULTS EXTRACTED:")
print(f"   Median (Most Likely): ${mc_results['median_prices'][-1]:,.2f}")
print(f"   Mean:   ${mc_results['statistics']['final_price_mean']:,.2f}")
print(f"   Std:    ${mc_results['statistics']['final_price_std']:,.2f}")
print(f"   5th Percentile:  ${mc_results['percentiles']['p5'][-1]:,.2f}")
print(f"   95th Percentile: ${mc_results['percentiles']['p95'][-1]:,.2f}")

# Deterministic forecast
print("\n‚è≥ Computing deterministic forecast...")
det_result = forecaster.forecast_lightgbm(
    X_last=X_last.copy(),
    n_steps=30,
    last_price=last_price
)
det_forecast = det_result['prices']
print(f"\n   Deterministic:   ${det_forecast[-1]:,.2f}")

## 📊 6. KARŞILAŞTIRMA (Eski vs Yeni)

In [None]:
print("\n" + "="*80)
print("üìä SYSTEM COMPARISON")
print("="*80)

# Read the old system's metrics; fall back to the recorded values when the
# file is missing, unreadable, empty, or lacks the expected columns.
# (OSError covers FileNotFoundError; pandas' EmptyDataError and ParserError
# are ValueError subclasses.)
try:
    old_metrics = pd.read_csv('lgbm_metrics.csv')
    old_r2 = old_metrics['R2 Score'].values[0]
    old_rmse = old_metrics['RMSE'].values[0]
    old_mae = old_metrics['MAE'].values[0]
except (OSError, KeyError, IndexError, ValueError):
    old_r2, old_rmse, old_mae = 0.0602, 2702.50, 2081.73

# Relative R¬≤ change. The sign comes from the format spec so that a
# regression is shown as "-N%" rather than "+-N%"; a non-positive baseline
# makes the ratio meaningless, so it is reported as N/A.
r2_improvement = (
    f"{(test_metrics['r2'] - old_r2) / old_r2 * 100:+.0f}%" if old_r2 > 0 else 'N/A'
)

# The new RMSE/MAE are labelled as log-space values while the old ones are on
# a different scale, so no percentage is reported for them (the previous
# hard-coded "-99.9%" was not computed from the data).
scale_mismatch = 'N/A (different scale)'

comparison = pd.DataFrame({
    'Metric': ['R¬≤ Score', 'RMSE', 'MAE', 'Walk-Forward R¬≤', 'Consistency Score'],
    'Old System': [
        f"{old_r2:.4f}",
        f"{old_rmse:.2f}",
        f"{old_mae:.2f}",
        'N/A',
        'N/A'
    ],
    'New System': [
        f"{test_metrics['r2']:.4f}",
        f"{test_metrics['rmse']:.6f} (log space)",
        f"{test_metrics['mae']:.6f} (log space)",
        f"{wf_results['summary']['avg_r2']:.4f}",
        f"{wf_results['summary']['consistency_score']:.4f}"
    ],
    'Improvement': [
        r2_improvement,
        scale_mismatch,
        scale_mismatch,
        'NEW',
        'NEW'
    ]
})

print("\n" + comparison.to_string(index=False))

# Save
comparison.to_csv('lgbm_improved_comparison.csv', index=False)
print("\n‚úÖ Comparison saved to lgbm_improved_comparison.csv")

## 📈 7. GÖRSELLEŞTİRME

In [None]:
# Step banner
print("\n" + "="*80, "üìà STEP 7: VISUALIZATION", "="*80, sep="\n")

# Chart 1: Monte Carlo forecast (median, deterministic path, percentile bands)
fig, ax = plt.subplots(figsize=(14, 7))

days = np.arange(1, 31)
ax.plot(
    days,
    mc_results['median_prices'],
    label='Median (Most Likely)',
    color='blue',
    linewidth=2.5,
    marker='o',
)
ax.plot(
    days,
    det_forecast,
    label='Deterministic',
    color='green',
    linewidth=2,
    linestyle='--',
    alpha=0.7,
)

# Confidence bands, drawn widest first
for lower_key, upper_key, band_alpha, band_label in (
    ('p5', 'p95', 0.2, '90% Confidence'),
    ('p25', 'p75', 0.3, '50% Confidence'),
):
    ax.fill_between(
        days,
        mc_results['percentiles'][lower_key],
        mc_results['percentiles'][upper_key],
        alpha=band_alpha,
        color='blue',
        label=band_label,
    )

ax.set_xlabel('Days', fontsize=12)
ax.set_ylabel('BTC Price ($)', fontsize=12)
ax.set_title(
    'LightGBM Improved: 30-Day Monte Carlo Forecast (1000 Scenarios)',
    fontsize=14,
    fontweight='bold',
)
ax.legend(loc='upper left')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('lgbm_improved_monte_carlo.png', dpi=150)
print("‚úÖ Saved: lgbm_improved_monte_carlo.png")
plt.show()

# Chart 2: feature importance (top 20)
fig, ax = plt.subplots(figsize=(10, 8))
feature_importance.head(20).plot.barh(x='Feature', y='Importance', ax=ax, color='lightgreen')
ax.set_title('Top 20 Feature Importance (Improved Model)', fontsize=14, fontweight='bold')
ax.set_xlabel('Importance', fontsize=12)
plt.tight_layout()
plt.savefig('lgbm_improved_feature_importance.png', dpi=150)
print("‚úÖ Saved: lgbm_improved_feature_importance.png")
plt.show()

# Chart 3: walk-forward performance, one panel per metric
wf_df = wf_validator.get_results_dataframe()

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 10))

# (panel, results column, line colour, summary key, average format, title, y label)
panel_specs = (
    (axes[0], 'r2', 'green', 'avg_r2', '.4f',
     'Walk-Forward Validation: R¬≤ Score Across Time Periods', 'R¬≤ Score'),
    (axes[1], 'rmse', 'orange', 'avg_rmse', '.6f',
     'Walk-Forward Validation: RMSE Across Time Periods', 'RMSE (log returns)'),
)
for panel, column, line_colour, avg_key, avg_spec, panel_title, y_label in panel_specs:
    fold_average = wf_results['summary'][avg_key]
    panel.plot(wf_df['fold'], wf_df[column], marker='o', color=line_colour, linewidth=2)
    panel.axhline(
        y=fold_average,
        color='red',
        linestyle='--',
        label=f"Avg: {fold_average:{avg_spec}}",
    )
    panel.set_title(panel_title, fontsize=13, fontweight='bold')
    panel.set_xlabel('Fold Number')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('lgbm_improved_walk_forward.png', dpi=150)
print("‚úÖ Saved: lgbm_improved_walk_forward.png")
plt.show()

## 💾 8. SONUÇLARI KAYDET

In [None]:
print("\n" + "="*80)
print("üíæ STEP 8: SAVING RESULTS")
print("="*80)

# New metrics: one summary row with the test scores, the walk-forward summary
# and the day-30 Monte Carlo figures. Requires `datetime` from the import cell.
improved_metrics = pd.DataFrame([{
    'Model': 'LightGBM_Improved',
    'R2_Test': test_metrics['r2'],
    'RMSE_Test': test_metrics['rmse'],
    'MAE_Test': test_metrics['mae'],
    'R2_WalkForward_Avg': wf_results['summary']['avg_r2'],
    'R2_WalkForward_Std': wf_results['summary']['std_r2'],
    'Consistency_Score': wf_results['summary']['consistency_score'],
    'MC_Median_Day30': mc_results['median_prices'][-1],
    'MC_P05_Day30': mc_results['percentiles']['p5'][-1],
    'MC_P95_Day30': mc_results['percentiles']['p95'][-1],
    'Timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}])

improved_metrics.to_csv('lgbm_improved_metrics.csv', index=False)
print("‚úÖ Saved: lgbm_improved_metrics.csv")

# Monte Carlo forecast
# NOTE(review): the 'Mean_Return' column is filled from 'median_returns'; the
# column name is kept unchanged so existing readers of the CSV still work.
mc_forecast_df = pd.DataFrame({
    'Day': range(1, 31),
    'Median': mc_results['median_prices'],
    'Mean_Return': mc_results['median_returns'],
    'P05': mc_results['percentiles']['p5'],
    'P25': mc_results['percentiles']['p25'],
    'P50': mc_results['percentiles']['p50'],
    'P75': mc_results['percentiles']['p75'],
    'P95': mc_results['percentiles']['p95']
})
mc_forecast_df.to_csv('lgbm_improved_mc_forecast.csv', index=False)
print("‚úÖ Saved: lgbm_improved_mc_forecast.csv")

# Walk-Forward results (wf_df is created in the visualization cell)
wf_df.to_csv('lgbm_improved_walk_forward.csv', index=False)
print("‚úÖ Saved: lgbm_improved_walk_forward.csv")

# Feature importance
feature_importance.to_csv('lgbm_improved_features.csv', index=False)
print("‚úÖ Saved: lgbm_improved_features.csv")

print("\n" + "="*80)
print("üéâ IMPROVED LIGHTGBM PIPELINE COMPLETED!")
print("="*80)

print("\nüìÅ Generated Files:")
files = [
    'lgbm_improved_metrics.csv',
    'lgbm_improved_comparison.csv',
    'lgbm_improved_mc_forecast.csv',
    'lgbm_improved_walk_forward.csv',
    'lgbm_improved_features.csv',
    'lgbm_improved_monte_carlo.png',
    'lgbm_improved_feature_importance.png',
    'lgbm_improved_walk_forward.png'
]

# List only the files that actually exist, with their sizes.
# Requires `os` from the import cell.
for f in files:
    if os.path.exists(f):
        size = os.path.getsize(f)
        print(f"   ‚úÖ {f:45s} ({size:,} bytes)")

# Relative R¬≤ change versus the old system (old_r2 comes from the comparison
# cell). Guarded the same way as the comparison table: a non-positive baseline
# would divide by zero or give a meaningless ratio, and the sign is taken from
# the format spec so a regression prints as "-N%" instead of "+-N%".
if old_r2 > 0:
    r2_change = f"{(test_metrics['r2'] - old_r2) / old_r2 * 100:+.0f}%"
else:
    r2_change = 'N/A'

print("\nüí° KEY IMPROVEMENTS:")
print(f"   ‚Ä¢ R¬≤ Score: {old_r2:.4f} ‚Üí {test_metrics['r2']:.4f} ({r2_change})")
print(f"   ‚Ä¢ Monte Carlo: {mc_results['n_simulations']} scenarios analyzed")
print(f"   ‚Ä¢ Median Forecast (Day 30): ${mc_results['median_prices'][-1]:,.2f}")
print(f"   ‚Ä¢ Walk-Forward: {wf_results['summary']['n_folds']} time periods validated")
print(f"   ‚Ä¢ Sentiment API: Real-time Fear & Greed Index integrated")
print(f"   ‚Ä¢ Features: {len(lgb_data['feature_names'])} advanced features")