# Macro Sentiment Trading Pipeline

This notebook provides an interactive interface to run the macro sentiment trading pipeline. You can:
1. Run the complete pipeline
2. Run individual components
3. Visualize results at each stage
4. Experiment with different parameters

In [2]:
# Set up paths
import sys
import os

# Add the project root directory to the Python path so `src.*` imports resolve.
# `__file__` is not defined inside a notebook kernel, so the root is taken to be
# the parent of the current working directory (this is what the previous
# `os.path.dirname('__file__')` expression evaluated to as well, since the
# dirname of that string literal is always empty).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    # Guard so that re-running this cell does not stack duplicate entries
    sys.path.append(project_root)

# Verify the path is correct
print("Project root:", project_root)
print("Python path:", sys.path)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import logging

from src.news_collector import GDELTCollector
from src.headline_processor import HeadlineProcessor
from src.sentiment_analyzer import SentimentAnalyzer
from src.market_processor import MarketProcessor
from src.model_trainer import ModelTrainer

# Configure root logging at INFO level and create this notebook's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Make sure every directory listed below exists; paths are resolved
# relative to the current working directory
for _required_dir in ('data/news', 'data/raw/gdelt', 'results'):
    os.makedirs(_required_dir, exist_ok=True)

Project root: c:\Users\danie\Coding Projects\Personal\macro_sentiment_trading
Python path: ['C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\python311.zip', 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\DLLs', 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\Lib', 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0', 'c:\\Users\\danie\\Coding Projects\\Personal\\macro_sentiment_trading\\.venv', '', 'c:\\Users\\danie\\Coding Projects\\Personal\\macro_sentiment_trading\\.venv\\Lib\\site-packages', 'c:\\Users\\danie\\Coding Projects\\Personal\\macro_sentiment_trading\\.venv\\Lib\\site-packages\\win32', 'c:\\Users\\danie\\Coding Projects\\Personal\\macro_sentiment_trading\\.venv\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\danie\\Coding Projects\\Personal\\macro_sentiment_trading\\

  from .autonotebook import tqdm as notebook_tqdm


## 1. Data Collection and Processing

In [1]:
def collect_and_process_news(start_date: str, end_date: str, force_refresh: bool = False):
    """Fetch events for a date range and run them through headline processing.

    Args:
        start_date: Start of the range, forwarded to
            ``GDELTCollector.fetch_events`` (the example cell passes
            ``YYYY-MM-DD`` strings).
        end_date: End of the range, forwarded the same way.
        force_refresh: Forwarded to ``fetch_events`` as its third positional
            argument.

    Returns:
        Whatever ``HeadlineProcessor.process_articles`` returns for the
        fetched events.
    """
    news_source = GDELTCollector()
    headline_stage = HeadlineProcessor()

    # Stage 1: pull the raw events
    raw_events = news_source.fetch_events(start_date, end_date, force_refresh)

    # Stage 2: hand them to the headline processor and return its output
    return headline_stage.process_articles(raw_events)

# Example usage: collect everything from the start date up to today
start_date = "2015-02-18"
end_date = datetime.now().date().isoformat()

# NOTE: the two lines below run the collection step as-is. They need the
# setup cell (imports) to have been executed first; comment them out to skip
# the collection.
events_df = collect_and_process_news(start_date, end_date)
events_df.head()

NameError: name 'datetime' is not defined

## 2. Sentiment Analysis

In [5]:
def analyze_sentiment(events_df: pd.DataFrame):
    """Score every headline and aggregate the scores into daily features.

    Args:
        events_df: Frame providing a ``headline`` column (passed to the
            analyzer as a plain list) and a ``date`` column.

    Returns:
        A ``(sentiment_df, daily_features)`` tuple: the frame returned by
        ``SentimentAnalyzer.compute_sentiment`` with a ``date`` column added,
        and the result of ``SentimentAnalyzer.compute_daily_features`` on it.

    Raises:
        ValueError: If the scored frame does not have the same number of rows
            as ``events_df`` (dates can then not be attached row by row).
    """
    analyzer = SentimentAnalyzer()

    # Compute sentiment scores
    sentiment_df = analyzer.compute_sentiment(events_df['headline'].tolist())

    # Headlines were handed over as a positional list, so dates must be attached
    # by position too. A plain `sentiment_df['date'] = events_df['date']` aligns
    # on index labels and yields NaN / shifted dates whenever events_df carries
    # a non-default index (e.g. after filtering). Re-labelling the dates with
    # sentiment_df's own index makes the assignment positional.
    # NOTE(review): assumes compute_sentiment returns one row per headline in
    # input order - confirm against SentimentAnalyzer.
    sentiment_df['date'] = events_df['date'].set_axis(sentiment_df.index)

    # Compute daily features
    daily_features = analyzer.compute_daily_features(sentiment_df)

    return sentiment_df, daily_features

# Example usage
# sentiment_df, daily_features = analyze_sentiment(events_df)
# daily_features.head()

## 3. Market Data Processing

In [6]:
def process_market_data(start_date: str, end_date: str, daily_features: pd.DataFrame):
    """Fetch market data, enrich each asset with features, and align with sentiment.

    Args:
        start_date: Start of the range, forwarded to
            ``MarketProcessor.fetch_market_data``.
        end_date: End of the range, forwarded the same way.
        daily_features: Daily sentiment features, forwarded to
            ``MarketProcessor.align_features``.

    Returns:
        The value returned by ``MarketProcessor.align_features``.
    """
    market_stage = MarketProcessor()
    per_asset = market_stage.fetch_market_data(start_date, end_date)

    # Overwrite each asset's entry in place with its feature-enriched version
    enrich = market_stage.compute_market_features
    for asset_name in per_asset:
        per_asset[asset_name] = enrich(per_asset[asset_name])

    # Join the enriched market data with the sentiment features
    return market_stage.align_features(per_asset, daily_features)

# Example usage
# aligned_data = process_market_data(start_date, end_date, daily_features)
# aligned_data['EURUSD'].head()

## 4. Model Training and Backtesting

In [7]:
def train_and_backtest(aligned_data: dict, shap_values_out: "dict | None" = None):
    """Backtest every asset, compute per-model metrics, and explain XGBoost predictions.

    Args:
        aligned_data: Mapping of asset name -> aligned data; each value is
            passed to ``ModelTrainer.backtest`` and ``explain_predictions``.
        shap_values_out: Optional dict that is filled in place with
            asset name -> the value returned by
            ``ModelTrainer.explain_predictions`` for every asset whose
            backtest results contain an ``'xgboost'`` entry. Previously these
            values were computed and then dropped, so callers had no way to
            retrieve them. Leave as ``None`` to keep the old behaviour.

    Returns:
        A ``(results, metrics)`` tuple. ``results`` maps asset name -> the
        backtest output; ``metrics`` maps asset name -> model name -> the
        value returned by ``ModelTrainer.compute_metrics`` on that model's
        ``'returns'`` entry.
    """
    trainer = ModelTrainer()
    results = {}
    metrics = {}

    for asset_name, data in aligned_data.items():
        # Transaction cost: 0.0002 for EURUSD / USDJPY, 0.0005 for every other asset
        transaction_cost = 0.0002 if asset_name in ('EURUSD', 'USDJPY') else 0.0005

        # Run backtest
        asset_results = trainer.backtest(data, transaction_cost)
        results[asset_name] = asset_results

        # Compute metrics for each model from its returns
        metrics[asset_name] = {
            model_name: trainer.compute_metrics(model_results['returns'])
            for model_name, model_results in asset_results.items()
        }

        # Generate SHAP values; always computed (as before), and handed back
        # to the caller when an output dict was supplied
        if 'xgboost' in asset_results:
            shap_values = trainer.explain_predictions(
                trainer.models['xgboost'],
                data
            )
            if shap_values_out is not None:
                shap_values_out[asset_name] = shap_values

    return results, metrics

# Example usage
# results, metrics = train_and_backtest(aligned_data)
# pd.DataFrame(metrics)

## 5. Run Complete Pipeline

In [8]:
def run_complete_pipeline(start_date: str, end_date: str, force_refresh: bool = False):
    """Run all four pipeline stages in order and return every intermediate artifact.

    Args:
        start_date: Start of the range, used for both news collection and
            market data.
        end_date: End of the range, used the same way.
        force_refresh: Forwarded to ``collect_and_process_news``.

    Returns:
        Dict with the keys ``events_df``, ``sentiment_df``, ``daily_features``,
        ``aligned_data``, ``results`` and ``metrics``, holding the output of
        the corresponding stage.
    """
    outputs = {}

    # Stage 1: news collection + headline processing
    print("Step 1: Collecting and processing news...")
    outputs['events_df'] = collect_and_process_news(start_date, end_date, force_refresh)

    # Stage 2: sentiment scoring and daily aggregation
    print("\nStep 2: Analyzing sentiment...")
    outputs['sentiment_df'], outputs['daily_features'] = analyze_sentiment(outputs['events_df'])

    # Stage 3: market data aligned with the sentiment features
    print("\nStep 3: Processing market data...")
    outputs['aligned_data'] = process_market_data(start_date, end_date, outputs['daily_features'])

    # Stage 4: model training and backtest
    print("\nStep 4: Training models and running backtest...")
    outputs['results'], outputs['metrics'] = train_and_backtest(outputs['aligned_data'])

    return outputs

# Example usage
# pipeline_results = run_complete_pipeline(start_date, end_date)
# pipeline_results['metrics']

## 6. Visualizations

In [9]:
def plot_sentiment_trends(daily_features: pd.DataFrame):
    """Draw the ``sentiment_score`` column against the frame's index as a line chart.

    Args:
        daily_features: Frame with a ``sentiment_score`` column; its index is
            used for the x-axis (labelled 'Date').
    """
    plt.figure(figsize=(15, 5))

    x_values = daily_features.index
    scores = daily_features['sentiment_score']
    plt.plot(x_values, scores)

    # Axis labels, title and grid
    plt.xlabel('Date')
    plt.ylabel('Sentiment Score')
    plt.title('Daily Sentiment Score')
    plt.grid(True)

    plt.show()

def plot_returns(results: dict, asset_name: str):
    """Draw one cumulative-return line per model for the chosen asset.

    Args:
        results: Mapping of asset name -> model name -> dict holding a
            ``'returns'`` entry.
        asset_name: Key into ``results`` selecting the asset to plot.
    """
    plt.figure(figsize=(15, 5))

    per_model = results[asset_name]
    for model_name, model_results in per_model.items():
        # Compound the per-period returns into a growth curve
        period_growth = 1 + model_results['returns']
        cum_returns = period_growth.cumprod()
        plt.plot(cum_returns.index, cum_returns, label=model_name)

    # Labels, title, legend and grid
    plt.xlabel('Date')
    plt.ylabel('Cumulative Return')
    plt.title(f'Cumulative Returns - {asset_name}')
    plt.legend()
    plt.grid(True)

    plt.show()

def plot_shap_values(shap_values: pd.DataFrame, top_n: int = 10):
    """Draw a horizontal bar chart of the columns with the largest mean |value|.

    Args:
        shap_values: Frame of SHAP values; the mean absolute value is taken
            per column.
        top_n: Number of highest-ranked columns to show (default 10).
    """
    plt.figure(figsize=(10, 6))

    # Mean absolute value per column, sorted ascending so the largest bars
    # are the last `top_n` entries
    mean_abs = shap_values.abs().mean()
    ranked = mean_abs.sort_values(ascending=True)
    ranked.tail(top_n).plot(kind='barh')

    plt.title('Feature Importance (SHAP Values)')
    plt.xlabel('Mean |SHAP value|')
    plt.tight_layout()
    plt.show()

# Example usage
# plot_sentiment_trends(daily_features)
# plot_returns(results, 'EURUSD')
# plot_shap_values(shap_values)