In [1]:
# Web3 Trading Analysis - Feature Engineering
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Import custom modules
import sys
from pathlib import Path
sys.path.append(str(Path('../src').resolve()))

from feature_engineer import FeatureEngineering, TradingPerformanceFeatures, SentimentFeatures
from utils import DataProfiler

print("⚙️ **FEATURE ENGINEERING & PERFORMANCE METRICS**")
print("=" * 60)
print(f"📅 Processing Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


⚙️ **FEATURE ENGINEERING & PERFORMANCE METRICS**
📅 Processing Date: 2025-08-08 18:37:58


In [2]:
# Load processed datasets
print("📥 Loading processed datasets for feature engineering...\n")

sentiment_processed = pd.read_csv("../data/processed/sentiment_processed.csv")
trading_processed = pd.read_csv("../data/processed/trading_processed.csv")

print(f"✅ Processed sentiment data: {sentiment_processed.shape}")
print(f"✅ Processed trading data: {trading_processed.shape}")

# Quick data validation
print(f"\n🔍 **Data Quality Check:**")
print(f"   • Sentiment date range: {sentiment_processed['date_standardized'].min()} to {sentiment_processed['date_standardized'].max()}")
print(f"   • Trading date range: {trading_processed['trading_date'].min()} to {trading_processed['trading_date'].max()}")
print(f"   • Unique traders: {trading_processed['Account'].nunique():,}")
print(f"   • Unique trading days: {trading_processed['trading_date'].nunique():,}")


📥 Loading processed datasets for feature engineering...

✅ Processed sentiment data: (2644, 7)
✅ Processed trading data: (35558, 21)

🔍 **Data Quality Check:**
   • Sentiment date range: 2018-02-01 to 2025-05-02
   • Trading date range: 2023-04-30 to 2025-05-01
   • Unique traders: 32
   • Unique trading days: 451


In [3]:
# Initialize feature engineering
feature_engineer = FeatureEngineering()

print("🚀 **Feature Engineering System Initialized!**")
print("\nAvailable feature creation methods:")
print("   • Daily trader performance metrics")
print("   • Trader behavioral characteristics") 
print("   • Enhanced sentiment indicators")
print("   • Market timing features")
print("   • Master dataset integration")


🚀 **Feature Engineering System Initialized!**

Available feature creation methods:
   • Daily trader performance metrics
   • Trader behavioral characteristics
   • Enhanced sentiment indicators
   • Market timing features
   • Master dataset integration


In [5]:
# Quick fix for the GroupBy error
def fixed_calculate_trader_behavior_features(trading_df):
    """Fixed version of trader behavior features calculation"""
    print("👤 **Analyzing Trader Behavioral Features**")
    
    trader_behavior = trading_df.groupby('Account').agg({
        'Closed PnL': ['sum', 'mean', 'std', 'count'],
        'Size USD': ['sum', 'mean', 'std', 'max'],
        'trading_date': ['nunique', 'min', 'max'],
        'Coin': 'nunique',
        'Side': lambda x: (x == 'BUY').mean(),
        'Fee': 'sum'
    })
    
    # Flatten columns
    trader_behavior.columns = [
        'total_career_pnl', 'avg_pnl_per_trade', 'pnl_volatility', 'total_trades',
        'total_career_volume', 'avg_trade_size', 'trade_size_volatility', 'max_trade_size',
        'active_days', 'first_trade_date', 'last_trade_date',
        'unique_coins_traded', 'buy_preference_ratio', 'total_career_fees'
    ]
    
    trader_behavior['trading_frequency'] = (trader_behavior['total_trades'] / 
                                          trader_behavior['active_days']).round(2)
    
    # Fixed win rate calculation
    win_rates = []
    profit_factors = []
    
    for account in trader_behavior.index:
        account_trades = trading_df[trading_df['Account'] == account]
        
        # Win rate
        profitable_trades = (account_trades['Closed PnL'] > 0).sum()
        total_trades = len(account_trades)
        win_rate = profitable_trades / total_trades if total_trades > 0 else 0
        win_rates.append(round(win_rate, 4))
        
        # Profit factor
        profits = account_trades[account_trades['Closed PnL'] > 0]['Closed PnL'].sum()
        losses = abs(account_trades[account_trades['Closed PnL'] < 0]['Closed PnL'].sum())
        profit_factor = profits / losses if losses > 0 else (profits if profits > 0 else 0)
        profit_factors.append(round(profit_factor, 4))
    
    trader_behavior['win_rate'] = win_rates
    trader_behavior['profit_factor'] = profit_factors
    
    trader_behavior['risk_adjusted_return'] = (trader_behavior['total_career_pnl'] / 
                                             trader_behavior['trade_size_volatility']).fillna(0).round(4)
    
    trader_behavior['diversification_score'] = (trader_behavior['unique_coins_traded'] / 
                                               trader_behavior['total_trades']).round(4)
    
    # Trading style classification
    trader_behavior['trader_type'] = 'Unknown'
    trader_behavior.loc[trader_behavior['trading_frequency'] > 10, 'trader_type'] = 'High_Frequency'
    trader_behavior.loc[(trader_behavior['trading_frequency'] <= 10) & 
                      (trader_behavior['trading_frequency'] > 2), 'trader_type'] = 'Active'
    trader_behavior.loc[trader_behavior['trading_frequency'] <= 2, 'trader_type'] = 'Casual'
    
    print(f"✅ Analyzed behavioral features for {len(trader_behavior)} unique traders")
    return trader_behavior.reset_index()

# Patch the feature engineer
feature_engineer.trading_features.calculate_trader_behavior_features = fixed_calculate_trader_behavior_features

print("🔧 **Feature engineer patched successfully!**")


🔧 **Feature engineer patched successfully!**


In [6]:
# Create comprehensive feature sets
feature_sets = feature_engineer.create_all_features(trading_processed, sentiment_processed)

print("\n📋 **Feature Sets Created:**")
for name, df in feature_sets.items():
    print(f"\n🗂️ **{name.replace('_', ' ').title()}:**")
    print(f"   • Records: {len(df):,}")
    print(f"   • Features: {len(df.columns)}")
    print(f"   • Key columns: {list(df.columns[:5])}")


🎯 **COMPREHENSIVE FEATURE ENGINEERING**
📈 **Calculating Daily Trader Performance Metrics**
✅ Generated 1953 trader-day performance records
👤 **Analyzing Trader Behavioral Features**
✅ Analyzed behavioral features for 32 unique traders
😰😤 **Creating Enhanced Sentiment Features**
✅ Enhanced 2644 sentiment records with advanced features
🔄 **Identifying Sentiment Cycles**
✅ Identified sentiment cycles and extreme event distances
⏰ **Creating Market Timing Features**
✅ Created timing features for 1953 trader-day records

📊 **FEATURE ENGINEERING SUMMARY:**
   • daily_performance: 1,953 records, 19 features
   • trader_behavior: 32 records, 21 features
   • sentiment_enhanced: 2,644 records, 16 features
   • sentiment_cycles: 2,644 records, 19 features
   • timing_features: 1,953 records, 28 features

📋 **Feature Sets Created:**

🗂️ **Daily Performance:**
   • Records: 1,953
   • Features: 19
   • Key columns: ['Account', 'trading_date', 'total_pnl', 'avg_pnl_per_trade', 'total_trades']

🗂️ *

In [7]:
# Analyze daily performance features
daily_perf = feature_sets['daily_performance']

print("📈 **DAILY PERFORMANCE FEATURES ANALYSIS:**")
print("=" * 50)

# Performance statistics
print(f"\n💰 **PnL Distribution:**")
print(f"   • Total profitable days: {(daily_perf['profitable_day'] == 1).sum():,}")
print(f"   • Total loss days: {(daily_perf['profitable_day'] == 0).sum():,}")
print(f"   • Overall win rate: {(daily_perf['profitable_day'] == 1).mean()*100:.1f}%")
print(f"   • Average daily PnL: ${daily_perf['total_pnl'].mean():.2f}")
print(f"   • Best single day: ${daily_perf['total_pnl'].max():.2f}")
print(f"   • Worst single day: ${daily_perf['total_pnl'].min():.2f}")

# Trading activity analysis
print(f"\n📊 **Trading Activity:**")
print(f"   • Average trades per day: {daily_perf['total_trades'].mean():.1f}")
print(f"   • Average volume per day: ${daily_perf['total_volume_usd'].mean():,.2f}")
print(f"   • Most active day: {daily_perf['total_trades'].max()} trades")

# Display sample of features
print(f"\n📋 **Sample Daily Performance Features:**")
display(daily_perf[['Account', 'trading_date', 'total_pnl', 'total_trades', 
                   'profitable_day', 'roi_percentage', 'buy_sell_ratio']].head(10))


📈 **DAILY PERFORMANCE FEATURES ANALYSIS:**

💰 **PnL Distribution:**
   • Total profitable days: 1,112
   • Total loss days: 841
   • Overall win rate: 56.9%
   • Average daily PnL: $1241.86
   • Best single day: $252021.68
   • Worst single day: $-141839.35

📊 **Trading Activity:**
   • Average trades per day: 18.2
   • Average volume per day: $96,750.62
   • Most active day: 1103 trades

📋 **Sample Daily Performance Features:**


Unnamed: 0,Account,trading_date,total_pnl,total_trades,profitable_day,roi_percentage,buy_sell_ratio
0,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-11,0.0,155,0,0.0,0.0
1,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-17,0.0,53,0,0.0,0.0
2,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-18,0.0,30,0,0.0,0.0
3,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-22,-19086.2783,11,0,-6.2757,11000.0
4,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-26,1440.0968,18,1,0.3588,0.79992
5,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-27,-88548.2632,57,0,-9.7544,57000.0
6,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-30,0.0,11,0,0.0,0.0
7,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-12-06,-33775.849,18,0,-14.4858,18000.0
8,0x083384f897ee0f19899168e3b1bec365f52a9012,2025-01-19,23834.6777,247,1,0.9719,0.286457
9,0x083384f897ee0f19899168e3b1bec365f52a9012,2025-01-20,31542.2414,16,1,16.4057,2.19956


In [9]:
# Fix: Ensure sentiment cycles are properly created
print("🔄 **Creating Missing Sentiment Cycle Features**")

# Get the sentiment_enhanced data
sentiment_enhanced = feature_sets['sentiment_enhanced'].copy()

# Manually add cycle_phase column if missing
if 'cycle_phase' not in sentiment_enhanced.columns:
    print("   • Adding missing cycle_phase column...")
    sentiment_enhanced['cycle_phase'] = 'Unknown'
    
    # Simple cycle detection based on sentiment momentum
    for i in range(1, len(sentiment_enhanced)):
        current_score = sentiment_enhanced.iloc[i]['sentiment_score']
        prev_score = sentiment_enhanced.iloc[i-1]['sentiment_score']
        
        if current_score > prev_score and current_score >= 3:
            sentiment_enhanced.iloc[i, sentiment_enhanced.columns.get_loc('cycle_phase')] = 'Recovery'
        elif current_score > prev_score and current_score < 3:
            sentiment_enhanced.iloc[i, sentiment_enhanced.columns.get_loc('cycle_phase')] = 'Bottoming'
        elif current_score < prev_score and current_score <= 3:
            sentiment_enhanced.iloc[i, sentiment_enhanced.columns.get_loc('cycle_phase')] = 'Declining'
        elif current_score < prev_score and current_score > 3:
            sentiment_enhanced.iloc[i, sentiment_enhanced.columns.get_loc('cycle_phase')] = 'Topping'
        else:
            sentiment_enhanced.iloc[i, sentiment_enhanced.columns.get_loc('cycle_phase')] = 'Stable'

# Add days since extreme events if missing
if 'days_since_extreme_fear' not in sentiment_enhanced.columns:
    print("   • Adding days since extreme events...")
    extreme_fear_indices = sentiment_enhanced[sentiment_enhanced['extreme_fear_event'] == 1].index
    extreme_greed_indices = sentiment_enhanced[sentiment_enhanced['extreme_greed_event'] == 1].index
    
    sentiment_enhanced['days_since_extreme_fear'] = 999
    sentiment_enhanced['days_since_extreme_greed'] = 999
    
    for i, row in sentiment_enhanced.iterrows():
        # Days since extreme fear
        if len(extreme_fear_indices) > 0:
            fear_distances = [abs(i - idx) for idx in extreme_fear_indices if idx <= i]
            if fear_distances:
                sentiment_enhanced.at[i, 'days_since_extreme_fear'] = min(fear_distances)
        
        # Days since extreme greed
        if len(extreme_greed_indices) > 0:
            greed_distances = [abs(i - idx) for idx in extreme_greed_indices if idx <= i]
            if greed_distances:
                sentiment_enhanced.at[i, 'days_since_extreme_greed'] = min(greed_distances)

# Update the feature set
feature_sets['sentiment_enhanced'] = sentiment_enhanced
print("✅ Sentiment cycle features added successfully!")


🔄 **Creating Missing Sentiment Cycle Features**
   • Adding missing cycle_phase column...
   • Adding days since extreme events...
✅ Sentiment cycle features added successfully!


In [11]:
# Analyze enhanced sentiment features - FULLY FIXED VERSION
sentiment_enhanced = feature_sets['sentiment_enhanced']

print("😰😤 **ENHANCED SENTIMENT FEATURES ANALYSIS:**")
print("=" * 50)

# Check what columns we actually have
print(f"\n🔍 **Available Sentiment Columns:**")
sentiment_columns = list(sentiment_enhanced.columns)
print(f"   Available: {sentiment_columns}")

# Sentiment distribution
print(f"\n📊 **Sentiment Score Distribution:**")
sentiment_dist = sentiment_enhanced['sentiment_score'].value_counts().sort_index()
for score, count in sentiment_dist.items():
    pct = (count / len(sentiment_enhanced)) * 100
    print(f"   • Score {score}: {count:,} days ({pct:.1f}%)")

# Market regime analysis
print(f"\n🎯 **Market Regime Distribution:**")
regime_dist = sentiment_enhanced['market_regime'].value_counts()
for regime, count in regime_dist.items():
    pct = (count / len(sentiment_enhanced)) * 100
    print(f"   • {regime}: {count:,} days ({pct:.1f}%)")

# Extreme events
extreme_fear_count = sentiment_enhanced['extreme_fear_event'].sum()
extreme_greed_count = sentiment_enhanced['extreme_greed_event'].sum()
print(f"\n⚡ **Extreme Events:**")
print(f"   • Extreme fear events: {extreme_fear_count}")
print(f"   • Extreme greed events: {extreme_greed_count}")

# Sentiment momentum analysis (if available)
if 'sentiment_momentum' in sentiment_enhanced.columns:
    print(f"\n📈 **Sentiment Momentum Distribution:**")
    momentum_dist = sentiment_enhanced['sentiment_momentum'].value_counts()
    for momentum, count in momentum_dist.items():
        pct = (count / len(sentiment_enhanced)) * 100
        print(f"   • {momentum}: {count:,} days ({pct:.1f}%)")

# Display sample with only available columns
print(f"\n📋 **Sample Enhanced Sentiment Features:**")
# Use only columns that actually exist
available_sample_columns = []
desired_columns = ['date_standardized', 'classification', 'sentiment_score', 
                  'market_regime', 'sentiment_momentum', 'cycle_phase']

for col in desired_columns:
    if col in sentiment_enhanced.columns:
        available_sample_columns.append(col)

print(f"   Displaying columns: {available_sample_columns}")
display(sentiment_enhanced[available_sample_columns].head(10))

# Additional analysis with available features
if 'sentiment_7d_avg' in sentiment_enhanced.columns:
    print(f"\n📊 **7-Day Rolling Sentiment Analysis:**")
    print(f"   • Average 7-day sentiment: {sentiment_enhanced['sentiment_7d_avg'].mean():.2f}")
    print(f"   • Sentiment volatility (7-day): {sentiment_enhanced['sentiment_7d_volatility'].mean():.3f}")

# Show correlation between different sentiment measures
print(f"\n🔗 **Sentiment Feature Correlations:**")
numeric_sentiment_cols = sentiment_enhanced.select_dtypes(include=[np.number]).columns
if len(numeric_sentiment_cols) > 1:
    sentiment_corr = sentiment_enhanced[numeric_sentiment_cols].corr()
    print("Top correlations:")
    # Get correlation pairs
    corr_pairs = []
    for i in range(len(sentiment_corr.columns)):
        for j in range(i+1, len(sentiment_corr.columns)):
            col1 = sentiment_corr.columns[i]
            col2 = sentiment_corr.columns[j]
            corr_val = sentiment_corr.iloc[i, j]
            corr_pairs.append((col1, col2, corr_val))
    
    # Sort by absolute correlation
    corr_pairs.sort(key=lambda x: abs(x[2]), reverse=True)
    
    # Show top 5 correlations
    for col1, col2, corr_val in corr_pairs[:5]:
        print(f"   • {col1} ↔ {col2}: {corr_val:.3f}")


😰😤 **ENHANCED SENTIMENT FEATURES ANALYSIS:**

🔍 **Available Sentiment Columns:**
   Available: ['timestamp', 'value', 'classification', 'date', 'timestamp_UTC', 'date_parsed', 'date_standardized', 'sentiment_score', 'sentiment_change', 'sentiment_momentum', 'sentiment_7d_avg', 'sentiment_7d_volatility', 'sentiment_trend_7d', 'market_regime', 'extreme_fear_event', 'extreme_greed_event', 'cycle_phase', 'days_since_extreme_fear', 'days_since_extreme_greed']

📊 **Sentiment Score Distribution:**
   • Score 1: 508 days (19.2%)
   • Score 2: 781 days (29.5%)
   • Score 3: 396 days (15.0%)
   • Score 4: 633 days (23.9%)
   • Score 5: 326 days (12.3%)

🎯 **Market Regime Distribution:**
   • Fear_Dominated: 1,289 days (48.8%)
   • Greed_Dominated: 959 days (36.3%)
   • Balanced: 396 days (15.0%)

⚡ **Extreme Events:**
   • Extreme fear events: 508
   • Extreme greed events: 326

📈 **Sentiment Momentum Distribution:**
   • Stable: 2,050 days (77.5%)
   • Increasing: 300 days (11.3%)
   • Decreasi

Unnamed: 0,date_standardized,classification,sentiment_score,market_regime,sentiment_momentum,cycle_phase
0,2018-02-01,Fear,2,Fear_Dominated,Stable,Unknown
1,2018-02-02,Extreme Fear,1,Fear_Dominated,Decreasing,Declining
2,2018-02-03,Fear,2,Fear_Dominated,Increasing,Bottoming
3,2018-02-04,Extreme Fear,1,Fear_Dominated,Decreasing,Declining
4,2018-02-05,Extreme Fear,1,Fear_Dominated,Stable,Stable
5,2018-02-06,Extreme Fear,1,Fear_Dominated,Stable,Stable
6,2018-02-07,Fear,2,Fear_Dominated,Increasing,Bottoming
7,2018-02-08,Fear,2,Fear_Dominated,Stable,Stable
8,2018-02-09,Fear,2,Fear_Dominated,Stable,Stable
9,2018-02-10,Neutral,3,Balanced,Increasing,Recovery



📊 **7-Day Rolling Sentiment Analysis:**
   • Average 7-day sentiment: 2.80
   • Sentiment volatility (7-day): 0.346

🔗 **Sentiment Feature Correlations:**
Top correlations:
   • value ↔ sentiment_score: 0.967
   • value ↔ sentiment_7d_avg: 0.934
   • sentiment_score ↔ sentiment_7d_avg: 0.928
   • sentiment_score ↔ extreme_fear_event: -0.664
   • sentiment_change ↔ sentiment_trend_7d: 0.648


In [12]:
# Create master analysis dataset
master_dataset = feature_engineer.create_master_dataset(feature_sets)

print("🎯 **MASTER DATASET CREATED:**")
print("=" * 50)

# Dataset overview
print(f"\n📊 **Master Dataset Overview:**")
print(f"   • Total records: {len(master_dataset):,}")
print(f"   • Total features: {len(master_dataset.columns)}")
print(f"   • Unique traders: {master_dataset['Account'].nunique():,}")
print(f"   • Date range: {master_dataset['trading_date'].min()} to {master_dataset['trading_date'].max()}")

# Feature categories
performance_features = [col for col in master_dataset.columns if 'pnl' in col.lower() or 'roi' in col.lower()]
sentiment_features = [col for col in master_dataset.columns if 'sentiment' in col.lower()]
timing_features = [col for col in master_dataset.columns if any(x in col.lower() for x in ['performance_in', 'contrarian', 'momentum'])]

print(f"\n📋 **Feature Categories:**")
print(f"   • Performance features: {len(performance_features)}")
print(f"   • Sentiment features: {len(sentiment_features)}")
print(f"   • Timing features: {len(timing_features)}")

# Display sample of master dataset
print(f"\n📋 **Master Dataset Sample:**")
key_columns = ['Account', 'trading_date', 'total_pnl', 'sentiment_score', 'market_regime', 
               'win_rate', 'trader_type', 'contrarian_indicator']
available_columns = [col for col in key_columns if col in master_dataset.columns]
display(master_dataset[available_columns].head(10))



🔗 **Creating Master Analysis Dataset**
✅ Master dataset created: 1,953 records, 33 features
🎯 **MASTER DATASET CREATED:**

📊 **Master Dataset Overview:**
   • Total records: 1,953
   • Total features: 33
   • Unique traders: 32
   • Date range: 2023-04-30 to 2025-05-01

📋 **Feature Categories:**
   • Performance features: 5
   • Sentiment features: 1
   • Timing features: 5

📋 **Master Dataset Sample:**


Unnamed: 0,Account,trading_date,total_pnl,sentiment_score,market_regime,win_rate,trader_type,contrarian_indicator
0,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-11,0.0,5,Greed_Dominated,0.2235,High_Frequency,0
1,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-17,0.0,5,Greed_Dominated,0.2235,High_Frequency,0
2,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-18,0.0,5,Greed_Dominated,0.2235,High_Frequency,0
3,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-22,-19086.2783,5,Greed_Dominated,0.2235,High_Frequency,1
4,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-26,1440.0968,5,Greed_Dominated,0.2235,High_Frequency,0
5,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-27,-88548.2632,5,Greed_Dominated,0.2235,High_Frequency,1
6,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-11-30,0.0,5,Greed_Dominated,0.2235,High_Frequency,0
7,0x083384f897ee0f19899168e3b1bec365f52a9012,2024-12-06,-33775.849,4,Greed_Dominated,0.2235,High_Frequency,1
8,0x083384f897ee0f19899168e3b1bec365f52a9012,2025-01-19,23834.6777,5,Greed_Dominated,0.2235,High_Frequency,0
9,0x083384f897ee0f19899168e3b1bec365f52a9012,2025-01-20,31542.2414,5,Greed_Dominated,0.2235,High_Frequency,0


In [13]:
# Save all feature sets
print("💾 **SAVING FEATURE ENGINEERED DATA:**")
print("=" * 50)

# Save individual feature sets
for name, df in feature_sets.items():
    filename = f"../data/features/{name}.csv"
    df.to_csv(filename, index=False)
    print(f"✅ Saved {name}: {filename}")

# Save master dataset
master_dataset.to_csv("../data/features/master_analysis_dataset.csv", index=False)
print(f"✅ Saved master dataset: data/features/master_analysis_dataset.csv")

# Summary statistics
total_features = sum(len(df.columns) for df in feature_sets.values())
total_records = sum(len(df) for df in feature_sets.values())

print(f"\n📊 **FEATURE ENGINEERING SUMMARY:**")
print(f"   • Total feature sets: {len(feature_sets)}")
print(f"   • Total features created: {total_features:,}")
print(f"   • Total records processed: {total_records:,}")
print(f"   • Master dataset: {len(master_dataset):,} records, {len(master_dataset.columns)} features")

print(f"\n🎯 **Phase 3B Complete - Feature Engineering Success!**")
print("✅ Ready for Phase 3C: Dataset Integration & Correlation Analysis")


💾 **SAVING FEATURE ENGINEERED DATA:**
✅ Saved daily_performance: ../data/features/daily_performance.csv
✅ Saved trader_behavior: ../data/features/trader_behavior.csv
✅ Saved sentiment_enhanced: ../data/features/sentiment_enhanced.csv
✅ Saved sentiment_cycles: ../data/features/sentiment_cycles.csv
✅ Saved timing_features: ../data/features/timing_features.csv
✅ Saved master dataset: data/features/master_analysis_dataset.csv

📊 **FEATURE ENGINEERING SUMMARY:**
   • Total feature sets: 5
   • Total features created: 106
   • Total records processed: 9,226
   • Master dataset: 1,953 records, 33 features

🎯 **Phase 3B Complete - Feature Engineering Success!**
✅ Ready for Phase 3C: Dataset Integration & Correlation Analysis
