In [5]:

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.metrics import accuracy_score, precision_recall_curve, auc, mean_absolute_percentage_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, GRU, Dense, Dropout, Concatenate, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import warnings
warnings.filterwarnings('ignore')

def load_ethereum_data():
    """Download daily ETH-USD OHLCV bars from Yahoo Finance.

    Returns:
        A DataFrame with the columns ``Open``, ``High``, ``Low``, ``Close``
        and ``Volume``, or ``None`` when the download fails or yields no rows.
    """
    try:
        # Imported lazily so a missing yfinance install is reported through
        # the same error path as a failed download.
        import yfinance as yf
        print("Downloading Ethereum data from Yahoo Finance...")
        eth = yf.download('ETH-USD', start='2016-03-10', end='2021-07-07', progress=False)

        # An empty result used to be reported as a successful load of 0 days
        # and only blew up later in the pipeline; treat it as a failure here.
        if eth is None or eth.empty:
            print("Error: Yahoo Finance returned no Ethereum data")
            return None

        # Flatten two-level columns by dropping the second level.
        # NOTE(review): presumably that level is the ticker symbol - confirm
        # against the installed yfinance version.
        if isinstance(eth.columns, pd.MultiIndex):
            eth.columns = eth.columns.droplevel(1)

        eth = eth[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
        print(f"Successfully loaded {len(eth)} days of data")
        return eth
    except Exception as e:
        # Best-effort boundary: callers only distinguish "data" from "None".
        print(f"Error: {e}")
        return None

def load_external_data():
    """Download daily closing prices of the external market series.

    Fetches gold futures (``GC=F``), Brent oil futures (``BZ=F``), Bitcoin
    (``BTC-USD``) and the S&P 500 index (``^GSPC``) from Yahoo Finance.

    Returns:
        A DataFrame with the columns ``Gold_Close``, ``Oil_Close``,
        ``BTC_Close`` and ``SPX`` indexed by the union of the individual
        series' dates, or ``None`` when any download fails or is empty.
    """
    # Output column name -> Yahoo Finance ticker, in output column order.
    tickers = {
        'Gold_Close': 'GC=F',
        'Oil_Close': 'BZ=F',
        'BTC_Close': 'BTC-USD',
        'SPX': '^GSPC',
    }

    try:
        import yfinance as yf
        print("Downloading external market data...")

        closes = {}
        for column, ticker in tickers.items():
            frame = yf.download(ticker, start='2016-03-10', end='2021-07-07', progress=False)
            close = frame['Close']

            # With two-level columns, selecting 'Close' yields a one-column
            # DataFrame rather than a Series; a dict of DataFrames cannot be
            # assembled into a DataFrame, so reduce it to its single column.
            if isinstance(close, pd.DataFrame):
                close = close.iloc[:, 0]

            if close.empty:
                raise ValueError(f"no data returned for {ticker}")

            closes[column] = close

        return pd.DataFrame(closes)
    except Exception as e:
        # Best-effort boundary: external data is optional for the caller.
        print(f"Warning: Could not download external data: {e}")
        return None

def calculate_technical_indicators(df):
    """Return a copy of ``df`` extended with price/volume indicator columns.

    Reads the ``Open``, ``High``, ``Low``, ``Close`` and ``Volume`` columns.
    Rows at the start of the frame contain NaN wherever a rolling window or a
    lag reaches before the first observation. The input frame is not modified.
    """
    out = df.copy()
    close = out['Close']
    volume = out['Volume']

    # Simple and logarithmic one-step returns.
    out['Returns'] = close.pct_change()
    out['Log_Returns'] = np.log(close / close.shift(1))

    # Intraday spreads.
    out['High_Low_Diff'] = out['High'] - out['Low']
    out['Close_Open_Diff'] = close - out['Open']

    # Rolling mean/std and the two-sigma bands around the mean.
    for period in (7, 14, 21, 30):
        window = close.rolling(window=period)
        mean_col, std_col = f'MA_{period}', f'STD_{period}'
        out[mean_col] = window.mean()
        out[std_col] = window.std()
        out[f'Upper_BB_{period}'] = out[mean_col] + (out[std_col] * 2)
        out[f'Lower_BB_{period}'] = out[mean_col] - (out[std_col] * 2)

    # MACD built from the 12/26-span EMAs, with a 9-span signal line.
    for span in (12, 26):
        out[f'EMA_{span}'] = close.ewm(span=span, adjust=False).mean()
    out['MACD'] = out['EMA_12'] - out['EMA_26']
    out['Signal_Line'] = out['MACD'].ewm(span=9, adjust=False).mean()

    for period in (14, 21):
        out[f'RSI_{period}'] = calculate_rsi(close, period)

    for period in (7, 30):
        out[f'Vol_MA_{period}'] = volume.rolling(window=period).mean()

    # Lagged closes and returns, interleaved per lag.
    for lag in (1, 2, 3, 5, 7, 14):
        out[f'Close_Lag_{lag}'] = close.shift(lag)
        out[f'Returns_Lag_{lag}'] = out['Returns'].shift(lag)

    for horizon in (5, 10):
        out[f'Momentum_{horizon}'] = close - close.shift(horizon)

    # Rate of change in percent.
    for horizon in (5, 10):
        earlier = close.shift(horizon)
        out[f'ROC_{horizon}'] = ((close - earlier) / earlier) * 100

    return out

def calculate_rsi(series, period=14):
    """Compute a relative strength index from simple rolling averages.

    Args:
        series: pandas Series of prices.
        period: Rolling window length for the average gain and loss.

    Returns:
        A Series aligned with ``series``; the first ``period - 1`` entries
        are NaN.
    """
    change = series.diff()

    # Moves in the wrong direction (and the leading NaN) contribute zero.
    upward = change.where(change > 0, 0)
    downward = -change.where(change < 0, 0)

    avg_gain = upward.rolling(window=period).mean()
    avg_loss = downward.rolling(window=period).mean()

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

def create_sequences(data, target_col_idx, n_steps=60):
    """Cut a 2-D array into sliding windows with price and direction targets.

    For every row index ``i >= n_steps`` the sample consists of rows
    ``i - n_steps .. i - 1`` as input, ``data[i, target_col_idx]`` as the
    price target, and 1/0 for whether the target column rises from row ``i``
    to row ``i + 1``. The final row has no successor, so its direction label
    repeats the previous label (0 when there is none).

    Returns:
        Tuple ``(X, y_price, y_direction)`` of numpy arrays.
    """
    total = len(data)
    anchors = range(n_steps, total)

    windows = [data[end - n_steps:end] for end in anchors]
    prices = [data[end, target_col_idx] for end in anchors]

    # Every anchor except the very last row can look one step ahead.
    directions = [
        1 if data[end + 1, target_col_idx] > data[end, target_col_idx] else 0
        for end in range(n_steps, total - 1)
    ]
    if total > n_steps:
        directions.append(directions[-1] if directions else 0)

    return np.array(windows), np.array(prices), np.array(directions)

def build_hybrid_model(n_steps, n_features):
    """Build the uncompiled two-branch GRU/LSTM model with two output heads.

    The feature axis is split in two: the first ``n_features // 2`` features
    feed a three-layer GRU branch (``input_x1``) and the remaining features
    feed a three-layer LSTM branch (``input_x2``). Both branches are merged
    and shared by a linear ``price_output`` head and a sigmoid
    ``direction_output`` head.

    Returns:
        A Keras ``Model`` taking ``[input_x1, input_x2]``.
    """
    width_x1 = n_features // 2
    width_x2 = n_features - width_x1

    def _recurrent_branch(cell, tensor):
        # Three recurrent layers, each followed by batch normalisation; only
        # the last one collapses the time axis.
        for keep_sequences in (True, True, False):
            tensor = cell(
                256,
                return_sequences=keep_sequences,
                dropout=0.1,
                recurrent_dropout=0.1,
            )(tensor)
            tensor = BatchNormalization()(tensor)
        tensor = Dense(128, activation='relu')(tensor)
        return Dropout(0.2)(tensor)

    input_x1 = Input(shape=(n_steps, width_x1), name='input_x1')
    gru_features = _recurrent_branch(GRU, input_x1)

    input_x2 = Input(shape=(n_steps, width_x2), name='input_x2')
    lstm_features = _recurrent_branch(LSTM, input_x2)

    # Shared trunk on top of the merged branch representations.
    merged = Concatenate()([gru_features, lstm_features])
    trunk = Dense(128, activation='relu')(merged)
    trunk = Dropout(0.2)(trunk)
    trunk = Dense(64, activation='relu')(trunk)

    price_output = Dense(1, activation='linear', name='price_output')(trunk)
    direction_output = Dense(1, activation='sigmoid', name='direction_output')(trunk)

    return Model(
        inputs=[input_x1, input_x2],
        outputs=[price_output, direction_output]
    )

def calculate_rmse(y_true, y_pred):
    """Return the root-mean-squared error between two numeric arrays."""
    squared_error = (y_true - y_pred) ** 2
    return np.sqrt(np.mean(squared_error))

def calculate_pr_auc(y_true, y_pred_proba):
    """Return the area under the precision-recall curve.

    Args:
        y_true: Binary ground-truth labels.
        y_pred_proba: Predicted scores for the positive class.
    """
    curve = precision_recall_curve(y_true, y_pred_proba)
    precision_values, recall_values = curve[0], curve[1]
    # Recall is the x-axis of the curve, precision the y-axis.
    return auc(recall_values, precision_values)

def _simulate_long_only_trades(prices, signals, initial_capital=10000):
    """Replay a long/flat strategy driven by binary direction signals.

    A position is opened at ``prices[i]`` when flat and ``signals[i] == 1``
    and closed at ``prices[i]`` when long and ``signals[i] == 0``. The last
    signal is never acted on. A position that is still open afterwards is
    settled at the final price so its result is reflected in the capital and
    the win count instead of being silently dropped.

    Returns:
        Tuple ``(final_capital, trades, wins)``.
    """
    capital = initial_capital
    in_position = False
    entry_price = 0
    trades = 0
    wins = 0

    for i in range(len(signals) - 1):
        if signals[i] == 1 and not in_position:
            in_position = True
            entry_price = prices[i]
            trades += 1
        elif signals[i] == 0 and in_position:
            in_position = False
            profit = (prices[i] - entry_price) / entry_price
            capital *= (1 + profit)
            if profit > 0:
                wins += 1

    if in_position:
        profit = (prices[-1] - entry_price) / entry_price
        capital *= (1 + profit)
        if profit > 0:
            wins += 1

    return capital, trades, wins


def main():
    """Train and evaluate the hybrid GRU/LSTM model on Ethereum data.

    Pipeline: download data, add technical indicators, scale, build 60-step
    sequences, train with a chronological 80/20 split, report regression and
    classification metrics, run a simple trading simulation and persist the
    model (``ethereum_improved_model.h5``) and the test predictions
    (``improved_predictions.csv``).

    Returns:
        Tuple ``(model, history)``; ``(None, None)`` when the data could not
        be loaded or is too short to build a single sequence.
    """
    eth_data = load_ethereum_data()
    if eth_data is None:
        # Keep the return shape stable so callers can always unpack it.
        return None, None

    print(eth_data.head())
    external_data = load_external_data()

    if external_data is not None:
        df = eth_data.join(external_data, how='inner')
        print(f"Combined with external data: {len(df)} samples")
    else:
        print("External data unavailable; continuing with Ethereum data only")
        df = eth_data

    print("\nCalculating technical indicators...")
    df = calculate_technical_indicators(df)
    df.dropna(inplace=True)

    print(f"Final dataset: {len(df)} samples with {len(df.columns)} features")

    n_steps = 60
    n_sequences = len(df) - n_steps
    if n_sequences <= 0:
        print(f"Error: need more than {n_steps} samples to build sequences, got {len(df)}")
        return None, None

    feature_columns = df.columns.tolist()
    close_idx = feature_columns.index('Close')

    # Chronological split over sequences. Training sequences touch the first
    # ``n_steps + split_idx`` rows (inputs plus price targets), so only those
    # rows may inform the scaler; fitting on all rows would leak test-period
    # statistics into training.
    split_idx = int(n_sequences * 0.8)
    n_train_rows = n_steps + split_idx

    print("\nNormalizing features with RobustScaler...")
    scaler = RobustScaler()
    scaler.fit(df.iloc[:n_train_rows])
    scaled_data = scaler.transform(df)

    # The price targets are taken from the RobustScaler output, so they must
    # be mapped back to dollars with that scaler's own Close statistics. A
    # separately fitted scaler describes a different transform and would
    # yield meaningless prices.
    close_center = scaler.center_[close_idx]
    close_scale = scaler.scale_[close_idx]

    print("Creating sequences (window=60)...")
    X, y_price, y_direction = create_sequences(scaled_data, close_idx, n_steps)

    print(f"Total sequences: {len(X)}")
    print(f"Sequence shape: {X.shape}")

    # Same feature split as build_hybrid_model: first half -> GRU branch,
    # remainder -> LSTM branch.
    n_features = X.shape[2]
    n_features_x1 = n_features // 2

    X1 = X[:, :, :n_features_x1]
    X2 = X[:, :, n_features_x1:]

    X1_train, X1_test = X1[:split_idx], X1[split_idx:]
    X2_train, X2_test = X2[:split_idx], X2[split_idx:]
    y_price_train, y_price_test = y_price[:split_idx], y_price[split_idx:]
    y_direction_train, y_direction_test = y_direction[:split_idx], y_direction[split_idx:]

    print(f"\nTraining samples: {len(X1_train)}")
    print(f"Test samples: {len(X1_test)}")

    up_count = np.sum(y_direction_train)
    down_count = len(y_direction_train) - up_count
    print(f"Direction distribution: Up={up_count}, Down={down_count}")

    print("\nBuilding improved hybrid model...")
    model = build_hybrid_model(n_steps, n_features)

    model.compile(
        optimizer=Adam(learning_rate=0.0005),
        loss={
            'price_output': 'huber',
            'direction_output': 'binary_crossentropy'
        },
        loss_weights={
            'price_output': 0.3,
            'direction_output': 0.7
        },
        metrics={
            'price_output': ['mae'],
            'direction_output': ['accuracy']
        }
    )

    model.summary()

    early_stopping = EarlyStopping(
        monitor='val_direction_output_accuracy',
        patience=15,
        mode='max',
        restore_best_weights=True,
        verbose=1
    )

    reduce_lr = ReduceLROnPlateau(
        monitor='val_direction_output_accuracy',
        factor=0.5,
        patience=7,
        min_lr=1e-7,
        mode='max',
        verbose=1
    )

    print("\nTraining model...")
    history = model.fit(
        [X1_train, X2_train],
        [y_price_train, y_direction_train],
        validation_split=0.2,
        epochs=150,
        batch_size=64,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

    print("\nEvaluating model...")
    y_pred_price, y_pred_direction = model.predict([X1_test, X2_test])

    # Undo the RobustScaler transform of the Close column: x * scale + center.
    predicted_prices = y_pred_price.flatten() * close_scale + close_center
    true_prices = y_price_test * close_scale + close_center

    mape = mean_absolute_percentage_error(true_prices, predicted_prices) * 100
    rmse = calculate_rmse(true_prices, predicted_prices)

    y_pred_direction_binary = (y_pred_direction > 0.5).astype(int).flatten()
    accuracy = accuracy_score(y_direction_test, y_pred_direction_binary)
    pr_auc = calculate_pr_auc(y_direction_test, y_pred_direction.flatten())

    print("\n" + "="*60)
    print("Model Evaluation Results")
    print("="*60)
    print(f"Regression Metrics:")
    print(f"  MAPE: {mape:.2f}%")
    print(f"  RMSE: ${rmse:.2f}")
    print(f"  Mean Price: ${np.mean(true_prices):.2f}")
    print(f"\nClassification Metrics:")
    print(f"  Accuracy: {accuracy*100:.2f}%")
    print(f"  PR-AUC: {pr_auc:.3f}")
    print("\nPaper Results (Ethereum):")
    print(f"  MAPE: 3.17%")
    print(f"  RMSE: $82.03")
    print(f"  Accuracy: 90.26%")
    print(f"  PR-AUC: 0.930")
    print("="*60)

    print("\nSimulating trading strategy...")
    initial_capital = 10000
    capital, trades, wins = _simulate_long_only_trades(
        true_prices, y_pred_direction_binary, initial_capital
    )

    profitability = ((capital - initial_capital) / initial_capital) * 100
    win_rate = (wins / trades * 100) if trades > 0 else 0

    print(f"\nTrading Simulation Results:")
    print(f"  Initial Capital: $10,000.00")
    print(f"  Final Capital: ${capital:,.2f}")
    print(f"  Profitability: {profitability:.2f}%")
    print(f"  Number of Trades: {trades}")
    print(f"  Win Rate: {win_rate:.2f}%")
    print(f"\n  Paper Result: 69% profitability (6 months)")
    print("="*60)

    model.save('ethereum_improved_model.h5')
    print("\nModel saved as 'ethereum_improved_model.h5'")

    results_df = pd.DataFrame({
        'True_Price': true_prices,
        'Predicted_Price': predicted_prices,
        'True_Direction': y_direction_test,
        'Predicted_Direction': y_pred_direction_binary,
        'Prediction_Probability': y_pred_direction.flatten()
    })
    results_df.to_csv('improved_predictions.csv', index=False)
    print("Predictions saved as 'improved_predictions.csv'")

    return model, history

if __name__ == '__main__':
    # main() can bail out early without producing a (model, history) pair;
    # unpacking a bare None would raise TypeError, so normalise it first.
    outcome = main()
    model, history = outcome if outcome is not None else (None, None)


Downloading Ethereum data from Yahoo Finance...
Successfully loaded 1336 days of data
Price             Open        High         Low       Close      Volume
Date                                                                  
2017-11-09  308.644989  329.451996  307.056000  320.884003   893249984
2017-11-10  320.670990  324.717987  294.541992  299.252991   885985984
2017-11-11  298.585999  319.453003  298.191986  314.681000   842300992
2017-11-12  314.690002  319.153015  298.513000  307.907990  1613479936
2017-11-13  307.024994  328.415009  307.024994  316.716003  1041889984
Downloading external market data...
None

Calculating technical indicators...
Final dataset: 1307 samples with 49 features

Normalizing features with RobustScaler...
Creating sequences (window=60)...
Total sequences: 1247
Sequence shape: (1247, 60, 49)

Training samples: 997
Test samples: 250
Direction distribution: Up=500, Down=497

Building improved hybrid model...
Model: "model_3"
______________________________

                                                                                                  
Total params: 2458690 (9.38 MB)
Trainable params: 2455618 (9.37 MB)
Non-trainable params: 3072 (12.00 KB)
__________________________________________________________________________________________________

Training model...
Epoch 1/150


KeyboardInterrupt: 

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

def _fetch_binance_daily_klines(symbol, keep_partial_on_error=True):
    """Page through Binance daily klines for ``symbol`` from 2021-01-01 (UTC).

    Args:
        symbol: Binance trading pair, e.g. ``'ETHUSDT'``.
        keep_partial_on_error: When a request fails after some pages were
            already collected, return those pages (True) or discard
            everything and return ``None`` (False).

    Returns:
        A DataFrame indexed by ``timestamp`` holding the twelve kline fields
        as returned by the API (values are not converted), or ``None`` when
        nothing was collected.
    """
    import time
    from datetime import timezone

    import requests

    url = "https://api.binance.com/api/v3/klines"
    page_limit = 1000

    # Anchor the start date to UTC so the first requested candle does not
    # depend on the local timezone of the machine running the notebook.
    start_ms = int(datetime(2021, 1, 1, tzinfo=timezone.utc).timestamp() * 1000)
    end_ms = int(datetime.now(timezone.utc).timestamp() * 1000)

    rows = []
    cursor = start_ms

    while cursor < end_ms:
        params = {
            'symbol': symbol,
            'interval': '1d',
            'startTime': cursor,
            'limit': page_limit
        }

        try:
            response = requests.get(url, params=params, timeout=10)
            # Surface HTTP error statuses as a reported error instead of
            # silently ending the loop on a non-list payload.
            response.raise_for_status()
            page = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error: {e}")
            if not keep_partial_on_error:
                return None
            break

        if not page or not isinstance(page, list):
            break

        rows.extend(page)

        # A short page means the history is exhausted.
        if len(page) < page_limit:
            break

        # Resume right after the open time of the last candle received.
        cursor = page[-1][0] + 1
        time.sleep(0.1)

    if not rows:
        return None

    frame = pd.DataFrame(rows, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_volume', 'trades', 'taker_buy_base',
        'taker_buy_quote', 'ignore'
    ])
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')
    return frame.set_index('timestamp')


def fetch_ethereum_binance():
    """Fetch daily ETHUSDT candles from the Binance REST API.

    Returns:
        A DataFrame indexed by ``timestamp`` with float columns ``open``,
        ``high``, ``low``, ``close`` and ``volume``, or ``None`` when no data
        could be retrieved. If a request fails part-way, the candles
        collected so far are returned.
    """
    klines = _fetch_binance_daily_klines('ETHUSDT', keep_partial_on_error=True)
    if klines is None:
        return None

    return klines[['open', 'high', 'low', 'close', 'volume']].astype(float)


def fetch_bitcoin_binance():
    """Fetch daily BTCUSDT closing prices from the Binance REST API.

    Returns:
        A DataFrame indexed by ``timestamp`` with a single float column
        ``btc_close``, or ``None`` when any request fails or no data is
        returned. The result is all-or-nothing: pages collected before a
        failed request are discarded.
    """
    klines = _fetch_binance_daily_klines('BTCUSDT', keep_partial_on_error=False)
    if klines is None:
        return None

    return klines[['close']].astype(float).rename(columns={'close': 'btc_close'})

def compute_pps_scores(X, y):
    """Score each feature by how well a shallow tree on it alone fits ``y``.

    For every column a depth-4 ``DecisionTreeRegressor`` is fitted on that
    single column and scored with R² on the same data it was fitted on,
    clipped below at 0. This is an in-sample proxy for predictive power, not
    a cross-validated score.

    Args:
        X: DataFrame of candidate features.
        y: Target values aligned with the rows of ``X``.

    Returns:
        Dict mapping column name to score; columns that cannot be fitted
        score 0.
    """
    from sklearn.metrics import r2_score
    from sklearn.tree import DecisionTreeRegressor

    pps_scores = {}
    for col in X.columns:
        try:
            column_values = X[col].values.reshape(-1, 1)
            tree = DecisionTreeRegressor(max_depth=4, random_state=42)
            tree.fit(column_values, y)
            fitted = tree.predict(column_values)
            pps_scores[col] = max(0, r2_score(y, fitted))
        except (ValueError, TypeError):
            # Unusable column (e.g. NaN/inf or non-numeric values): keep the
            # best-effort fallback, but no longer trap KeyboardInterrupt or
            # SystemExit the way a bare ``except`` did.
            pps_scores[col] = 0

    return pps_scores

def compute_correlation(X, y):
    """Return the absolute Pearson correlation of each column of ``X`` with ``y``.

    Args:
        X: DataFrame of candidate features.
        y: Target values aligned with the rows of ``X``.

    Returns:
        Dict mapping column name to ``|r|``. Columns whose correlation is
        undefined (NaN, e.g. a constant column) or cannot be computed (e.g.
        non-numeric values or a length mismatch) map to 0.
    """
    correlations = {}
    for col in X.columns:
        try:
            corr = np.corrcoef(X[col], y)[0, 1]
        except (TypeError, ValueError):
            # Best-effort fallback for unusable columns; a bare ``except``
            # would also have swallowed KeyboardInterrupt and SystemExit.
            correlations[col] = 0
        else:
            correlations[col] = 0 if np.isnan(corr) else abs(corr)
    return correlations

def feature_engineering(df):
    """Return a copy of ``df`` extended with derived price/volume features.

    Reads the ``open``, ``high``, ``low``, ``close`` and ``volume`` columns.
    Leading rows contain NaN wherever a rolling window or lag reaches before
    the first observation. The input frame is not modified.
    """
    out = df.copy()
    close, volume = out['close'], out['volume']
    high, low = out['high'], out['low']

    # Returns, short-term volatility and momentum.
    out['returns'] = close.pct_change()
    out['log_returns'] = np.log(close / close.shift(1))
    out['volatility'] = out['returns'].rolling(window=7).std()
    out['momentum'] = close - close.shift(5)
    out['volume_change'] = volume.pct_change()

    # Simple and exponential moving averages.
    for window in (7, 21, 50):
        out[f'ma_{window}'] = close.rolling(window=window).mean()
    for span in (12, 26):
        out[f'ema_{span}'] = close.ewm(span=span, adjust=False).mean()

    # 14-period RSI from simple rolling averages of gains and losses.
    change = close.diff()
    avg_gain = change.where(change > 0, 0).rolling(window=14).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
    relative_strength = avg_gain / avg_loss
    out['rsi'] = 100 - (100 / (1 + relative_strength))

    out['macd'] = out['ema_12'] - out['ema_26']
    out['macd_signal'] = out['macd'].ewm(span=9, adjust=False).mean()

    # Average true range: the largest of the three range candidates per row,
    # averaged over 14 rows.
    previous_close = close.shift()
    candidates = pd.concat(
        [high - low, (high - previous_close).abs(), (low - previous_close).abs()],
        axis=1,
    )
    out['atr'] = candidates.max(axis=1).rolling(14).mean()

    # 20-period bands at two standard deviations.
    band_window = close.rolling(window=20)
    band_offset = band_window.std() * 2
    out['bb_middle'] = band_window.mean()
    out['bb_upper'] = out['bb_middle'] + band_offset
    out['bb_lower'] = out['bb_middle'] - band_offset

    # One to seven step lags of close and volume, interleaved per lag.
    for lag in range(1, 8):
        out[f'close_lag_{lag}'] = close.shift(lag)
        out[f'volume_lag_{lag}'] = volume.shift(lag)

    out['price_range'] = high - low
    out['price_change'] = close - out['open']

    return out

def adaptive_feature_selection(X, y, pps_threshold=0.2, corr_threshold=0.05):
    """Select features in two filtering phases and return their names.

    Phase I keeps columns whose score from ``compute_pps_scores`` reaches
    ``pps_threshold`` (all columns if none qualifies). Phase II keeps the
    Phase I columns whose value from ``compute_correlation`` reaches
    ``corr_threshold``; if none qualifies, the first 20 Phase I columns are
    returned instead. The size of each phase is printed before its fallback
    is applied.
    """
    phase_one = [
        name for name, pps in compute_pps_scores(X, y).items()
        if pps >= pps_threshold
    ]
    print(f"Phase I: {len(phase_one)} features")
    if not phase_one:
        phase_one = list(X.columns)

    strength_by_feature = compute_correlation(X[phase_one], y)
    phase_two = [
        name for name, strength in strength_by_feature.items()
        if strength >= corr_threshold
    ]
    print(f"Phase II: {len(phase_two)} features")

    return phase_two if phase_two else phase_one[:20]

def create_sequences(data, target, timesteps=7):
    """Build sliding-window samples with value and direction targets.

    For every index ``i >= timesteps`` the sample is ``data[i - timesteps:i]``,
    its value target is ``target[i]`` and its direction label is 1 when
    ``target[i] > target[i - 1]``, else 0.

    Returns:
        Tuple ``(X, y, y_direction)`` of numpy arrays.
    """
    anchors = range(timesteps, len(data))

    windows = [data[end - timesteps:end] for end in anchors]
    values = [target[end] for end in anchors]
    directions = [1 if target[end] > target[end - 1] else 0 for end in anchors]

    return np.array(windows), np.array(values), np.array(directions)

def build_model(input_shape):
    """Build and compile the single-input GRU/LSTM model with two heads.

    Args:
        input_shape: Shape of the training array; elements 1 and 2 are used
            as the timestep and feature dimensions of the input layer.

    Returns:
        A compiled Keras model with the outputs ``price_forecast``
        (regression, MSE loss) and ``direction_classification`` (sigmoid,
        binary cross-entropy loss), weighted equally.
    """
    from tensorflow import keras
    from tensorflow.keras import layers

    def _stacked(cell, tensor):
        # Three 256-unit recurrent layers; only the last drops the time axis.
        for keep_sequences in (True, True, False):
            tensor = cell(256, return_sequences=keep_sequences)(tensor)
        return tensor

    def _head(features, output_name, activation=None):
        hidden = layers.Dense(128, activation='relu')(features)
        hidden = layers.Dropout(0.1)(hidden)
        return layers.Dense(1, activation=activation, name=output_name)(hidden)

    sequence_input = layers.Input(shape=(input_shape[1], input_shape[2]))

    # Both recurrent stacks read the same input and are merged afterwards.
    gru_features = _stacked(layers.GRU, sequence_input)
    lstm_features = _stacked(layers.LSTM, sequence_input)
    merged = layers.Concatenate()([gru_features, lstm_features])

    price_head = _head(merged, 'price_forecast')
    direction_head = _head(merged, 'direction_classification', activation='sigmoid')

    model = keras.Model(inputs=sequence_input, outputs=[price_head, direction_head])

    losses = {'price_forecast': 'mse', 'direction_classification': 'binary_crossentropy'}
    weights = {'price_forecast': 0.5, 'direction_classification': 0.5}
    tracked = {'price_forecast': ['mae', 'mape'], 'direction_classification': ['accuracy']}
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss=losses,
        loss_weights=weights,
        metrics=tracked
    )

    return model

# End-to-end pipeline: fetch data, engineer features, normalise, select
# features, build 7-step sequences, train, evaluate and export predictions.
print("=" * 70)
print("Ethereum Price Prediction")
print("=" * 70)

print("\n[1] Data Collection")
eth_df = fetch_ethereum_binance()

if eth_df is None or len(eth_df) == 0:
    print("API unavailable")
else:
    print(f"Ethereum: {len(eth_df)} samples")

    print("\n[2] Bitcoin Data")
    btc_df = fetch_bitcoin_binance()
    if btc_df is not None:
        eth_df = eth_df.join(btc_df, how='inner')
        print(f"Combined: {len(eth_df)} samples")

    print("\n[3] Feature Engineering")
    eth_df = feature_engineering(eth_df)
    eth_df = eth_df.dropna()
    print(f"Features: {len(eth_df.columns)}, Samples: {len(eth_df)}")

    timesteps = 7
    if len(eth_df) <= timesteps:
        # Not even one sequence can be built; stop before fitting anything.
        print(f"Not enough samples to build sequences of {timesteps} timesteps")
    else:
        # Chronological 80/20 split over sequences, decided up front.
        # Training sequences touch the first ``timesteps + split`` rows, so
        # only those rows may be used to fit the scalers and to select
        # features; using all rows would leak test-period information.
        split = int((len(eth_df) - timesteps) * 0.8)
        train_rows = timesteps + split

        print("\n[4] Normalization")
        scaler_features = MinMaxScaler()
        scaler_target = MinMaxScaler()

        feature_cols = [col for col in eth_df.columns if col != 'close']
        X_raw = eth_df[feature_cols]
        y_raw = eth_df['close'].values

        scaler_features.fit(X_raw.iloc[:train_rows])
        scaler_target.fit(y_raw[:train_rows].reshape(-1, 1))

        X_norm = pd.DataFrame(
            scaler_features.transform(X_raw),
            columns=feature_cols,
            index=eth_df.index
        )
        y_norm = scaler_target.transform(y_raw.reshape(-1, 1)).flatten()
        print(f"X: {X_norm.shape}, y: {y_norm.shape}")

        print("\n[5] Feature Selection")
        selected = adaptive_feature_selection(X_norm.iloc[:train_rows], y_norm[:train_rows])
        X_selected = X_norm[selected]

        print("\n[6] Sequences")
        X_seq, y_seq, y_dir = create_sequences(X_selected.values, y_norm, timesteps)
        print(f"X: {X_seq.shape}, y: {y_seq.shape}")

        print("\n[7] Split")
        X_train, X_test = X_seq[:split], X_seq[split:]
        y_train, y_test = y_seq[:split], y_seq[split:]
        y_dir_train, y_dir_test = y_dir[:split], y_dir[split:]
        print(f"Train: {X_train.shape[0]}, Test: {X_test.shape[0]}")

        print("\n[8] Training")
        model = build_model(X_train.shape)
        history = model.fit(
            X_train,
            {'price_forecast': y_train, 'direction_classification': y_dir_train},
            epochs=50,
            batch_size=32,
            validation_split=0.2,
            verbose=0
        )

        print("\n[9] Evaluation")
        # Look metrics up by name; positional indexes into the result list
        # silently report the wrong value if the metric order ever changes.
        results = model.evaluate(
            X_test,
            {'price_forecast': y_test, 'direction_classification': y_dir_test},
            verbose=0,
            return_dict=True
        )

        # These two values are measured on the normalised target scale.
        print(f"Price MAE: {results['price_forecast_mae']:.4f}")
        print(f"Price MAPE: {results['price_forecast_mape']:.2f}%")

        print("\n[10] Predictions")
        preds = model.predict(X_test, verbose=0)
        y_pred = preds[0].flatten()
        y_pred_dir = preds[1].flatten()

        # Map predictions and targets back to the original price scale.
        y_pred_orig = scaler_target.inverse_transform(y_pred.reshape(-1, 1)).flatten()
        y_test_orig = scaler_target.inverse_transform(y_test.reshape(-1, 1)).flatten()

        direction_accuracy = np.mean((y_pred_dir > 0.5).astype(int) == y_dir_test)
        print(f"Direction Accuracy: {direction_accuracy*100:.2f}%")

        from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

        mae = mean_absolute_error(y_test_orig, y_pred_orig)
        rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred_orig))
        mape = np.mean(np.abs((y_test_orig - y_pred_orig) / y_test_orig)) * 100
        r2 = r2_score(y_test_orig, y_pred_orig)

        print(f"\nDetailed Metrics:")
        print(f"MAE: ${mae:.2f}")
        print(f"RMSE: ${rmse:.2f}")
        print(f"MAPE: {mape:.2f}%")
        print(f"R²: {r2:.4f}")

        results_df = pd.DataFrame({
            'Actual': y_test_orig,
            'Predicted': y_pred_orig,
            'Difference': y_test_orig - y_pred_orig,
            'Diff_Pct': ((y_test_orig - y_pred_orig) / y_test_orig * 100),
            'Direction_Pred': (y_pred_dir > 0.5).astype(int),
            'Direction_Actual': y_dir_test
        })

        print("\nPredictions:")
        print(results_df.head(10))

        results_df.to_csv('ethereum_predictions.csv', index=False)
        print("\nSaved: ethereum_predictions.csv")

print("\n" + "=" * 70)
print("Complete")
print("=" * 70)


Ethereum Price Prediction

[1] Data Collection
Ethereum: 1737 samples

[2] Bitcoin Data
Combined: 1737 samples

[3] Feature Engineering
Features: 39, Samples: 1688

[4] Normalization
X: (1688, 38), y: (1688,)

[5] Feature Selection
Phase I: 23 features
Phase II: 23 features

[6] Sequences
X: (1681, 7, 23), y: (1681,)

[7] Split
Train: 1344, Test: 337

[8] Training

[9] Evaluation
Price MAE: 0.0432
Price MAPE: 9.63%

[10] Predictions
Direction Accuracy: 51.34%

Detailed Metrics:
MAE: $165.91
RMSE: $198.53
MAPE: 5.74%
R²: 0.9482

Predictions:
    Actual    Predicted  Difference  Diff_Pct  Direction_Pred  \
0  2511.49  2688.680176 -177.190176 -7.055181               1   
1  2494.23  2627.786621 -133.556621 -5.354623               1   
2  2457.73  2620.535400 -162.805400 -6.624218               1   
3  2398.21  2584.179443 -185.969443 -7.754510               1   
4  2422.55  2551.811035 -129.261035 -5.335743               1   
5  2721.87  2562.510010  159.359990  5.854798               1  