In [8]:
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping

# 1. Reload the enhanced dataset
df = pd.read_csv('../data/BTCUSDT_1m_features.csv')
print("Enhanced dataset loaded!")

# 2. Recreate your feature columns list
feature_cols = [
    'ma_14', 'ema_14', 'bb_upper', 'bb_lower', 'rsi_14',
    'close_lag1', 'close_lag2', 'returns_1m',
    'day_of_week', 'hour_of_day', 'volume',
    # New features:
    'macd', 'macd_signal', 'bb_width', 'atr', 'stoch_k', 'stoch_d'
]

df['future_close_1m'] = df['close'].shift(-1)
df['target_1m'] = (df['future_close_1m'] > df['close']).astype(int)

# 5-minute horizon (new)
df['future_close_5m'] = df['close'].shift(-5)
df['target_5m'] = (df['future_close_5m'] > df['close']).astype(int)

# Drop the last row because 'future_close' will be NaN
df = df.dropna(subset=['future_close_1m', 'future_close_5m'])

# 3. Drop missing values (some indicators need previous data)
X = df[feature_cols].dropna()
y = df.loc[X.index, 'target_5m']  # Use 5-min target
# y = df.loc[X.index, 'target_1m']  # Use 1-min target

# 4. Split again
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, shuffle=False
)

# 5. Scaling
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

print("Scaling complete!")

# 6. Build a new model
from tensorflow import keras
from tensorflow.keras import layers

model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    layers.Dropout(0.3),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Create EarlyStopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',  # Watch validation loss
    patience=5,          # Stop if no improvement after 5 epochs
    restore_best_weights=True  # Restore best weights after stopping
)

# Train the model with EarlyStopping
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=50,  # can set higher now, EarlyStopping will decide when to stop
    batch_size=32,
    validation_data=(X_val_scaled, y_val),
    callbacks=[early_stopping]
)

# Conclusion - Predicting 5-minutes ahead is easier than 1-minute (as we expected).

Enhanced dataset loaded!
Scaling complete!
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 762us/step - accuracy: 0.5035 - loss: 0.7104 - val_accuracy: 0.4990 - val_loss: 0.6928
Epoch 2/50
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 658us/step - accuracy: 0.5225 - loss: 0.6931 - val_accuracy: 0.5074 - val_loss: 0.6925
Epoch 3/50
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 658us/step - accuracy: 0.5183 - loss: 0.6913 - val_accuracy: 0.5030 - val_loss: 0.6931
Epoch 4/50
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 650us/step - accuracy: 0.5214 - loss: 0.6913 - val_accuracy: 0.5161 - val_loss: 0.6920
Epoch 5/50
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 677us/step - accuracy: 0.5291 - loss: 0.6903 - val_accuracy: 0.4981 - val_loss: 0.6919
Epoch 6/50
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 639us/step - accuracy: 0.5299 - loss: 0.6905 - val_accuracy: 0.5138 - val_loss: 0.6918
Epoch 7/50
[1m972/972[0m 