In [1]:
# %% [0] Imports & Config
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

# reproducibility
np.random.seed(42)

In [2]:
# %% [1] Load & Inspect
df = pd.read_csv('origin_with_indicators.csv', parse_dates=['time'])
df = df.sort_values('time').reset_index(drop=True)
print("Columns:", df.columns.tolist())

Columns: ['time', 'price', 'candletype', 'candlebody', 'candleupperwick', 'candlelowerwick', 'tick_volume', 'CPI_Actual', 'CPI_Forecast', 'CPI_Previous', 'GDP_Actual', 'GDP_Forecast', 'GDP_Previous', 'Interest_Rate_Actual', 'Interest_Rate_Forecast', 'Interest_Rate_Previous', 'PCE_Actual', 'PCE_Forecast', 'PCE_Previous', 'PPI_Actual', 'PPI_Forecast', 'PPI_Previous', 'Macro_factor', 'RSI', 'MACD', 'Signal', 'SMA_50', 'SMA_200', 'SMA_280']


In [13]:
# %% [2] Define target & features
target_col = 'price'
feature_cols = [
    'candletype', 'candlebody', 'candleupperwick', 'candlelowerwick',
    'tick_volume',
    'CPI_Actual', 'CPI_Forecast', 'CPI_Previous',
    'GDP_Actual', 'GDP_Forecast', 'GDP_Previous',
    'Interest_Rate_Actual', 'Interest_Rate_Forecast', 'Interest_Rate_Previous',
    'PCE_Actual', 'PCE_Forecast', 'PCE_Previous',
    'PPI_Actual', 'PPI_Forecast', 'PPI_Previous',
    'Macro_factor', 'RSI', 'MACD', 'Signal',
    'SMA_50', 'SMA_200', 'SMA_280'
]
n_features = len(feature_cols)

In [14]:
# %% [3] Encode & Scale (to float32)
if df['candletype'].dtype == 'object':
    df['candletype'] = LabelEncoder().fit_transform(df['candletype'])

scalers = {}
for col in feature_cols + [target_col]:
    sc = MinMaxScaler()
    df[[col]] = sc.fit_transform(df[[col]])
    scalers[col] = sc

# cast everything to float32
df[feature_cols + [target_col]] = df[feature_cols + [target_col]].astype('float32')


In [16]:
# %% [4] Train/test split
horizon  = 7
lookback = 60

last_time  = df['time'].max()
test_start = last_time - pd.Timedelta(days=horizon)

train_df = df[df['time'] <= test_start].reset_index(drop=True)
test_df  = df[df['time']  > test_start].reset_index(drop=True)

# how many samples per epoch
n_samples = len(train_df) - lookback - horizon + 1

In [18]:
# %% [5] Generator for windows
def window_generator(df, input_cols, target_col, lookback, horizon, batch_size):
    """Yields (X_feat, y_feat), (X_price, y_price) batches on the fly."""
    N = len(df)
    i = 0
    while True:
        X_feat_batch = np.zeros((batch_size, lookback, n_features), dtype='float32')
        y_feat_batch = np.zeros((batch_size, n_features), dtype='float32')
        X_price_batch = np.zeros((batch_size, lookback, n_features), dtype='float32')
        y_price_batch = np.zeros((batch_size,), dtype='float32')
        for j in range(batch_size):
            if i + lookback + horizon > N:
                i = 0  # wrap (or shuffle index list if you prefer random)
            hist = df[input_cols].iloc[i : i + lookback].values
            X_feat_batch[j] = hist
            y_feat_batch[j] = df[input_cols].iloc[i + lookback].values
            X_price_batch[j] = hist
            y_price_batch[j] = df[target_col].iloc[i + lookback + horizon - 1]
            i += 1
        yield ({'input_1': X_feat_batch}, y_feat_batch), ({'input_2': X_price_batch}, y_price_batch)

# Note: we'll actually split into two separate generators for clarity:

def feat_generator(df, input_cols, lookback, horizon, batch_size):
    i = 0
    N = len(df)
    while True:
        Xb = np.zeros((batch_size, lookback, n_features), dtype='float32')
        yb = np.zeros((batch_size, n_features),    dtype='float32')
        for j in range(batch_size):
            if i + lookback + horizon > N: i = 0
            hist = df[input_cols].iloc[i : i + lookback].values
            Xb[j] = hist
            yb[j] = df[input_cols].iloc[i + lookback].values
            i += 1
        yield Xb, yb

def price_generator(df, input_cols, target_col, lookback, horizon, batch_size):
    i = 0
    N = len(df)
    while True:
        Xb = np.zeros((batch_size, lookback, n_features), dtype='float32')
        yb = np.zeros((batch_size,),               dtype='float32')
        for j in range(batch_size):
            if i + lookback + horizon > N: i = 0
            hist = df[input_cols].iloc[i : i + lookback].values
            Xb[j] = hist
            yb[j] = df[target_col].iloc[i + lookback + horizon - 1]
            i += 1
        yield Xb, yb

In [19]:
# %% [6] Define & train Feature‑Forecaster
hidden_units = 128

model_feat = Sequential([
    LSTM(hidden_units, input_shape=(lookback, n_features), name='input_1'),
    Dense(n_features, activation='linear')
])
model_feat.compile(optimizer=Adam(1e-3), loss='mse')

batch_size = 64
steps_per_epoch = n_samples // batch_size

model_feat.fit(
    feat_generator(train_df, feature_cols, lookback, horizon, batch_size),
    epochs=10,
    steps_per_epoch=steps_per_epoch,
)

  super().__init__(**kwargs)


MemoryError: Unable to allocate 6.58 MiB for an array with shape (27, 63873) and data type float32

In [None]:
# %% [7] Define & train Feature‑Forecaster
n_features   = len(feature_cols)
hidden_units = 128

model_feat = Sequential([
    LSTM(hidden_units, input_shape=(lookback, n_features)),
    Dense(n_features, activation='linear')
])
model_feat.compile(optimizer=Adam(1e-3), loss='mse')

model_feat.fit(
    X_feat_train, y_feat_train,
    epochs=10, batch_size=64, verbose=1
)

In [None]:

# %% [8] Define & train Price‑Predictor
model_price = Sequential([
    LSTM(hidden_units, input_shape=(lookback, n_features)),
    Dense(1, activation='linear')
])
model_price.compile(optimizer=Adam(1e-3), loss='mse')

model_price.fit(
    X_price_train, y_price_train,
    epochs=10, batch_size=64, verbose=1
)


In [None]:
# %% [9] Recursive forecasting helpers
def forecast_features(model, last_window, horizon=7):
    """Recursively predict next `horizon` feature vectors."""
    preds = []
    cur = last_window.copy()
    for _ in range(horizon):
        one = model.predict(cur)
        preds.append(one.flatten())
        cur = np.concatenate([cur[:,1:,:], one.reshape(1,1,-1)], axis=1)
    return np.array(preds)  # (horizon, F)

def forecast_price(model, feature_preds, hist_window):
    """Use rolled feature_preds to recursively predict price each day."""
    prices = []
    win = hist_window.copy()
    for feat in feature_preds:
        win = np.concatenate([win[:,1:,:], feat.reshape(1,1,-1)], axis=1)
        p = model.predict(win)
        prices.append(p.flatten()[0])
    return np.array(prices)