In [1]:
import datetime
import os
import random

import plotly.express as px
import plotly.graph_objects as go
import plotly.io
import plotly.subplots
import polars as pl
import polars.selectors as cs
import tensorflow as tf

print("TensorFlow version:", tf.__version__)

2024-05-03 11:19:54.504609: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow version: 2.15.0


In [2]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [3]:
# single seed shared by the Python and TensorFlow random number generators,
# so that key sampling, weight initialisation and shuffling are repeatable
SEED = 7

random.seed(SEED)
tf.random.set_seed(SEED)

In [4]:
# use the dark template as default for every plotly figure of the notebook

plotly.io.templates.default = "plotly_dark"

In [5]:
# TODO Fourier analysis and correlations

In [6]:
# folder containing the inputs from the competition
INPUT_DIR = os.path.abspath("input")

In [7]:
# TODO move all constants to the beginning

TARGET_LENGTH = 16  # length of the target sequences

# metadata columns: items on promotion, oil prices, weekday
METADATA_COLS = ["onpromotion", "dcoilwtico", "weekday"]

DATAFRAMES_TIMESTEPS = 600  # number of timesteps to keep in the training dataframe

TRAIN_RATIO = 0.8  # ratio of data to use for training vs for validation

# number of timesteps to use for training and for validation
# (with the values above: 480 timesteps for training, 120 for validation)
TRAIN_TIMESTEPS = int(DATAFRAMES_TIMESTEPS * TRAIN_RATIO)
VALID_TIMESTEPS = DATAFRAMES_TIMESTEPS - TRAIN_TIMESTEPS

In [8]:
# training hyperparameters
LEARNING_RATE = 1e-4
BATCH_SIZE = 64

# buffer size for dataset shuffling
BUFFER_SIZE = 10_000

In [9]:
# the Kaggle competition uses Root Mean Squared Logarithmic Error (RMSLE);
# the loss used for training is the un-rooted version (MSLE)
# NOTE(review): the series are standardised before training, so the targets can
# be negative; Keras' MSLE appears to clip values below epsilon before taking
# the logarithm -- confirm that this is the intended behaviour
LOSS = tf.keras.losses.MeanSquaredLogarithmicError()

---
# Data preparation

In [10]:
def fill_missing_dates(df: pl.DataFrame) -> pl.DataFrame:
    """Returns `df` with one row per calendar day and no missing values.

    A complete daily calendar is built between the smallest and the largest
    value of the 'date' column, and `df` is joined onto it so that the days
    absent from `df` appear as rows of nulls. The nulls are then interpolated;
    nulls left at the very start/end are filled backward, then forward.

    Args:
        df: dataframe with a 'date' column.

    Returns:
        A dataframe with the 'date' column first, followed by the other
        columns of `df`, with one row for every day of the range.
    """

    calendar_df = pl.DataFrame(
        pl.date_range(
            df["date"].min(), df["date"].max(), interval="1d", eager=True
        ).alias("date")
    )

    # The calendar spans the whole date range of `df` (assuming 'date' holds
    # plain daily dates), so a left join keeps the same rows as an outer join
    # would, without creating a second key column ('date_right') to drop.
    df = calendar_df.join(df, on="date", how="left")

    # interpolate and fill backward/forward in case the first/last value is missing
    df = df.interpolate().fill_null(strategy="backward").fill_null(strategy="forward")

    return df

In [11]:
# load the oil prices and complete them so that every day has a value
oil_path = os.path.join(INPUT_DIR, "oil.csv")
oil_df = fill_missing_dates(pl.read_csv(oil_path, try_parse_dates=True))

In [12]:
# load the training data; the 'id' column is not needed
train_df = pl.read_csv(
    os.path.join(INPUT_DIR, "train.csv"), try_parse_dates=True
).drop("id")

# keep only the most recent dates: the chosen number of timesteps, plus
# TARGET_LENGTH extra steps that are set aside as test set
cutoff_date = train_df["date"].max() - datetime.timedelta(
    DATAFRAMES_TIMESTEPS + TARGET_LENGTH
)
train_df = train_df.filter(pl.col("date") > cutoff_date)

# make one dataframe per (store_nbr, family) pair; each of them gets its
# missing dates filled, the oil prices joined, a weekday column added and all
# of its numeric columns cast to Float32
dataframes = {
    key: (
        fill_missing_dates(df)
        .join(oil_df, on="date", how="inner")
        .with_columns(pl.col("date").dt.weekday().alias("weekday"))
        .with_columns(cs.numeric().cast(pl.Float32))
    )
    for key, df in train_df.partition_by(
        ["store_nbr", "family"], include_key=False, as_dict=True
    ).items()
}

In [13]:
# Hold out the end of every series as a test set to evaluate the models after
# training. The test dataframes play the role of the targets from test.csv,
# except that their 'sales' values are known and can be compared with the
# predictions.

test_dataframes = {}  # held-out tail of each series, by key

for key in list(dataframes):
    df = dataframes[key]
    test_dataframes[key] = df.tail(TARGET_LENGTH)  # last TARGET_LENGTH rows
    dataframes[key] = df.head(-TARGET_LENGTH)  # everything before them

In [14]:
# inspect one series, chosen explicitly instead of through a loop variable
# left over from a previous cell
sample_key = next(iter(dataframes))

print("columns:", dataframes[sample_key].columns)
print("training dataframes shape:", dataframes[sample_key].shape)
print("test dataframes shape    :", test_dataframes[sample_key].shape)

columns: ['date', 'sales', 'onpromotion', 'dcoilwtico', 'weekday']
training dataframes shape: (600, 5)
test dataframes shape    : (16, 5)


In [15]:
# TODO Only need to add the values from test.csv for the predictions, not for training
# NOTE call 'target_df' to avoid confusion with the test dataframes (taken from train.csv)

# target_df = pl.read_csv(os.path.join(INPUT_DIR, "test.csv"), try_parse_dates=True)

# # Add a 'sales' column filled with null values to the test dataframe
# target_df = target_df.with_columns(pl.lit(None).alias("sales")).select(train_df.columns)

# # Concatenate the dataframes so that the training dataframe contains all time steps
# train_df = pl.concat([train_df, target_df])

---

# Scaling

In [16]:
STD_THRESHOLD = 1e-5


class DataFrameScaler:
    """Standardises the columns of a dataframe using fixed statistics.

    The mean and standard deviation of every column (except 'id' and 'date')
    are computed once, from the dataframe given to the constructor, and are
    then reused by `scale` and `unscale`.

    Attributes:
        mean: one-row dataframe holding the mean of each column.
        std: one-row dataframe holding the standard deviation of each column,
            with values below STD_THRESHOLD replaced by 1.0.
    """

    # columns that are not scaled
    _EXCLUDED_COLS = ("id", "date")

    def __init__(self, df: pl.DataFrame):
        # Drop only the excluded columns that are present: the per-series
        # dataframes have no 'id' column, and recent polars versions raise
        # when asked to drop a column that does not exist.
        df = df.drop([x for x in self._EXCLUDED_COLS if x in df.columns])
        df = df.cast(pl.Float32)

        self.mean = df.mean()
        self.std = df.std()

        # regularize the standard deviation to avoid division by too small values
        self.std = self.std.with_columns(
            pl.when(pl.col(x) < STD_THRESHOLD).then(1.0).otherwise(pl.col(x)).alias(x)
            for x in self.std.columns
        )

    def scale(self, df: pl.DataFrame) -> pl.DataFrame:
        """Returns `df` with every known column standardised and cast to Float32.

        Columns for which no statistics were computed are left untouched.
        """
        return df.with_columns(
            ((pl.col(x) - self.mean[x]) / self.std[x]).cast(pl.Float32)
            for x in self.mean.columns
        )

    def unscale(self, df: pl.DataFrame) -> pl.DataFrame:
        """Reverts `scale`: multiplies by the std and adds the mean back."""
        return df.with_columns(
            ((pl.col(x) * self.std[x]) + self.mean[x]) for x in self.mean.columns
        )

In [17]:
# fit one scaler per series, on the training part of the series only
dfs = {
    series_key: DataFrameScaler(series_df.head(TRAIN_TIMESTEPS))
    for series_key, series_df in dataframes.items()
}

---
# Filtering

In [18]:
# NOTE zero-tail series
# move every series whose last ZERO_TAIL_LENGTH sales sum to zero out of
# `dataframes` and into `zero_tail_dataframes`

ZERO_TAIL_LENGTH = 16

zero_tail_dataframes = {}

# iterate over a copy of the keys since `dataframes` shrinks along the way
for key in list(dataframes):
    if dataframes[key]["sales"].tail(ZERO_TAIL_LENGTH).sum() == 0:
        zero_tail_dataframes[key] = dataframes.pop(key)

print("drop:", len(zero_tail_dataframes), "| remaining:", len(dataframes))

drop: 116 | remaining: 1666


In [19]:
# draw one of the zero-tail series at random and plot its sales
zero_tail_key = random.sample(sorted(zero_tail_dataframes.keys()), 1)[0]
df = zero_tail_dataframes[zero_tail_key]
px.line(data_frame=df, x="date", y="sales")

In [20]:
# NOTE series with gaps
# filter out all series having a gap of at least `GAP_LENGTH` timesteps (= 0)

# minimum gap length
GAP_LENGTH = 32

# A window is a gap when all of its values are zero, i.e. when its maximum is
# zero (this assumes that sales are never negative). The rolling maximum is
# exact, whereas a rolling mean of floats may come out as a tiny non-zero
# value over an all-zero window and then fail an equality test against 0.0.
gap_dataframes = {
    key: df
    for key, df in dataframes.items()
    if df["sales"].rolling_max(window_size=GAP_LENGTH).min() == 0.0
}

for key in gap_dataframes.keys():
    _ = dataframes.pop(key)

print("drop:", len(gap_dataframes), "| remaining:", len(dataframes))

drop: 188 | remaining: 1478


In [21]:
# draw one of the series with a gap at random and plot its sales
gap_key = random.sample(sorted(gap_dataframes.keys()), 1)[0]
df = gap_dataframes[gap_key]
px.line(data_frame=df, x="date", y="sales")

In [22]:
# draw one of the remaining series at random and plot its sales
remaining_key = random.sample(sorted(dataframes.keys()), 1)[0]
df = dataframes[remaining_key]
px.line(data_frame=df, x="date", y="sales")

---
# DATASET CREATION
---

In [23]:
class DatasetStack:
    """Windowed training and validation datasets built from several series.

    For every key, the scaled series is cut into sliding windows of
    `input_length + TARGET_LENGTH` timesteps. Each window gives one example:
    the first `input_length` timesteps (all columns) as input, and the 'sales'
    column of the last TARGET_LENGTH timesteps as target. The examples of all
    keys are concatenated, shuffled and batched.

    Attributes:
        input_length: number of timesteps of each input sequence.
        window_length: input_length + TARGET_LENGTH.
        keys: keys of `dataframes` (and `dfs`) selecting the series to use.
        train: dataset built from the first TRAIN_TIMESTEPS rows of each series.
        valid: dataset built from the last VALID_TIMESTEPS rows of each series.

    Raises:
        ValueError: if `keys` is empty, or if a window does not fit in the
            training or validation part of a series.
    """

    def __init__(self, input_length: int, keys: list):
        if not keys:
            raise ValueError("keys must contain at least one key")

        self.input_length = input_length
        self.window_length = input_length + TARGET_LENGTH

        # a window longer than one of the splits would silently give an empty dataset
        if self.window_length > min(TRAIN_TIMESTEPS, VALID_TIMESTEPS):
            raise ValueError(
                f"window length {self.window_length} exceeds the number of "
                f"training ({TRAIN_TIMESTEPS}) or validation ({VALID_TIMESTEPS}) "
                "timesteps"
            )

        self.keys = keys

        self._make_datasets()

    def _make_datasets(self):
        """Builds `self.train` and `self.valid` from all the keys."""
        train, valid = None, None

        # make the datasets of each key and concatenate them in key order
        for key in self.keys:
            df = self._prepare_dataframe(key)

            _train = self._make_window_dataset(df.head(TRAIN_TIMESTEPS))
            _valid = self._make_window_dataset(df.tail(VALID_TIMESTEPS))

            train = _train if train is None else train.concatenate(_train)
            valid = _valid if valid is None else valid.concatenate(_valid)

        self.train = train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
        self.valid = valid.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

    def _make_window_dataset(self, df: pl.DataFrame) -> tf.data.Dataset:
        """Turns a dataframe into a dataset of (input, target) pairs."""
        ds = tf.data.Dataset.from_tensor_slices(df)
        ds = ds.window(size=self.window_length, shift=1, drop_remainder=True)

        # each window is itself a dataset: batch it to get a single tensor
        ds = ds.flat_map(lambda window: window.batch(self.window_length))

        # split each window into (input, target) pairs
        # - input  = first input_length timesteps, all columns
        # - target = last TARGET_LENGTH timesteps, 'sales' column only
        ds = ds.map(
            lambda window: (
                window[: self.input_length],
                tf.gather(window[-TARGET_LENGTH:], indices=[0], axis=-1),
            )
        )

        return ds

    def _prepare_dataframe(self, key) -> pl.DataFrame:
        """Returns the scaled series of `key`, ready to be windowed.

        The columns are ordered as 'sales' followed by METADATA_COLS. The
        metadata columns are shifted by TARGET_LENGTH rows, so that every row
        holds the metadata of the timestep TARGET_LENGTH steps later: the last
        TARGET_LENGTH rows of an input window then carry the metadata of the
        target timesteps. The shift leaves nulls in the last TARGET_LENGTH
        rows of the dataframe.
        """
        df = dfs[key].scale(dataframes[key])
        df = df.select(["sales"] + METADATA_COLS)
        df = df.with_columns(pl.col(x).shift(-TARGET_LENGTH) for x in METADATA_COLS)

        return df

    # TODO remove if unused
    @property
    def length(self):
        """Returns the number of examples (not batches) in both datasets."""

        return (
            (TRAIN_TIMESTEPS - self.window_length + 1) * len(self.keys),
            (VALID_TIMESTEPS - self.window_length + 1) * len(self.keys),
        )

---
# MODEL SELECTION
---

In [24]:
# make a folder to store the training logs
os.makedirs(os.path.join("log", "selection"), exist_ok=True)

In [25]:
# The wrapper contains two models:
# - model_in: takes past values of target variable and outputs an intermediate sequence
# - model_out: takes the intermediate sequence + metadata and outputs the target sequence


@tf.keras.saving.register_keras_serializable()
class ModelWrapper(tf.keras.Model):
    """Chains a model reading past target values with one adding the metadata.

    `model_in` receives the target variable (feature 0) of the whole input
    sequence. Its output is concatenated with the metadata (features 1 to 3)
    of the last TARGET_LENGTH input timesteps and handed to `model_out`.
    """

    def __init__(self, model_in: tf.keras.Model, model_out: tf.keras.Model, **kwargs):
        super().__init__(**kwargs)
        self.model_in = model_in
        self.model_out = model_out

    def call(self, inputs: tf.Tensor) -> tf.Tensor:
        # feature 0 holds the values of the target variable
        target_values = tf.gather(inputs, indices=[0], axis=-1)

        # metadata of the target timesteps: features 1-3 of the last steps
        last_steps = inputs[:, -TARGET_LENGTH:, :]
        metadata = tf.gather(last_steps, indices=[1, 2, 3], axis=-1)

        # -> (batch, TARGET_LENGTH, 1)
        intermediate = self.model_in(target_values)

        # -> (batch, TARGET_LENGTH, 4)
        combined = tf.concat([intermediate, metadata], axis=-1)

        # -> (batch, TARGET_LENGTH, 1)
        return self.model_out(combined)

    def get_config(self):
        """Returns the base config extended with the name and both sub-models."""
        config = dict(super().get_config())  # TODO empty -> remove?
        config.update(
            {
                "name": self.name,
                "model_in": tf.keras.saving.serialize_keras_object(self.model_in),
                "model_out": tf.keras.saving.serialize_keras_object(self.model_out),
            }
        )

        return config

    @classmethod
    def from_config(cls, config):
        """Rebuilds a wrapper (and both sub-models) from `get_config` output."""
        name = config.pop("name")
        serialized_in = config.pop("model_in")
        serialized_out = config.pop("model_out")

        return cls(
            tf.keras.saving.deserialize_keras_object(serialized_in),
            tf.keras.saving.deserialize_keras_object(serialized_out),
            name=name,
            **config,
        )

---

In [26]:
# number of epochs to train for during model selection
SELECTION_EPOCHS = 5

# input lengths to compare during model selection
INPUT_LENGTHS = (16, 32, 64, 96)

In [27]:
# draw 100 keys at random to use during model selection
key_sample = random.sample(list(dataframes.keys()), 100)

# make one DatasetStack per input length, all built from the same keys
ds = {length: DatasetStack(length, key_sample) for length in INPUT_LENGTHS}

In [51]:
# simple dense model to use as model_out for all models
# its input stacks the sequence produced by model_in (1 feature) with the
# metadata columns, hence 1 + len(METADATA_COLS) features per timestep
model_out = tf.keras.Sequential(
    [
        tf.keras.Input((TARGET_LENGTH, 1 + len(METADATA_COLS))),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(1),
    ]
)

In [29]:
def plot_selection_val_loss(models: dict[object, ModelWrapper], model_type: str):
    """Plots the validation loss logged during the selection of `models`.

    The log of each model is read from 'log/selection/<model name>.csv'.

    Args:
        models: the models to plot; only the values are used, the keys can be
            anything (input lengths, kernel sizes, ...).
        model_type: label used in the title of the plot.

    Returns:
        A plotly line plot with one line per model.
    """
    names = [model.name for model in models.values()]

    # read the validation loss for each model from the logs
    # the epoch index is taken from the rows of the logs, so that the plot
    # does not depend on the logs having exactly SELECTION_EPOCHS rows
    val_loss_df = pl.concat(
        [
            pl.read_csv(os.path.join("log", "selection", f"{name}.csv")).select(
                pl.col("val_loss").alias(name)
            )
            for name in names
        ],
        how="horizontal",
    ).with_row_index("epoch")

    # plot the validation loss of each model
    plot = px.line(
        data_frame=val_loss_df,
        x="epoch",
        y=names,
        title=f"validation loss of {model_type} models",
    )

    return plot

### dense model

- pros: performs very well
- cons: reaches a plateau very early in training (-> limited)

In [30]:
def make_dense_model(input_length: int) -> ModelWrapper:
    """Returns a compiled dense model named 'dense-<input_length>'.

    The input part applies a dense layer to every timestep, flattens the
    result and maps it to TARGET_LENGTH values; the output part is a clone of
    `model_out`.
    """
    dense_in = tf.keras.Sequential(
        [
            tf.keras.Input((input_length, 1)),
            tf.keras.layers.Dense(16, activation="relu"),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(TARGET_LENGTH),
            tf.keras.layers.Reshape((TARGET_LENGTH, 1)),
        ]
    )

    wrapper = ModelWrapper(
        dense_in,
        tf.keras.models.clone_model(model_out),
        name=f"dense-{input_length}",
    )

    wrapper.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss=LOSS,
    )

    return wrapper

In [31]:
dense_models = {}

# train one dense model per input length, logging its history to
# log/selection/<model name>.csv
for input_length in INPUT_LENGTHS:
    model = make_dense_model(input_length)
    log_path = os.path.join("log", "selection", f"{model.name}.csv")
    stack = ds[input_length]

    model.fit(
        stack.train,
        validation_data=stack.valid,
        epochs=SELECTION_EPOCHS,
        verbose=0,
        callbacks=[tf.keras.callbacks.CSVLogger(log_path, append=False)],
    )

    dense_models[input_length] = model

In [32]:
plot_selection_val_loss(dense_models, "dense")

### convolutional model

- pros: does not depend on input length, learns fast
- cons: significantly worse performance?

In [31]:
def make_conv_model(kernel_sizes: tuple[int, ...]) -> ModelWrapper:
    """Returns a compiled convolutional model named 'conv-<kernel sizes>'.

    The input part stacks one Conv1D layer per kernel size, keeps the last
    timestep of the result and uses its channels as the TARGET_LENGTH values
    of the intermediate sequence; the output part is a clone of `model_out`.

    Args:
        kernel_sizes: kernel size of each convolutional layer, in order.

    Raises:
        ValueError: if `kernel_sizes` is empty.
    """
    if not kernel_sizes:
        raise ValueError("kernel_sizes must contain at least one kernel size")

    model_name = "conv-" + "".join(str(x) for x in kernel_sizes)

    model = tf.keras.Sequential()
    last_index = len(kernel_sizes) - 1
    for index, kernel_size in enumerate(kernel_sizes):
        model.add(
            tf.keras.layers.Conv1D(
                # the channels of the last layer become the target timesteps,
                # so there must be exactly TARGET_LENGTH of them
                filters=TARGET_LENGTH if index == last_index else 16,
                kernel_size=kernel_size,
                activation="relu",
            )
        )

    # keep the last timestep only: (batch, steps, channels) -> (batch, channels)
    model.add(tf.keras.layers.Lambda(lambda xs: xs[:, -1, :]))
    model.add(tf.keras.layers.Reshape((TARGET_LENGTH, 1)))

    model = ModelWrapper(model, tf.keras.models.clone_model(model_out), name=model_name)

    model.compile(
        loss=LOSS,
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    )

    return model

In [34]:
conv_models = {}

# train one conv model per combination of kernel sizes, logging its history
# to log/selection/<model name>.csv
for kernel_sizes in ((3, 3), (5, 5), (3, 3, 5), (3, 5, 7)):
    model = make_conv_model(kernel_sizes)
    log_path = os.path.join("log", "selection", f"{model.name}.csv")

    # the conv model does not depend on input length
    # it only requires long enough input sequences
    stack = ds[32]

    model.fit(
        stack.train,
        validation_data=stack.valid,
        epochs=SELECTION_EPOCHS,
        verbose=0,
        callbacks=[tf.keras.callbacks.CSVLogger(log_path, append=False)],
    )

    conv_models[kernel_sizes] = model

In [35]:
plot_selection_val_loss(conv_models, "conv")

### LSTM model

- pros: works with any input length, good performance (with long inputs) 
- cons: very slow to train

In [59]:
# input part: two stacked LSTM layers accepting sequences of any length;
# the units of the second layer become the TARGET_LENGTH target timesteps
lstm_in = tf.keras.Sequential(
    [
        tf.keras.Input(batch_shape=[None, None, 1]),
        tf.keras.layers.LSTM(16, return_sequences=True),
        tf.keras.layers.LSTM(TARGET_LENGTH, return_sequences=False),
        tf.keras.layers.Reshape((TARGET_LENGTH, 1)),
    ]
)

# template of the LSTM model, cloned before every training
lstm_model = ModelWrapper(
    lstm_in,
    tf.keras.models.clone_model(model_out),
    name="LSTM",
)

In [37]:
lstm_models = {}

# train a copy of the model using each input length
for input_length in INPUT_LENGTHS:
    # Rebuild the model from the config of the template, changing only its
    # name. The name is thus set through the constructor rather than through
    # the private `_name` attribute, which is not guaranteed to back
    # `model.name` in every Keras version.
    config = lstm_model.get_config()
    config["name"] = f"LSTM-{input_length}"
    model = ModelWrapper.from_config(config)

    logger = tf.keras.callbacks.CSVLogger(
        os.path.join("log", "selection", f"{model.name}.csv"),
        append=False,
    )

    model.compile(
        loss=LOSS,
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    )

    model.fit(
        ds[input_length].train,
        validation_data=ds[input_length].valid,
        epochs=SELECTION_EPOCHS,
        verbose=0,
        callbacks=[logger],
    )

    lstm_models[input_length] = model

In [53]:
# LSTM-96 did not train for some reason, the log is from training a clone afterwards
# TODO if this happens again, figure out why

In [52]:
plot_selection_val_loss(lstm_models, "LSTM")

In [None]:
# evaluate every LSTM model on the validation set of every input length:
# one column per model, one row per evaluation length
# TODO check that this matches the 'lstm_eval_df.csv' backup file
eval_columns = {}

for model in lstm_models.values():
    losses = []
    for eval_length in INPUT_LENGTHS:
        losses.append(model.evaluate(ds[eval_length].valid, verbose=0))
    eval_columns[model.name] = losses

# NOTE need to have the eval_length as string otherwise plotly adds values
eval_length_labels = pl.Series("eval_length", [str(x) for x in INPUT_LENGTHS])

lstm_eval_df = pl.DataFrame(eval_columns).insert_column(0, eval_length_labels)

In [60]:
# one group of bars per evaluation length, one bar per trained model
fig = go.Figure(
    data=[
        go.Bar(
            name=f"LSTM-{input_length}",
            x=lstm_eval_df["eval_length"],
            y=lstm_eval_df[f"LSTM-{input_length}"],
        )
        for input_length in INPUT_LENGTHS
    ]
)

fig.update_layout(
    title="error of LSTM models evaluated on various input lengths",
    xaxis_title="evaluation input length",
)

fig.show()

# NOTE see that LSTM-96 outperforms the others on all input lengths except 16

## models without metadata

For testing only. To be more efficient, these models would need a dedicated dataset without the metadata columns, and should not go through the `ModelWrapper`.

In [None]:
# a model_out that drops the metadata: it keeps only feature 0 of its input,
# i.e. the sequence produced by model_in, and returns it unchanged
model_out_void = tf.keras.Sequential(
    [tf.keras.layers.Lambda(lambda xs: tf.gather(xs, indices=[0], axis=-1))]
)

In [None]:
# NOTE the LSTM model without metadata is significantly worse
simple_models = {}

# the input length of the dense model is fixed: it must match the datasets
# used for training
simple_models["dense"] = tf.keras.Sequential(
    [
        tf.keras.Input((64, 1)),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(TARGET_LENGTH),
        tf.keras.layers.Reshape((TARGET_LENGTH, 1)),
    ]
)

# the channels of the last conv layer become the target timesteps, so there
# must be exactly TARGET_LENGTH of them
simple_models["conv"] = tf.keras.Sequential(
    [
        tf.keras.layers.Conv1D(filters=16, kernel_size=5, activation="relu"),
        tf.keras.layers.Conv1D(
            filters=TARGET_LENGTH, kernel_size=5, activation="relu"
        ),
        tf.keras.layers.Lambda(lambda xs: xs[:, -1, :]),
        tf.keras.layers.Reshape((TARGET_LENGTH, 1)),
    ]
)

# Stored in `simple_models` only: the name `lstm_model` must keep referring to
# the ModelWrapper named "LSTM", which is cloned later on to make the final
# LSTM model.
simple_models["lstm"] = tf.keras.Sequential(
    [
        tf.keras.layers.LSTM(16, return_sequences=True),
        tf.keras.layers.LSTM(TARGET_LENGTH, return_sequences=False),
        tf.keras.layers.Reshape((TARGET_LENGTH, 1)),
    ]
)

# wrap every model with an output part that ignores the metadata, and compile
for key, model in list(simple_models.items()):
    simple_models[key] = ModelWrapper(
        model, tf.keras.models.clone_model(model_out_void), name=f"simple-{key}"
    )

    simple_models[key].compile(
        loss=LOSS,
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    )

In [None]:
simple_history = {}  # NOTE never filled by the loop below

# train every model without metadata on the datasets of input length 64,
# logging its history to log/selection/<model name>.csv
for key, model in simple_models.items():
    log_path = os.path.join("log", "selection", f"{model.name}.csv")
    stack = ds[64]

    model.fit(
        stack.train,
        validation_data=stack.valid,
        epochs=SELECTION_EPOCHS,
        verbose=0,
        callbacks=[tf.keras.callbacks.CSVLogger(log_path, append=False)],
    )

In [None]:
# TODO plot validation loss of simple models

# plot_selection_val_loss(simple_models, "simple")

## Conclusion

train 3 models (with metadata):
- `dense-64`
- `conv-55`
- `LSTM-96`

In [None]:
# create new instances of the selected models to train
# NOTE(review): `lstm_model` is expected to be the ModelWrapper named "LSTM"
# here (the training log is later read from 'LSTM.csv'); make sure no cell in
# between has rebound the name, e.g. to a Sequential model without metadata
models = {
    "dense": make_dense_model(input_length=64),
    "conv": make_conv_model((5, 5)),
    "lstm": tf.keras.models.clone_model(lstm_model),  # train with input_length = 96
}

# free some memory and variable names
del dense_models, conv_models, lstm_models, simple_models, ds

---
# TRAINING
---

In [65]:
# make folders to store the training logs and models
os.makedirs(os.path.join("log", "training"), exist_ok=True)
os.makedirs("models", exist_ok=True)

In [66]:
# TODO after training, make a new validation set (by sampling new keys)
# and eval each bin model on it to see which bin gave the best model
# it should be the last model (in theory)

In [None]:
# compile every selected model with the shared loss and its own Adam optimizer
for model in models.values():
    optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(optimizer=optimizer, loss=LOSS)

In [49]:
# number of epochs per sample
SAMPLE_EPOCHS = 10

keys = list(dataframes.keys())


def train_model(
    model: ModelWrapper,
    input_length: int,
    sample_count: int = 10,
    sample_size: int = 100,
):
    """Trains `model` on successive random samples of series.

    For each of the `sample_count` rounds, `sample_size` keys are drawn at
    random from `keys`, the corresponding datasets are built and the model is
    trained on them for up to SAMPLE_EPOCHS epochs. The best model of every
    round is saved to 'models/<model name>-<round>.keras', and the history of
    all rounds is appended to 'log/training/<model name>.csv'.
    """
    # a single logger, in append mode, records all the rounds one after another
    log_path = os.path.join("log", "training", f"{model.name}.csv")
    csv_logger = tf.keras.callbacks.CSVLogger(log_path, append=True)

    for sample_id in range(sample_count):
        # draw the keys of this round and build the matching datasets
        stack = DatasetStack(input_length, random.sample(keys, sample_size))

        # the checkpoint file name carries the round number
        checkpoint_path = os.path.join("models", f"{model.name}-{sample_id:02d}.keras")

        callbacks = [
            tf.keras.callbacks.ModelCheckpoint(
                checkpoint_path, monitor="val_loss", save_best_only=True
            ),
            csv_logger,
            # train for at least half the epochs before stopping
            tf.keras.callbacks.EarlyStopping(
                restore_best_weights=True, start_from_epoch=SAMPLE_EPOCHS // 2
            ),
        ]

        model.fit(
            stack.train,
            validation_data=stack.valid,
            epochs=SAMPLE_EPOCHS,
            verbose=0,
            callbacks=callbacks,
        )

In [None]:
# train the dense model
train_model(models["dense"], input_length=64)

In [None]:
# train the conv model
train_model(models["conv"], input_length=64)

In [None]:
# train the LSTM model
train_model(models["lstm"], input_length=96)

---

In [40]:
lstm_train_df = pl.read_csv("log/training/LSTM.csv").with_row_index()

In [41]:
px.line(data_frame=lstm_train_df, x="index", y=["loss", "val_loss"])

In [60]:
lstm_model.get_config()

{'name': 'LSTM',
 'model_in': {'module': 'keras',
  'class_name': 'Sequential',
  'config': {'name': 'sequential_6',
   'layers': [{'module': 'keras.layers',
     'class_name': 'InputLayer',
     'config': {'batch_input_shape': (None, None, 1),
      'dtype': 'float32',
      'sparse': False,
      'ragged': False,
      'name': 'input_5'},
     'registered_name': None},
    {'module': 'keras.layers',
     'class_name': 'LSTM',
     'config': {'name': 'lstm_8',
      'trainable': True,
      'dtype': 'float32',
      'return_sequences': True,
      'return_state': False,
      'go_backwards': False,
      'stateful': False,
      'unroll': False,
      'time_major': False,
      'units': 16,
      'activation': 'tanh',
      'recurrent_activation': 'sigmoid',
      'use_bias': True,
      'kernel_initializer': {'module': 'keras.initializers',
       'class_name': 'GlorotUniform',
       'config': {'seed': None},
       'registered_name': None},
      'recurrent_initializer': {'module':

In [57]:
tf.keras.models.load_model("models/LSTM-02.keras")

TypeError: Error when deserializing class 'InputLayer' using config={'batch_shape': [None, None, 1], 'dtype': 'float32', 'sparse': False, 'name': 'input_layer_10'}.

Exception encountered: Unrecognized keyword arguments: ['batch_shape']

In [43]:
# load the checkpoint saved for each of the 10 training rounds of the LSTM model
# NOTE(review): the error below rejects the `batch_shape` keyword of InputLayer,
# whereas the config printed by `lstm_model.get_config()` in this environment
# uses `batch_input_shape`; presumably the checkpoints were written by a
# different Keras version than the one loading them -- confirm, and load them
# with the version that saved them
lstm_models = {
    f"LSTM-{sample_id:02d}": tf.keras.models.load_model(
        f"models/LSTM-{sample_id:02d}.keras"
    )
    for sample_id in range(10)
}

TypeError: Error when deserializing class 'InputLayer' using config={'batch_shape': [None, None, 1], 'dtype': 'float32', 'sparse': False, 'name': 'input_layer_10'}.

Exception encountered: Unrecognized keyword arguments: ['batch_shape']