In [1]:
#!/usr/bin/env python3
import argparse
import os
import random
import sys

import keras
import numpy as np
import tensorflow as tf
import wandb
from keras import mixed_precision
from tensorflow.data import Dataset
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import (ConvLSTM2D, Dense, Dropout, Flatten, Input, MaxPooling3D, TimeDistributed, GlobalAveragePooling2D)
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator, image_utils
from loguru import logger
from sklearn.metrics import classification_report, confusion_matrix
from wandb.keras import WandbCallback, WandbModelCheckpoint, WandbMetricsLogger, WandbEvalCallback
from keras.callbacks import Callback
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from tqdm import tqdm
import pickle
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint

# Default hyperparameters and run settings.

# Input geometry: number of frames per sequence and the size each frame is
# loaded at (presumably 640x480 source frames downscaled by 5 -- confirm).
SEQUENCE_LENGTH = 16
IMAGE_HEIGHT, IMAGE_WIDTH = 480 // 5, 640 // 5

# Optimisation settings.
BATCH_SIZE, EPOCHS = 8, 50
LEARNING_RATE = 1e-4
PATIENCE = 5  # NOTE(review): not referenced by the code visible in this file.

# When True, logging is configured at DEBUG level instead of INFO.
DEBUG = False

# File the trained weights are saved to; the name encodes the input geometry.
FILENAME = "weights-{}_{}_{}.h5".format(SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH)


def setup_logging(level="DEBUG", show_module=False):
    """
    Set up a more readable loguru format on stderr.

    All handlers currently registered on the loguru logger are removed and a
    single colorized stderr sink is installed, so the function can be called
    repeatedly (for example when a notebook cell is re-run) without raising
    and without stacking duplicate sinks.

    Args:
        level: Minimum level passed to the stderr sink (e.g. "DEBUG", "INFO").
        show_module: When True, each record is prefixed with the source file
            name (padded/truncated to 10 characters) and the line number.
    """
    # logger.remove(0) raises ValueError once handler 0 no longer exists, which
    # is the case on every call after the first. remove() without an id drops
    # whatever handlers are present and never fails.
    logger.remove()

    location = u"{file:10.10}…:{line:<3} | " if show_module else ""
    log_fmt = (u"<green>[" + location +
               u"{time:HH:mm:ss.SSS}]</green> <level>{level: <8}</level> | <level>{message}</level>")
    logger.add(sys.stderr, level=level, format=log_fmt, colorize=True, backtrace=True, diagnose=True)


def create_model(input_shape):
    """Build the ConvLSTM classifier, print its summary and return it.

    The network is a single ConvLSTM2D layer (returning the full sequence),
    flattened into a 4-unit ReLU layer and a 1-unit sigmoid output.

    Args:
        input_shape: Shape of one input sample, passed to ``Input(shape=...)``.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    sequence_in = Input(shape=input_shape)

    # Layers are instantiated in application order so that the automatically
    # generated layer names stay the same.
    stack = [
        ConvLSTM2D(
            filters=2,
            kernel_size=(3, 3),
            activation="tanh",
            recurrent_dropout=0.2,
            return_sequences=True,
        ),
        Flatten(),
        Dense(4, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]

    tensor = sequence_in
    for layer in stack:
        tensor = layer(tensor)

    network = Model(inputs=sequence_in, outputs=tensor)
    network.summary()
    return network


class CustomBatchEndCallback(Callback):
    """Log sample training sequences to a Weights & Biases table.

    On every tenth training batch the first sequence of that batch is added to
    a ``wandb.Table`` with one column per frame. The table is logged under the
    key ``"data"`` at the end of each epoch and then reset.

    The sample is looked up as ``X_train[batch_ix * BATCH_SIZE]``, so it only
    matches what the model was fed if batches are drawn from ``X_train`` in
    order with a batch size of ``BATCH_SIZE``.

    NOTE(review): the displayed sample index is read from the module-level
    ``indices`` sequence, which must exist by the time training starts.
    Whether it really maps a training position to the original sample index
    should be confirmed against the code that builds it.

    Args:
        X_train: Indexable collection of training sequences (each an iterable
            of frames).
        y_train: Labels aligned with ``X_train``; ``1`` is shown as "Sleep",
            anything else as "Awake".
        **kwargs: Forwarded to ``keras.callbacks.Callback``.
    """

    def __init__(self, X_train, y_train, **kwargs):
        super().__init__(**kwargs)
        self.x_train = X_train
        self.y_train = y_train
        self.reset_test_table()

    def on_train_batch_end(self, batch_ix, logs=None):
        """Record the first sequence of every tenth batch in the table."""
        if batch_ix % 10 == 0:
            first = batch_ix * BATCH_SIZE
            # Frames and label of the first sample in the batch.
            X_step = self.x_train[first]
            y_step = "Sleep" if self.y_train[first] == 1 else "Awake"
            sample_ix = indices[first]

            # One wandb image per frame of this very sequence. The previous
            # code replaced each frame with the first frame of a different
            # sample (x_train[indices[first + k]][0]), which shows unrelated
            # images once the data is shuffled and can index past the end of
            # `indices` for late batches.
            images = [
                wandb.Image(frame, caption=f"Image {sample_ix + sequence_ix} - Label: {y_step}")
                for sequence_ix, frame in enumerate(X_step)
            ]

            # Row layout: sample index, ground-truth label, then the frames.
            self.test_table.add_data(sample_ix, y_step, *images)
            print(f" | Samples {sample_ix}-{sample_ix + SEQUENCE_LENGTH - 1} - Label: {y_step}")

        super().on_train_batch_end(batch_ix, logs)

    def reset_test_table(self):
        """Create an empty table with one column per frame of a sequence."""
        # The second column holds the ground-truth label, not a model output.
        columns = ['Index', 'Label']
        columns.extend(f'Sample {s + 1}' for s in range(SEQUENCE_LENGTH))
        self.test_table = wandb.Table(columns=columns)

    def on_epoch_end(self, epoch_ix, logs=None):
        """Log the table collected during the epoch and start a new one."""
        wandb.log({"data": self.test_table}, commit=True)
        self.reset_test_table()
        super().on_epoch_end(epoch_ix, logs)


def load_data(images_path, seq_length, image_height, image_width):
    """Load ``.jpg`` frames and build sliding-window sequences plus augmented copies.

    Files ending in ``.jpg`` are read in ascending order of the integer at the
    start of their name (the part before the first ``.`` and ``-``). Each
    frame is loaded as grayscale at ``(image_height, image_width)`` and scaled
    by 1/255. A frame is labelled 1 when its file name contains ``"sleep"``
    and 0 otherwise.

    Every run of ``seq_length`` consecutive frames (stride 1) becomes one
    sequence whose label is the label of its last frame. For each sequence an
    augmented copy is produced with random shift/zoom; the same seed is passed
    for every frame of a sequence.

    Args:
        images_path: Directory containing the frames.
        seq_length: Number of frames per sequence.
        image_height: Height frames are resized to.
        image_width: Width frames are resized to.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: all original sequences followed by
        all augmented sequences, and the matching labels.

    Raises:
        ValueError: If a ``.jpg`` file name does not start with an integer.
    """
    # Filter before sorting: the sort key parses the file name as an integer,
    # so sorting the raw directory listing would raise ValueError on any stray
    # non-image entry before the ".jpg" check had a chance to skip it.
    frame_files = sorted(
        (name for name in os.listdir(images_path) if name.endswith(".jpg")),
        key=lambda name: int(name.split(".")[0].split("-")[0]),
    )
    datagen = ImageDataGenerator(width_shift_range=0.2, height_shift_range=0.2, zoom_range=0.2)

    X, y = [], []
    X_aug, y_aug = [], []
    window, window_labels = [], []  # the most recent (up to seq_length) frames

    for file in tqdm(frame_files, total=len(frame_files)):
        logger.debug(f"Loading {file}")
        image = image_utils.load_img(
            os.path.join(images_path, file),
            target_size=(image_height, image_width),
            color_mode="grayscale",
        )
        window.append(image_utils.img_to_array(image) / 255.0)
        window_labels.append(1 if "sleep" in file else 0)

        if len(window) != seq_length:
            continue

        X.append(np.array(window))
        y.append(window_labels[-1])

        # Augmentation: one seed per sequence, reused for each of its frames.
        seed = random.randint(0, 1000)
        X_aug.append(np.array([datagen.random_transform(frame, seed=seed) for frame in window]))
        y_aug.append(window_labels[-1])

        # Slide the window forward by one frame.
        del window[0]
        del window_labels[0]

    X = np.concatenate((np.array(X), np.array(X_aug)), axis=0)
    y = np.concatenate((np.array(y), np.array(y_aug)), axis=0)
    return X, y


def prepare_data():
    """Load the sequences from ``./data_slim`` and split them 80/20.

    Returns:
        Dict with the full data (``"X"``, ``"y"``), the shuffled train and
        validation parts (``"X_train"``, ``"y_train"``, ``"X_val"``,
        ``"y_val"``) and ``"indices"``, a shuffled list of the positions
        ``0 .. len(X_train) - 1``.

    NOTE(review): ``indices`` is produced by a separate shuffle call rather
    than taken from the split itself; confirm it carries the meaning its
    consumers expect.
    """
    X, y = load_data("./data_slim", SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH)

    parts = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)
    X_train, X_val, y_train, y_val = parts

    prepared = dict(X=X, y=y, X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val)
    prepared["indices"] = shuffle(range(len(X_train)), random_state=42)
    return prepared


def load_and_split_data():
    """Return the prepared dataset, using the ``.data`` directory as a cache.

    If every expected ``.npy`` file is present in ``.data`` the arrays are
    loaded from there. Otherwise the data is rebuilt with ``prepare_data()``
    and each entry is written to ``.data/<name>.npy``.

    NOTE: the cache is keyed by file name only, so it has to be deleted by
    hand after changing anything that affects the prepared data.

    Returns:
        Dict with the keys ``"X"``, ``"y"``, ``"X_train"``, ``"y_train"``,
        ``"X_val"``, ``"y_val"`` and ``"indices"``.
    """
    names = ["X", "y", "X_train", "y_train", "X_val", "y_val", "indices"]
    cache_files = {name: os.path.join(".data", f"{name}.npy") for name in names}

    # Checking only for the directory would crash in np.load when a previous
    # run was interrupted half-way through writing the cache; require every
    # file and rebuild otherwise.
    if all(os.path.isfile(path) for path in cache_files.values()):
        return {name: np.load(path) for name, path in cache_files.items()}

    data = prepare_data()
    os.makedirs(".data", exist_ok=True)
    for name, array in data.items():
        np.save(f".data/{name}.npy", array)
    return data


# Logging and device report.
log_level = "DEBUG" if DEBUG else "INFO"
setup_logging(log_level)
gpus = tf.config.list_physical_devices('GPU')
logger.info(f"Num GPUs Available: {len(gpus)}")
# Mixed precision training is currently disabled.

# Start a new wandb run to track this script.
run_config = {
    "optimizer": "adam",
    "loss": "binary_crossentropy",
    "metric": "accuracy",
    "epoch": EPOCHS,
    "batch_size": BATCH_SIZE,
}
wandb.init(project="aweful", config=run_config)

# Load (or build and cache) the dataset and expose its parts as globals.
d = load_and_split_data()
X, y = d["X"], d["y"]
X_train, y_train = d["X_train"], d["y_train"]
X_val, y_val = d["X_val"], d["y_val"]
indices = d["indices"]

# Build and compile the ConvLSTM model.
input_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 1)
model = create_model(input_shape)
optimizer = Adam(learning_rate=LEARNING_RATE)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Early stopping is currently not part of the callback list.
callbacks = [
    CustomBatchEndCallback(X_train, y_train),
    WandbCallback(save_model=True),
]

# Build the input pipeline on the CPU: fixed-size batches, in order.
with tf.device("CPU"):
    train = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train = train.batch(BATCH_SIZE, drop_remainder=True, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    train = train.prefetch(tf.data.experimental.AUTOTUNE)


2023-03-21 05:40:36.327818: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-21 05:40:37.962695: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
[32m[05:40:37.977][0m [1mINFO    [0m | [1mNum GPUs Available: 1[0m
2023-03-21 05:40:37.977367: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 16, 96, 128, 1)]  0         
                                                                 
 conv_lstm2d (ConvLSTM2D)    (None, 16, 94, 126, 2)    224       
                                                                 
 flatten (Flatten)           (None, 379008)            0         
                                                                 
 dense (Dense)               (None, 4)                 1516036   
                                                                 
 dense_1 (Dense)             (None, 1)                 5         
                                                                 
Total params: 1,516,265
Trainable params: 1,516,265
Non-trainable params: 0
_________________________________________________________________


2023-03-21 05:40:40.640994: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-03-21 05:40:40.641199: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-03-21 05:40:40.641356: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [2]:

# Train on the GPU.
with tf.device("GPU"):
    model.fit(
        train,
        epochs=EPOCHS,
        # NOTE(review): `train` is already batched; this argument is kept
        # because it presumably still sets the batch size used for the
        # array-based validation data -- confirm against the Keras docs.
        batch_size=BATCH_SIZE,
        validation_data=(X_val, y_val),
        callbacks=callbacks,
    )

# Save the model weights (reload later with model.load_weights(FILENAME)).
model.save_weights(FILENAME)

# Evaluate the model on the validation split.
loss, acc = model.evaluate(X_val, y_val, verbose=2)
logger.info(f"Validation accuracy: {acc:.4f}, loss: {loss:.4f}")

# Predict on the validation split and round the sigmoid outputs to classes.
y_pred = model.predict(X_val)
y_pred_classes = np.round(y_pred)

# Report the model performance on the validation split.
logger.info("\nConfusion matrix:\n" + str(confusion_matrix(y_val, y_pred_classes)))
logger.info("\nClassification report:\n" + str(classification_report(y_val, y_pred_classes)))

# Predict on the entire dataset to look for patterns.
with tf.device("CPU"):
    X_predict = Dataset.from_tensor_slices(X).batch(BATCH_SIZE)

model_predictions = model.predict(X_predict)
model_predictions = (model_predictions > 0.5).astype(int)

# Log every sample, flagging the ones the model got wrong. enumerate already
# supplies the counter, and ravel() yields plain scalars so the comparison
# below is not made on one-element arrays.
for i, (prediction, actual) in enumerate(zip(model_predictions.ravel(), y)):
    if prediction != actual:
        logger.warning(f"{i} PRED: {'sleep' if prediction else 'not sleep'}\t ACTUAL: {'sleep' if actual else 'not sleep'}")
    else:
        logger.success(f"{i} {'sleep' if prediction else 'not sleep'}")

wandb.finish()


Epoch 1/50


2023-03-21 05:40:41.473929: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [297]
	 [[{{node Placeholder/_1}}]]
2023-03-21 05:40:41.474108: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int64 and shape [297]
	 [[{{node Placeholder/_1}}]]
2023-03-21 05:40:42.544989: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): UNIMPLEMENTED: The Conv2D op currently only supports the NHWC tensor format on the CPU. The op was given the format: 

 | Samples 167-182 - Label: Awake)'

2023-03-21 05:40:46.634149: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'while/Placeholder_2' with dtype float and shape [?,94,126,2]
	 [[{{node while/Placeholder_2}}]]
2023-03-21 05:40:46.648981: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,?,96,128,1]
	 [[{{node inputs}}]]
2023-03-21 05:40:46.654423: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'states' with dtype float and shape [?,94,12

INFO:tensorflow:Assets written to: /home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best)... Done. 0.1s


Epoch 2/50
 | Samples 167-182 - Label: Awake)'

2023-03-21 05:40:54.427472: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'while/Placeholder_2' with dtype float and shape [?,94,126,2]
	 [[{{node while/Placeholder_2}}]]
2023-03-21 05:40:54.442426: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,?,96,128,1]
	 [[{{node inputs}}]]
2023-03-21 05:40:54.447941: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'states' with dtype float and shape [?,94,12

INFO:tensorflow:Assets written to: /home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best)... Done. 0.0s


Epoch 3/50
 | Samples 167-182 - Label: Awake)'

2023-03-21 05:40:57.858693: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'while/Placeholder_2' with dtype float and shape [?,94,126,2]
	 [[{{node while/Placeholder_2}}]]
2023-03-21 05:40:57.873585: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,?,96,128,1]
	 [[{{node inputs}}]]
2023-03-21 05:40:57.878917: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'states' with dtype float and shape [?,94,12

INFO:tensorflow:Assets written to: /home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best)... Done. 0.0s


Epoch 4/50
 | Samples 167-182 - Label: Awake)'

2023-03-21 05:41:01.443197: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'while/Placeholder_2' with dtype float and shape [?,94,126,2]
	 [[{{node while/Placeholder_2}}]]
2023-03-21 05:41:01.457906: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,?,96,128,1]
	 [[{{node inputs}}]]
2023-03-21 05:41:01.463284: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'states' with dtype float and shape [?,94,12

INFO:tensorflow:Assets written to: /home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best)... Done. 0.0s


Epoch 5/50
 | Samples 167-182 - Label: Awake)'

2023-03-21 05:41:04.856308: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'while/Placeholder_2' with dtype float and shape [?,94,126,2]
	 [[{{node while/Placeholder_2}}]]
2023-03-21 05:41:04.870853: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,?,96,128,1]
	 [[{{node inputs}}]]
2023-03-21 05:41:04.876104: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'states' with dtype float and shape [?,94,12

INFO:tensorflow:Assets written to: /home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best/assets


[34m[1mwandb[0m: Adding directory to artifact (/home/emi/Coding/aweful/wandb/run-20230321_054039-2wgw2cyp/files/model-best)... Done. 0.0s


Epoch 6/50
 | Samples 167-182 - Label: Awake)'