# Log in to Weights & Biases

In [None]:
# Replace `your_api_key` with a real W&B API key before running this cell;
# avoid committing the real key together with the notebook.
!wandb login your_api_key

# Load libraries

In [None]:
import os
import gc
import glob
import pandas as pd
import numpy as np

from sklearn.metrics import log_loss
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold

from tqdm.auto import tqdm

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # FATAL

import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import ModelCheckpoint
from wandb.keras import WandbMetricsLogger
from ap_metric import score as ap_score

import wandb
from wandb.keras import WandbMetricsLogger

# Show up to 200 rows when pandas objects are displayed.
pd.set_option('display.max_rows', 200)
# Register tqdm's progress-bar integration with pandas.
tqdm.pandas()

class CFG:
    """Notebook-wide configuration constants."""

    # --- environment / run mode ---
    kaggle = False  # when True the Kaggle input directory is used as data root
    type = 'train'  # alternatives noted by the author: test, full

    # --- optimisation ---
    train_epochs = 2
    batch_size = 32
    learning_rate = 1e-4
    # Multiplied by the epoch count to obtain the cosine-decay length.
    decay_step_ratio = 0.4

    # --- not referenced in the visible part of the notebook ---
    sleep_period = 30
    inter_sleep_period = 60
    axis = 3

# Root directory for every input file: the Kaggle input mount when
# CFG.kaggle is set, otherwise the local `data` folder.
path = '/kaggle/input/' if CFG.kaggle else 'data'

# Load data

In [None]:
# Load the anglez features and standardise them with the saved mean/std.
anglez_features = np.load(f'{path}/anglez_features.npy')
anglez_mean, anglez_std = np.load(f'{path}/anglez_mean.npy'), np.load(f'{path}/anglez_std.npy')
anglez_features = (anglez_features - anglez_mean) / anglez_std

# Load the enmo features and standardise them the same way.
enmo_features = np.load(f'{path}/enmo_features.npy')
enmo_mean, enmo_std = np.load(f'{path}/enmo_mean.npy'), np.load(f'{path}/enmo_std.npy')
enmo_features = (enmo_features - enmo_mean) / enmo_std

# Load the labels from the same root as every other input. The location used
# to be hard-coded to 'data/', which ignored `path` when CFG.kaggle is True.
labels = np.load(f'{path}/labels.npy')

# Load the index tables, dropping the 'Unnamed: 0' column stored in each CSV.
frames = pd.read_csv(f'{path}/frames.csv').drop('Unnamed: 0', axis=1)
series_idxs = pd.read_csv(f'{path}/series_idxs.csv').drop('Unnamed: 0', axis=1)
timeseries_idxs = pd.read_csv(f'{path}/timeseries_idxs.csv').drop('Unnamed: 0', axis=1)

# Grouped train-test split

In [None]:
def _subset_for(series_ids):
    """Return ((anglez, enmo), labels) for the rows belonging to `series_ids`."""
    in_subset = timeseries_idxs['series_id'].isin(series_ids)
    rows = timeseries_idxs.loc[in_subset].index
    return (anglez_features[rows], enmo_features[rows]), labels[rows]


# Split by series so that no series contributes to both sets: the first 80%
# of the unique series ids (in order of appearance) train, the rest validate.
series_list = timeseries_idxs['series_id'].unique()
train_threshold = int(0.8 * len(series_list))
train_list, val_list = series_list[:train_threshold], series_list[train_threshold:]

X, y = _subset_for(train_list)
X_val, y_val = _subset_for(val_list)

# Model

In [None]:
# define Model (ConvNet + LSTM)
def cnn_bilstm(output_layer_width, height, width):
    """Build and compile the ConvNet + bidirectional-LSTM sequence model.

    Each time step goes through three Conv2D/MaxPooling2D stages, is flattened
    and projected by a Dense layer; the per-step vectors then pass through a
    bidirectional LSTM and a final per-step Dense layer without activation,
    so the model outputs logits.

    Args:
        output_layer_width: number of units of the final per-step Dense layer.
        height: height of each single-channel input frame.
        width: width of each single-channel input frame.

    Returns:
        The compiled `Sequential` model (default Adam optimizer, binary focal
        cross-entropy computed from logits).
    """
    def per_step(layer, **kwargs):
        # Wrap `layer` so it is applied independently to every time step.
        return layers.TimeDistributed(layer, **kwargs)

    stack = [
        # Convolutional feature extractor; the sequence length is left open.
        per_step(layers.Conv2D(64, (5, 5), activation='elu'),
                 input_shape=(None, height, width, 1)),
        per_step(layers.MaxPooling2D((2, 2))),
        per_step(layers.Conv2D(128, (3, 3), activation='elu')),
        per_step(layers.MaxPooling2D((2, 2))),
        per_step(layers.Conv2D(128, (3, 3), activation='elu')),
        per_step(layers.MaxPooling2D((2, 2))),
        per_step(layers.Flatten()),
        per_step(layers.Dense(128, activation='elu')),
        layers.Dropout(0.25),
        # Temporal model over the per-step feature vectors.
        layers.Bidirectional(layers.LSTM(128, return_sequences=True)),
        layers.Dropout(0.25),
        # Linear head: one logit vector per time step.
        per_step(layers.Dense(output_layer_width, activation=None)),
    ]
    network = models.Sequential(stack)
    network.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.BinaryFocalCrossentropy(from_logits=True),
    )
    return network

# Train utils

In [None]:
def get_data(X, y):
    """Build a batched, prefetching `tf.data.Dataset` from a feature pair.

    Args:
        X: pair of arrays; the two are concatenated along axis 3.
        y: targets, sliced along the first axis together with the features.

    Returns:
        A dataset of (features, targets) batches of size `CFG.batch_size`.
        Samples keep their input order: nothing is shuffled or repeated.
    """
    first, second = X
    merged = tf.concat([first, second], axis=3)
    return (
        tf.data.Dataset.from_tensor_slices((merged, y))
        .batch(CFG.batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

# train function
def train_model(model, X, y, X_val, y_val, epochs, decay_step_ratio, lr):
    """Fit `model`, saving its weights after every epoch and logging to W&B.

    Args:
        model: compiled Keras model to train.
        X: pair of feature arrays, forwarded to `get_data`.
        y: training targets.
        X_val: validation feature pair, or None to train without validation.
        y_val: validation targets, or None to train without validation.
        epochs: number of training epochs.
        decay_step_ratio: fraction of `epochs` over which the learning rate
            is cosine-decayed.
        lr: initial learning rate of the schedule.

    Returns:
        The history object returned by `model.fit`.
    """
    ds = get_data(X, y)

    has_val = X_val is not None and y_val is not None
    val_ds = get_data(X_val, y_val) if has_val else None

    # `val_loss` is only logged when validation data is supplied. Putting it
    # in the file name unconditionally made the checkpoint callback fail while
    # formatting the path in the no-validation case, so fall back to `loss`.
    tracked = 'val_loss' if has_val else 'loss'
    file_name = 'saved-model-{epoch:02d}-{%s:.4f}.keras' % tracked
    checkpoint_path = os.path.join(path, 'train/weights', file_name)
    # NOTE(review): some Keras versions require a `.weights.h5` suffix when
    # save_weights_only=True -- confirm against the installed version.
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_weights_only=True,
        # Every epoch is saved (save_best_only=False), so `monitor` does not
        # select checkpoints; it now names a quantity that is actually logged
        # (the previous value 'metrics' with mode='max' never is).
        monitor=tracked,
        mode='min',
        save_best_only=False,
        save_freq='epoch',
        verbose=1,
    )

    # LR scheduler.
    # NOTE(review): the schedule is evaluated once per epoch with the epoch
    # index, so with alpha=0.0 the rate reaches 0 after
    # `epochs * decay_step_ratio` epochs and stays there -- confirm intended.
    decay_steps = epochs * decay_step_ratio
    cosine_decay = tf.keras.optimizers.schedules.CosineDecay(
        lr,
        decay_steps=decay_steps,
        alpha=0.0,
        name=None,
        warmup_target=None,
        warmup_steps=0,
    )

    def epoch_lr(epoch, current_lr=None):
        # LearningRateScheduler expects a plain float; the schedule object
        # returns a tensor, so convert it explicitly.
        return float(cosine_decay(epoch))

    lr_callback = tf.keras.callbacks.LearningRateScheduler(epoch_lr)

    wnb_callback = WandbMetricsLogger()

    # Train and hand the history back to the caller.
    hist = model.fit(
        ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=[cp_callback, lr_callback, wnb_callback],
    )
    return hist

# Train model

In [None]:
# NOTE(review): this TF1-compat session is created but not referenced again
# in the visible code -- confirm it has the intended effect on thread and GPU
# memory settings.
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=10, inter_op_parallelism_threads=10)
session_conf.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=session_conf)

# Initialize a new W&B run
wandb.init()

# Delete previous weights to avoid confusion. Search the directory that
# train_model writes its checkpoints to (os.path.join(path, 'train/weights'));
# the former pattern 'train/weights/*.keras' pointed at a different directory,
# so stale checkpoints were left behind. No ordering is needed for deletion.
file_list = glob.glob(os.path.join(path, 'train/weights', '*.keras'))
for f in file_list:
    os.remove(f)

# Input geometry: the two feature arrays are concatenated along axis 3 before
# being fed to the model, so the model width is the sum of both widths.
height = X[0].shape[2]
width = X[0].shape[3] + X[1].shape[3]

num_classes = y.shape[-1]
model = cnn_bilstm(num_classes, height=height, width=width)
hist = train_model(model, X, y, X_val, y_val, epochs=CFG.train_epochs, decay_step_ratio=CFG.decay_step_ratio, lr=CFG.learning_rate)

# Optional: persist the training history.
# df = pd.DataFrame(hist.history)
# df.index.name = 'epoch'
# df.to_csv(f'{path}train/model_training_log.csv')

# Evaluation

In [None]:
# Collect the checkpoints from the directory train_model writes to
# (os.path.join(path, 'train/weights')), ordered by the digits in the file
# name (epoch first, then loss). Only the base name is used for the sort key
# so digits in the directory path cannot affect the order.
file_list = sorted(
    glob.glob(os.path.join(path, 'train/weights', '*.keras')),
    key=lambda f: int(''.join(filter(str.isdigit, os.path.basename(f)))),
)
model = cnn_bilstm(num_classes, height=height, width=width)

# The model takes a single input made of both feature arrays concatenated, so
# build the validation input exactly as it is built for training.
val_ds = get_data(X_val, y_val)

for f in file_list:
    # Load this checkpoint's weights; without this every iteration would
    # predict with the same freshly initialised model.
    model.load_weights(f)
    # `preds` holds raw logits: the model's last layer has no activation.
    preds = model.predict(val_ds)
    # your code for evaluation