Notebook is forked from here

https://www.kaggle.com/code/hamzaghanmi/tps-april-tensorflow-bi-lstm
    
If you find this notebook useful, please upvote the original as well.

In [None]:
!pip install --upgrade -q wandb

In [None]:
import wandb
import os

os.environ["WANDB_SILENT"] = "true"

from kaggle_secrets import UserSecretsClient
from wandb.keras import WandbCallback

# Read the Weights & Biases API key and entity from Kaggle secrets so that
# no credential is hard-coded in the notebook.
user_secrets = UserSecretsClient()
wandb_api = user_secrets.get_secret("wandb_api") 
wandb_ent = user_secrets.get_secret("wandb_entity") 

# Authenticate this session with the retrieved API key.
wandb.login(key=wandb_api)

In [None]:
# Start a Weights & Biases run for this notebook (empty config).
wandb.init(
    project="kaggle-tab-april-22-tpu",
    entity=wandb_ent,
    name='tpu run s1d v2 rate 0.4',
    config={},
)

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import os
import tensorflow as tf
import time, logging, gc
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.metrics import roc_auc_score

from tensorflow.keras.layers import *
from tensorflow.keras import *
from tensorflow.keras.callbacks import *
from sklearn.model_selection import KFold, GroupKFold
from tensorflow.keras.metrics import AUC
import matplotlib.pyplot as plt   

In [None]:
def seed_everything(seed=0):
    """Seed the Python, NumPy and TensorFlow RNGs and set determinism env vars.

    Args:
        seed: Integer seed applied to every generator (default 0).
    """
    # Environment flags: hash seed mirrors `seed`, deterministic TF ops are requested.
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })
    # Apply the same seed to each library-level generator.
    for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
        set_seed(seed)

In [None]:
NFOLDS = 10  # number of CV folds: selects the `fold_subject_{NFOLDS}` column, sizes FOLDS and GroupKFold

In [None]:
# Competition inputs: sensor readings per (sequence, step) for train and test,
# the sample submission template, and the labels that are merged onto train by "sequence".
train = pd.read_csv('../input/tabular-playground-series-apr-2022/train.csv')
test = pd.read_csv('../input/tabular-playground-series-apr-2022/test.csv')
submission = pd.read_csv("../input/tabular-playground-series-apr-2022/sample_submission.csv")
labels = pd.read_csv("../input/tabular-playground-series-apr-2022/train_labels.csv")
# NOTE(review): `groups` is only referenced by a commented-out GroupKFold split in the training cell.
groups = train["sequence"]

# EDA

In [None]:
train.head()

In [None]:
test.head()

In [None]:
labels.head()

## <h4> Data Description </h4>
* <b>sequence</b> - a unique id for each sequence 
* <b>subject</b> - a unique id for the subject in the experiment
* <b>step</b> - time step of the recording, in one second intervals
* <b>sensor_00 - sensor_12</b> - the value for each of the thirteen sensors at that time step 
* <b>state</b> - the state associated with each sequence (provided in <code>train_labels.csv</code>); this is the target to predict
## <h4> Objective 🤾🏻‍♂️ </h4>
* For each sequence in the test set, we will predict a probability for the state variable

In [None]:
train.info()

In [None]:
test.info()

In [None]:
features  = [col for col in test.columns if col not in ("sequence","step","subject")]

In [None]:
train[features].describe() 

In [None]:
# adding labels to train data
train = pd.merge(train, labels,how='left', on="sequence")

# Feature Engineering

In [None]:
def addFeatures(df, feature_cols=None):
    """Append first- and second-order lag differences for each feature column.

    For every column ``c`` in ``feature_cols`` two columns are added, in the
    order ``c_diff1``, ``c_diff2``:

    * ``c_diff1 = c[t] - c[t-1]``
    * ``c_diff2 = c[t] - c[t-2]``

    Lags are taken within each ``sequence`` group, so differences never cross
    a sequence boundary. Rows without a predecessor (the first one/two rows of
    each sequence) get 0. Missing values anywhere in ``df`` are replaced by 0
    before differencing, so every feature is treated the same way.

    Args:
        df: DataFrame containing a ``sequence`` column and the feature
            columns. It is modified in place.
        feature_cols: Iterable of column names to difference. Defaults to the
            notebook-level ``features`` list.

    Returns:
        The same ``df`` object with the new columns added.
    """
    if feature_cols is None:
        feature_cols = features
    feature_cols = list(feature_cols)
    if not feature_cols:
        # Nothing to difference; leave the frame untouched.
        return df

    # Fill pre-existing gaps once, up front, instead of once per feature.
    df.fillna(0, inplace=True)

    # Build the grouping once and shift all feature columns together.
    by_sequence = df.groupby('sequence')[feature_cols]
    lag1 = by_sequence.shift(1)
    lag2 = by_sequence.shift(2)

    for feature in feature_cols:
        # NaNs here only come from rows with no in-sequence predecessor.
        df[feature + '_diff1'] = (df[feature] - lag1[feature]).fillna(0)
        df[feature + '_diff2'] = (df[feature] - lag2[feature]).fillna(0)
    return df

train = addFeatures(train)
test = addFeatures(test)

In [None]:
Window = 60  # rows (time steps) per sequence; used to fold the flat rows into (n_sequences, Window, ...)

In [None]:
# Fold the per-row 'state' column (merged in from the labels) into one row of
# Window values per sequence.
# NOTE(review): assumes rows are ordered by sequence/step with exactly Window rows
# per sequence — confirm against the input data.
y = train['state'].to_numpy().reshape(-1, Window)
# Drop the identifier columns (and the target from train) so only model inputs remain.
train.drop(["sequence","step","subject","state"], axis=1, inplace=True)
test.drop(["sequence","step","subject"], axis=1, inplace=True)

In [None]:
# Standardise every input column with statistics estimated on the training rows,
# then apply that same fitted transform to the test rows.
sc = StandardScaler()
train = sc.fit_transform(train)
test = sc.transform(test)

In [None]:
# Fold the flat (rows, features) matrices into (sequences, Window, features).
n_features = train.shape[-1]
train = train.reshape(-1, Window, n_features)
test = test.reshape(-1, Window, n_features)

In [None]:
train.shape

In [None]:
# Detect hardware and build the matching tf.distribute strategy.
# Uses the stable (non-experimental) TPUStrategy and mixed-precision APIs; the
# `experimental` variants are deprecated/removed in newer TensorFlow releases.
print(tf.version.VERSION)
tf.get_logger().setLevel(logging.ERROR)
try:
    # TPU path: resolve the cluster, connect, initialise, then build the strategy.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    # Fallback when TPU detection raises ValueError: mirrored strategy
    # (works on GPU and multi-GPU) with XLA and float16 mixed precision.
    tpu = None
    strategy = tf.distribute.MirroredStrategy()
    policy = tf.keras.mixed_precision.Policy('mixed_float16')
    tf.config.optimizer.set_jit(True)  # XLA compilation
    tf.keras.mixed_precision.set_global_policy(policy)
    print('Mixed precision enabled')
print("REPLICAS: ", strategy.num_replicas_in_sync)

# Modeling

In [None]:
def plotHist(hist, fold=0):
    """Plot the per-epoch training and validation AUC curves of one fold.

    Args:
        hist: Object with a ``history`` mapping that contains the keys
            ``auc_{fold}`` and ``val_auc_{fold}`` (as returned by ``model.fit``
            in the training loop).
        fold: Fold index used to build those metric keys (default 0).
    """
    metric = f"auc_{fold}"
    # Training curve first, validation second, to match the legend order.
    for key in (metric, f"val_{metric}"):
        plt.plot(hist.history[key])
    plt.title("model performance")
    plt.xlabel("epoch")
    plt.ylabel("area_under_curve")
    plt.legend(["train", "validation"], loc="upper left")
    plt.ylim(0.8, 1)
    plt.show()

In [None]:
def createModel(fold=0):
    """Build and compile the stacked bidirectional-LSTM classifier.

    The input shape is taken from the last two axes of the global ``train``
    array. After a spatial dropout, five bidirectional LSTM layers
    (768/512/384/256/128 units, all returning sequences) are stacked; their
    outputs are concatenated on axis 2 and passed through a SELU dense layer,
    dropout, and a single-unit sigmoid dense layer.

    Args:
        fold: Fold index; only used to name the AUC metric ``auc_{fold}`` so
            each fold's callbacks can monitor its own metric.

    Returns:
        The compiled Keras model (Adam optimizer, binary cross-entropy loss),
        created inside ``strategy.scope()``.
    """
    lstm_units = (768, 512, 384, 256, 128)
    with strategy.scope():
        inputs = Input(shape=(train.shape[-2:]))
        hidden = SpatialDropout1D(rate=0.4)(inputs)

        # Each recurrent layer feeds the next; every layer's output is kept
        # so all of them can be concatenated afterwards.
        recurrent_outputs = []
        for units in lstm_units:
            hidden = Bidirectional(LSTM(units, return_sequences=True))(hidden)
            recurrent_outputs.append(hidden)

        merged = Concatenate(axis=2)(recurrent_outputs)
        head = Dense(128, activation='selu')(merged)
        head = Dropout(0.2)(head)
        outputs = Dense(units=1, activation="sigmoid")(head)

        model = Model(inputs=inputs, outputs=outputs, name='DNN_Model')
        model.compile(
            optimizer="adam",
            loss="binary_crossentropy",
            metrics=[AUC(name=f'auc_{fold}')],
        )
    return model

In [None]:
model = createModel()
model.summary()

In [None]:
utils.plot_model(createModel())

In [None]:
# Precomputed fold assignment: one fold id per row of this file, read from the
# `fold_subject_{NFOLDS}` column.
# NOTE(review): presumably rows are in the same sequence order as `train`/`y`
# (they are used as boolean masks over both) — verify against the prep notebook.
fold_setup = pd.read_csv('../input/tab-april22-seq-data-prep-v1/train_labels.csv')
fold_labels = fold_setup[f'fold_subject_{NFOLDS}'].values
# Display the distinct fold ids present.
np.unique(fold_labels)

In [None]:
# Fold ids to train on: 0 .. NFOLDS-1.
FOLDS = list(range(NFOLDS))
FOLDS

In [None]:
# Cross-validated training: one model per fold id in FOLDS, using the
# precomputed `fold_labels` assignment to split train/validation.
# `kf` is not used for splitting; it is kept because the submission cell reads kf.n_splits.
kf = GroupKFold(n_splits=NFOLDS)

test_preds = []                            # one (n_test_sequences,) array per fold
cv_preds = np.zeros((len(fold_labels),))   # out-of-fold prediction per training sequence

for fold in FOLDS:
    print(f"****** fold: {fold+1} *******")
    seed_everything(seed=0+fold)

    # Boolean masks over sequences: everything outside the fold trains, the fold validates.
    train_idx = fold_labels != fold
    test_idx = fold_labels == fold

    X_train, X_valid = train[train_idx], train[test_idx]
    y_train, y_valid = y[train_idx], y[test_idx]

    # The AUC metric is named per fold inside createModel, so the monitor key is too.
    monitor = f"val_auc_{fold}"
    lr = ReduceLROnPlateau(monitor=monitor, mode='max', factor=0.7, patience=1, verbose=1)
    es = EarlyStopping(monitor=monitor, mode='max', patience=5, verbose=1, restore_best_weights=True)
    wb = WandbCallback(
        monitor=monitor,
        log_gradients=False,
        save_model=True,
        mode='max',
        save_weights_only=True,
        save_graph=False,
    )

    model = createModel(fold=fold)

    history = model.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=30,
        batch_size=256,
        callbacks=[es, lr, wb],
    )

    # The model ends in Dense(1) applied to the whole sequence, so predictions
    # have a trailing unit axis and one value per time step. Drop the unit axis
    # and average over time to get ONE probability per sequence, for the test
    # set as well as for the out-of-fold predictions (previously the test
    # predictions were left per-time-step and could not be stored in a single
    # submission column).
    test_preds.append(model.predict(test).squeeze(axis=-1).mean(axis=1))
    cv_preds[test_idx] = model.predict(X_valid).squeeze(axis=-1).mean(axis=1)

    plotHist(history, fold=fold)

    # Release the fold's arrays and model before building the next one.
    del X_train, X_valid, y_train, y_valid, model, history
    gc.collect()

In [None]:
np.save("cv_preds",cv_preds)

In [None]:
roc_auc_score(y.mean(axis=1).reshape(-1, 1), cv_preds.reshape(-1, 1))

# Submission

In [None]:
# Blend the folds by averaging over the folds that were actually predicted
# (len(test_preds)), so the result stays correct when FOLDS is a subset of the folds.
blended = np.mean(test_preds, axis=0)
if blended.ndim > 1:
    # Per-time-step predictions: collapse to one probability per sequence.
    blended = blended.mean(axis=1)
submission["state"] = blended
submission.to_csv('submission.csv', index=False)
submission

# References:
* 1.[https://www.kaggle.com/code/ryanbarretto/lstm-baseline](https://www.kaggle.com/code/ryanbarretto/lstm-baseline)
* 2.[https://www.kaggle.com/code/hamzaghanmi/tensorflow-bi-lstm-with-tpu](https://www.kaggle.com/code/hamzaghanmi/tensorflow-bi-lstm-with-tpu)