In [2]:
# This file is a modified version of the original notebook:
# https://www.kaggle.com/code/baurzhanurazalinov/parkinson-s-freezing-tdcsfog-training-code/notebook

**Configuration**

In [3]:
# run with different configurations
# Model 1 => val_subjects = ['07285e', '220a17', '54ee6e', '312788', '24a59d', '4bb5d0', '48fd62', '79011a', '7688c1']
# Model 2 => 
# val_subjects = ['07285e', '220a17', '54ee6e', '312788', '24a59d', '4bb5d0', '48fd62', '79011a', '7688c1']
# CFG = {..., 'fog_model_dim': 256, ..., 'fog_model_num_encoder_layers': 3, ... }
# LEARNING_RATE = 0.01/24
# GPU_BATCH_SIZE = 16

# Model 3 => val_subjects = ['e39bc5', '516a67', 'af82b2', '4dc2f8', '743f4e', 'fa8764', 'a03db7', '51574c', '2d57c2']
# Model 4 => val_subjects = ['5c0b8a', 'a03db7', '7fcee9', '2c98f7', '2a39f8', '4f13b4', 'af82b2', 'f686f0', '93f49f', '194d1d', '02bc69', '082f01']

In [4]:
# Data-blocking and model hyper-parameters.
# The values below apply to every model except model 2, which uses
# fog_model_dim=256, fog_model_num_encoder_layers=3, LEARNING_RATE=0.01/24
# and GPU_BATCH_SIZE=16.
CFG = dict(
    TPU=1,
    # block_size must be divisible by block_stride as well as patch_size.
    block_size=15552,
    block_stride=15552 // 16,
    patch_size=18,
    fog_model_dim=320,
    fog_model_num_heads=6,
    fog_model_num_encoder_layers=5,
    fog_model_num_lstm_layers=2,
    fog_model_first_dropout=0.1,
    fog_model_encoder_dropout=0.1,
    fog_model_mha_dropout=0.0,
)

# Optimisation schedule.
LEARNING_RATE = 0.000263158  # 0.01/38
STEPS_PER_EPOCH = 64
WARMUP_STEPS = 64
EPOCHS = 50
WEIGHTS = ''

# Subjects held out for validation.
val_subjects = [
    '07285e', '220a17', '54ee6e', '312788', '24a59d',
    '4bb5d0', '48fd62', '79011a', '7688c1',
]

# A block must split evenly into patches and into strides.
assert CFG['block_size'] % CFG['patch_size'] == 0
assert CFG['block_size'] % CFG['block_stride'] == 0

# Train and inference batch size (32 was used elsewhere, 4 locally).
GPU_BATCH_SIZE = 2



'''
Mean-std normalization function. 
Example input: shape (5000), dtype np.float32
Example output: shape (5000), dtype np.float32

Used to normalize AccV, AccML, AccAP values.

'''

def sample_normalize(sample):
    """Standardize ``sample`` by its own mean and standard deviation.

    Both statistics are reduced over every element of ``sample`` (no axis is
    passed). ``tf.math.divide_no_nan`` returns 0 wherever the denominator is
    0, so a constant input yields zeros rather than NaN/inf.

    Returns:
        The normalized values converted back with ``.numpy()``.
    """
    centered = sample - tf.math.reduce_mean(sample)
    spread = tf.math.reduce_std(sample)
    return tf.math.divide_no_nan(centered, spread).numpy()

'''
Function for splitting a series into blocks. Blocks can overlap. 
How the function works:
Suppose we have a series with AccV, AccML, AccAP columns and len of 50000, that is (50000, 3). 
First, the series is padded so that the final length is divisible by CFG['block_size'] = 15552. Now the series shape is (62208, 3).
Then we get blocks: first block is series[0:15552, :], second block is series[972:16524, :], ... , last block is series[46656:62208, :].

'''

def get_blocks(series, columns, block_size=None, block_stride=None):
    """Cut the selected columns of ``series`` into fixed-size, possibly overlapping blocks.

    The selected columns are converted to a float32 array, zero-padded at the
    end so the length is a multiple of ``block_size``, and then sliced with a
    step of ``block_stride``. Only windows that fit entirely inside the padded
    array are returned.

    Args:
        series: pandas DataFrame holding at least ``columns``.
        columns: list of column names to extract (fixes the column order of
            the returned arrays).
        block_size: number of rows per block; defaults to ``CFG['block_size']``.
        block_stride: step between consecutive block starts; defaults to
            ``CFG['block_stride']``.

    Returns:
        A list of dicts ``{'begin': int, 'end': int, 'values': ndarray}`` where
        ``values`` has shape ``(block_size, len(columns))`` and dtype float32.
        An empty input produces an empty list.
    """
    if block_size is None:
        block_size = CFG['block_size']
    if block_stride is None:
        block_stride = CFG['block_stride']

    # Selecting columns already builds a new frame and astype() copies, so the
    # caller's DataFrame is never modified and no extra .copy() is needed.
    values = series[columns].values.astype(np.float32)

    # Pad rows (axis 0) at the end only; columns (axis 1) are left untouched.
    block_count = math.ceil(len(values) / block_size)
    values = np.pad(values, pad_width=[[0, block_count * block_size - len(values)], [0, 0]])

    # The last admissible start is the one whose window ends exactly at the
    # end of the padded array.
    last_begin = len(values) - block_size
    return [{'begin': begin,
             'end': begin + block_size,
             'values': values[begin:begin + block_size]}
            for begin in range(0, last_begin + 1, block_stride)]


In [5]:
# tf.config.experimental.list_physical_devices('GPU')

**Imports and Utils**

In [6]:
import os
import math
import random
import warnings

# if CFG['TPU']:
#     !pip install -q /lib/wheels/tensorflow-2.9.1-cp38-cp38-linux_x86_64.whl
#     !pip3 install -q U scikit-learn
    
import numpy as np 
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import scipy

from tqdm import tqdm
from itertools import cycle
from joblib import Parallel, delayed
from sklearn.metrics import average_precision_score

if CFG['TPU']:
    # Connect to the locally attached TPU and build the distribution strategy.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect(tpu='local')
    tpu_strategy = tf.distribute.TPUStrategy(tpu)
    # GPU_BATCH_SIZE is rescaled in place by the replica count and mirrored
    # into TPU_BATCH_SIZE.
    # NOTE(review): re-running this cell without re-running the configuration
    # cell multiplies GPU_BATCH_SIZE by the replica count again.
    GPU_BATCH_SIZE = tpu_strategy.num_replicas_in_sync * GPU_BATCH_SIZE
    TPU_BATCH_SIZE = GPU_BATCH_SIZE


# Silence library warnings and do not truncate wide DataFrame cells.
warnings.filterwarnings("ignore")
pd.set_option('display.max_colwidth', None)

def folder(path):
    """Create directory ``path`` (including missing parents) if it is not there yet."""
    # exist_ok=True replaces the exists()-then-makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(path, exist_ok=True)
        
def plot(e, size=(20, 4)):
    """Show ``e`` as a line plot on a fresh figure created with ``figsize=size``."""
    _, axis = plt.subplots(figsize=size)
    axis.plot(e)
    plt.show()

D0714 14:44:12.645699167    3249 config.cc:119]                        gRPC EXPERIMENT tcp_frame_size_tuning               OFF (default:OFF)
D0714 14:44:12.645725509    3249 config.cc:119]                        gRPC EXPERIMENT tcp_rcv_lowat                       OFF (default:OFF)
D0714 14:44:12.645728974    3249 config.cc:119]                        gRPC EXPERIMENT peer_state_based_framing            OFF (default:OFF)
D0714 14:44:12.645731619    3249 config.cc:119]                        gRPC EXPERIMENT flow_control_fixes                  ON  (default:ON)
D0714 14:44:12.645734177    3249 config.cc:119]                        gRPC EXPERIMENT memory_pressure_controller          OFF (default:OFF)
D0714 14:44:12.645736663    3249 config.cc:119]                        gRPC EXPERIMENT unconstrained_max_quota_buffer_size OFF (default:OFF)
D0714 14:44:12.645739164    3249 config.cc:119]                        gRPC EXPERIMENT new_hpack_huffman_decoder           ON  (default:ON)
D0714 14:44:12.

INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.
INFO:tensorflow:Initializing the TPU system: local
INFO:tensorflow:Finished initializing TPU system.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:2, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:3, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:4, TPU

**Model**

In [7]:
'''
The transformer encoder layer
For more details, see https://arxiv.org/pdf/1706.03762.pdf [Attention Is All You Need]

'''

class EncoderLayer(tf.keras.Model):
    """One transformer encoder layer (see "Attention Is All You Need").

    Self-attention followed by a two-layer feed-forward block; each sub-block
    is wrapped in a residual addition and a layer normalization. The same
    ``LayerNormalization`` instance is applied after both residual additions,
    so the two normalizations share their weights.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=CFG['fog_model_num_heads'], key_dim=CFG['fog_model_dim'], dropout=CFG['fog_model_mha_dropout'])

        self.add = tf.keras.layers.Add()
        self.layernorm = tf.keras.layers.LayerNormalization()

        # Position-wise feed-forward block with dropout after each Dense.
        self.seq = tf.keras.Sequential([tf.keras.layers.Dense(CFG['fog_model_dim'], activation='relu'),
                                        tf.keras.layers.Dropout(CFG['fog_model_encoder_dropout']),
                                        tf.keras.layers.Dense(CFG['fog_model_dim']),
                                        tf.keras.layers.Dropout(CFG['fog_model_encoder_dropout']),
                                       ])

    def call(self, x, training=None):
        """Apply self-attention and the feed-forward block; output has the shape of ``x``."""
        attn_output = self.mha(query=x, key=x, value=x)
        # Residual connection + normalization around the attention output.
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        # Residual connection + normalization around the feed-forward output.
        x = self.add([x, self.seq(x)])
        x = self.layernorm(x)
        return x

    def model(self):
        """Wrap the layer in a functional ``tf.keras.Model`` (e.g. for ``summary()``).

        The input shape is derived from ``CFG`` so the wrapper stays valid when
        the block size, patch size or model dimension is changed.
        """
        # shape excludes the batch dimension, which is given by batch_size.
        x = tf.keras.Input(shape=(CFG['block_size'] // CFG['patch_size'], CFG['fog_model_dim']),
                           batch_size=GPU_BATCH_SIZE)
        return tf.keras.Model(inputs=x, outputs=self.call(x))
    
'''
FOGEncoder is a combination of transformer encoder (D=320, H=6, L=5) and two BidirectionalLSTM layers

'''

class FOGEncoder(tf.keras.Model):
    """Patch embedding + learned positional encoding + transformer encoder layers + BiLSTM layers.

    The number of encoder layers, LSTM layers and the model dimension are read
    from ``CFG``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Linear projection of each flattened patch to the model dimension.
        self.first_linear = tf.keras.layers.Dense(CFG['fog_model_dim'])

        # Number of patches per block, i.e. the sequence length seen by the encoder.
        self.sequence_len = CFG['block_size'] // CFG['patch_size']
        # Trainable positional encoding, shared by all samples (leading dim 1).
        self.pos_encoding = tf.Variable(initial_value=tf.random.normal(shape=(1, self.sequence_len, CFG['fog_model_dim']), stddev=0.02), trainable=True)

        self.add = tf.keras.layers.Add()

        self.first_dropout = tf.keras.layers.Dropout(CFG['fog_model_first_dropout'])

        self.enc_layers = tf.keras.Sequential([EncoderLayer() for _ in range(CFG['fog_model_num_encoder_layers'])])

        self.lstm_layers = tf.keras.Sequential([tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(CFG['fog_model_dim'], return_sequences=True)) for _ in range(CFG['fog_model_num_lstm_layers'])])

    def call(self, x, training=None):
        """Encode a batch of patch sequences.

        The positional encoding is tiled ``GPU_BATCH_SIZE`` times along the
        batch axis, so the incoming batch is expected to hold exactly
        ``GPU_BATCH_SIZE`` samples.
        """
        x = x / 25.0 # Normalization attempt in the segment [-1, 1]
        x = self.first_linear(x)

        if training: # augmentation by randomly rolling the position encoding tensor
            # tf.tile repeats pos_encoding GPU_BATCH_SIZE times along axis 0 and
            # once along axes 1 and 2 -> (GPU_BATCH_SIZE, sequence_len, fog_model_dim).
            #
            # tf.roll shifts elements towards larger indices for positive shifts
            # and towards smaller indices for negative ones. GPU_BATCH_SIZE random
            # shifts in [-sequence_len, 0) are drawn and `axis` names dimension 1
            # once per shift.
            # NOTE(review): because every shift refers to the same axis, the rolls
            # combine along the sequence axis and all samples of the batch receive
            # the same overall shift - confirm a per-batch (not per-sample) roll
            # is what is intended.
            random_pos_encoding = tf.roll(tf.tile(self.pos_encoding, multiples=[GPU_BATCH_SIZE, 1, 1]),
                                          shift=tf.random.uniform(shape=(GPU_BATCH_SIZE,), minval=-self.sequence_len, maxval=0, dtype=tf.int32),
                                          axis=GPU_BATCH_SIZE * [1],
                                          )
            x = self.add([x, random_pos_encoding])

        else: # without augmentation
            x = self.add([x, tf.tile(self.pos_encoding, multiples=[GPU_BATCH_SIZE, 1, 1])])

        x = self.first_dropout(x)

        x = self.enc_layers(x)
        x = self.lstm_layers(x)

        return x

    def model(self):
        """Wrap the encoder in a functional ``tf.keras.Model`` (e.g. for ``summary()``).

        The input shape is derived from ``CFG`` instead of being hard-coded:
        ``sequence_len`` patches, each holding ``patch_size`` samples of the
        3 acceleration channels.
        """
        x = tf.keras.Input(shape=(self.sequence_len, CFG['patch_size'] * 3),
                           batch_size=GPU_BATCH_SIZE)
        return tf.keras.Model(inputs=x, outputs=self.call(x))
    
class FOGModel(tf.keras.Model):
    """``FOGEncoder`` followed by a 3-unit Dense layer and a sigmoid.

    Produces three independent probabilities per patch.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.encoder = FOGEncoder()
        self.last_linear = tf.keras.layers.Dense(3)

    def call(self, x):
        """Return per-patch sigmoid outputs for the encoded sequence ``x``."""
        logits = self.last_linear(self.encoder(x))
        return tf.nn.sigmoid(logits)

    def model(self):
        """Wrap the model in a functional ``tf.keras.Model`` with a fixed batch size."""
        # The Input shape leaves out the batch dimension; batch_size supplies it.
        num_patches = CFG['block_size'] // CFG['patch_size']
        inputs = tf.keras.Input(shape=(num_patches, CFG['patch_size']*3),
                                batch_size=GPU_BATCH_SIZE)
        return tf.keras.Model(inputs=inputs, outputs=self.call(inputs))



In [8]:
# model = FOGModel().model()
# model.compile(loss=loss_function, optimizer=tf.keras.optimizers.Adam(learning_rate=CustomSchedule(LEARNING_RATE, WARMUP_STEPS), beta_1=0.9, beta_2=0.98, epsilon=1e-9))
# model.fit(dataset, epochs=EPOCHS, steps_per_epoch=STEPS_PER_EPOCH, callbacks=[PredictionFnCallback()])

**Tdcsfog preparing**

In [9]:
'''
Create train blocks with AccV, AccML, AccAP, StartHesitation, Turn, Walking, Valid, Mask columns and save in the directory

'''

# pd.read_csv('tdcsfog_metadata.csv').head(3) =>
#            Id Subject  Visit  Test Medication
# 0  003f117e14  4dc2f8      3     2         on
# 1  009ee11563  f62eec      4     2         on
# 2  011322847a  231c3b      2     2         on

# save_path = './kaggle/working/train/tdcsfog'; folder(save_path); 
# tdcsfog_metadata = pd.read_csv('tdcsfog_metadata.csv').set_index('Id')
# fetch over kaggle:
# Output directory for the prepared blocks and the per-recording metadata
# (paths as mounted on Kaggle).
save_path = '/kaggle/working/train/tdcsfog'
folder(save_path)
tdcsfog_metadata = pd.read_csv('/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/tdcsfog_metadata.csv').set_index('Id')
# After set_index the frame is keyed by recording Id and keeps the
# Subject, Visit, Test and Medication columns.

blocks_descriptions = []
for Id in tqdm(tdcsfog_metadata.index, total=len(tdcsfog_metadata.index), desc='Preparing'):
    # One CSV per recording: Time, AccV, AccML, AccAP, StartHesitation, Turn, Walking.
    series = pd.read_csv(f'/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/{Id}.csv')

    # Standardize each acceleration channel over the whole recording.
    for axis_name in ('AccV', 'AccML', 'AccAP'):
        series[axis_name] = sample_normalize(series[axis_name].values)

    # Real rows are flagged 1 here; rows added later by zero-padding in
    # get_blocks carry 0 in both columns.
    series['Valid'] = 1
    series['Mask'] = 1

    blocks = get_blocks(series, ['AccV', 'AccML', 'AccAP', 'StartHesitation', 'Turn', 'Walking', 'Valid', 'Mask'])

    for block_count, block in enumerate(blocks):
        values = block['values']
        fname = f'{Id}_{block_count}.npy'

        # Columns 3..7 of `values` are StartHesitation, Turn, Walking, Valid,
        # Mask; their sums count the flagged rows inside the block.
        block_description = {
            'Id': Id,
            'Count': block_count,
            'File': fname,
            'Path': f'{save_path}/{fname}',
            'Source': 'tsfog',
            'StartHesitation_size': np.sum(values[:, 3]),
            'Turn_size': np.sum(values[:, 4]),
            'Walking_size': np.sum(values[:, 5]),
            'Valid_size': np.sum(values[:, 6]),
            'Mask_size': np.sum(values[:, 7]),
        }

        blocks_descriptions.append(block_description)
        np.save(f'{save_path}/{fname}', values)

# One row per saved block.
blocks_descriptions = pd.DataFrame(blocks_descriptions)

Preparing: 100%|██████████| 833/833 [00:22<00:00, 37.62it/s]


In [10]:
blocks_descriptions.head(3)

Unnamed: 0,Id,Count,File,Path,Source,StartHesitation_size,Turn_size,Walking_size,Valid_size,Mask_size
0,003f117e14,0,003f117e14_0.npy,/kaggle/working/train/tdcsfog/003f117e14_0.npy,tsfog,0.0,788.0,0.0,4682.0,4682.0
1,009ee11563,0,009ee11563_0.npy,/kaggle/working/train/tdcsfog/009ee11563_0.npy,tsfog,0.0,4341.0,0.0,9920.0,9920.0
2,011322847a,0,011322847a_0.npy,/kaggle/working/train/tdcsfog/011322847a_0.npy,tsfog,0.0,281.0,0.0,5187.0,5187.0


**Train Dataset**

In [11]:
'''
Selecting validation subjects
FOGModel train data preparing

'''

def write_to_ram(fog):
    """Load every block listed in ``fog`` and cache it in the global ``RAM`` dict.

    ``fog`` must provide the columns ``Id``, ``Count`` and ``Path``. Each file
    is expected to hold a block whose first three columns are the acceleration
    channels and whose remaining columns are the label/flag columns
    (StartHesitation, Turn, Walking, Valid, Mask when written by the
    preparation cell).

    For every block ``RAM[(Id, Count)]`` is set to a pair:
      * input  - patches flattened to ``(num_patches, patch_size * 3)``;
      * target - per-patch maximum of each label/flag column, cast to int32,
        shape ``(num_patches, number_of_label_columns)``.
    """
    num_patches = CFG['block_size'] // CFG['patch_size']
    records = zip(fog['Id'], fog['Count'], fog['Path'])

    for Id, Count, path in tqdm(records, total=len(fog), desc='Write'):
        block = np.load(path)

        # Group consecutive rows into patches: (num_patches, patch_size, n_columns).
        patches = tf.reshape(block, shape=(num_patches, CFG['patch_size'], block.shape[1]))

        # Input: the three acceleration channels, flattened per patch.
        series_input = tf.reshape(patches[:, :, 0:3], shape=(num_patches, -1))

        # Target: a patch is positive for a column if any of its rows is,
        # hence the maximum over the rows of the patch (axis 1).
        series_target = tf.cast(tf.reduce_max(patches[:, :, 3:], axis=1), tf.int32)

        # Count is the index of the block within the recording Id.
        RAM[(Id, Count)] = (series_input, series_target)


# tdcsfog_metadata['Subject'].apply(lambda x: x not in val_subjects) =>
# Id
# 003f117e14     True
# 009ee11563     True
#               ...  
# feba449e1a    False
# ffda8fadfd     True
# Name: Subject, Length: 833, dtype: bool

# tdcsfog_metadata.index => 
# Index(['003f117e14', '009ee11563', '011322847a', '01d0fe7266', '024418ba39',
#        ...
#        'fd5300c038', 'fe33f7591d', 'fe4dcf3ded', 'fe7d3b45f2', 'feadfa435d',
#       dtype='object', name='Id', length=833)

# type(tdcsfog_metadata.index) => <class 'pandas.core.indexes.base.Index'>

# Recordings whose subject is in val_subjects are held out; all others train.
is_val_subject = tdcsfog_metadata['Subject'].isin(val_subjects)
train_ids = tdcsfog_metadata[~is_val_subject].index.tolist()
val_ids = tdcsfog_metadata[is_val_subject].index.tolist()

# Keep only the blocks that were cut from training recordings.
train_blocks_descriptions = blocks_descriptions[blocks_descriptions['Id'].isin(train_ids)]

# In-memory cache of prepared training blocks, keyed by (Id, Count).
RAM = {}
write_to_ram(train_blocks_descriptions)

print(f'\n[Train ids] {len(train_ids)} [Val ids] {len(val_ids)} ({100*len(val_ids)/(len(train_ids)+len(val_ids)):.1f})')
print(f'[Train blocks] {len(train_blocks_descriptions )}\n')

Write: 100%|██████████| 2109/2109 [00:06<00:00, 318.23it/s]


[Train ids] 717 [Val ids] 116 (13.9)
[Train blocks] 2109






In [12]:
'''
Create a random train dataset from train_blocks_descriptions DataFrame

'''

def read(row):
    """Map a dataset element holding ``Id`` and ``Count`` to its cached (input, target) pair.

    The lookup into the Python-side ``RAM`` dict is wrapped in
    ``tf.py_function`` so it can be used inside ``tf.data.Dataset.map``.
    Static shapes are set on the results afterwards:
    input ``(num_patches, patch_size * 3)``, target ``(num_patches, 5)``.
    """
    num_patches = CFG['block_size'] // CFG['patch_size']

    def read_from_ram(Id, Count):
        # Id arrives as a bytes string tensor, Count as an integer tensor.
        key = (Id.numpy().decode('utf-8'), Count.numpy())
        cached_input, cached_target = RAM[key]
        # Targets are stored as int32; the loss works on float32.
        return cached_input, cached_target.numpy().astype(np.float32)

    series_input, series_target = tf.py_function(read_from_ram, [row['Id'], row['Count']], [tf.float32, tf.float32])
    series_input.set_shape(shape=(num_patches, CFG['patch_size']*3))
    series_target.set_shape(shape=(num_patches, 5))

    return series_input, series_target

  
# One entry per recording Id: the list of that recording's block descriptions,
# each as a dict, e.g.
# [{'Id': 'ff4f844fd3', 'Count': 0, 'File': 'ff4f844fd3_0.npy', ..., 'Mask_size': 15552.0},
#  {'Id': 'ff4f844fd3', 'Count': 1, 'File': 'ff4f844fd3_1.npy', ..., 'Mask_size': 14989.0},
#  ...]
groups = [group.aggregate(dict, axis=1).tolist() for Id, group in train_blocks_descriptions.groupby('Id')]

# Visit the recordings in a random order, over and over again.
random.shuffle(groups)
groups = cycle(groups)

# Number of block descriptions drawn for the training stream. The value matches
# what the previous `while len(dataset) <= 500000` loop produced (500001 items).
NUM_TRAIN_SAMPLES = 500001

# For every draw: take the next recording in the cycle and pick one of its
# blocks uniformly at random, so each recording is sampled equally often
# regardless of how many blocks it has.
train_samples = [random.choice(next(groups)) for _ in range(NUM_TRAIN_SAMPLES)]

# dict(pd.DataFrame(...)) gives one column Series per description field
# ('Id', 'Count', 'File', ..., 'Mask_size'); from_tensor_slices then yields one
# dataset element per sampled block.
dataset = tf.data.Dataset.from_tensor_slices(dict(pd.DataFrame(train_samples)))

# `read` replaces each description with its cached (input, target) tensors.
# On TPU the batch is TPU_BATCH_SIZE times the replica count.
# NOTE(review): presumably this makes each replica receive GPU_BATCH_SIZE
# samples, matching the tile in FOGEncoder.call - confirm.
dataset = dataset.map(read).batch(TPU_BATCH_SIZE*tpu_strategy.num_replicas_in_sync if CFG['TPU'] else GPU_BATCH_SIZE, drop_remainder=True)

In [13]:
# for element in dataset:    
#     # element['Id'] => tf.Tensor(b'267f36cd04', shape=(), dtype=string)
#     if element['Id'].numpy().decode("utf-8")=='ff4f844fd3': 
#         print(element)
#         read(element)
#         break

**Train**

In [15]:
'''
Explanation of the loss_function arguments

real is a tensor with the shape (GPU_BATCH_SIZE, CFG['block_size'] // CFG['patch_size'], 5) where the last axis means:
0 - StartHesitation 
1 - Turn
2 - Walking
3 - Valid
4 - Mask

output is a tensor with the shape (GPU_BATCH_SIZE, CFG['block_size'] // CFG['patch_size'], 3) where the last axis means:
0 - StartHesitation predicted
1 - Turn predicted
2 - Walking predicted

'''

# Element-wise binary cross-entropy; the reduction is done manually below so
# that masked positions can be excluded.
ce = tf.keras.losses.BinaryCrossentropy(reduction='none')

def loss_function(real, output, name='loss_function'):
    """Masked mean binary cross-entropy over the three event classes.

    Args:
        real: tensor of shape (batch, num_patches, 5); channels 0-2 are the
            StartHesitation/Turn/Walking targets, channel 3 is Valid and
            channel 4 is Mask.
        output: tensor of shape (batch, num_patches, 3) with the predicted
            probabilities for the three classes.
        name: unused; kept for interface compatibility.

    Returns:
        Scalar: sum of the cross-entropy over positions where Valid * Mask is
        non-zero, divided by the sum of the mask. If the mask sum is zero the
        result is 0 instead of NaN.
    """
    # A trailing axis of size 1 makes the loss object return one value per
    # (batch, patch, class) element instead of averaging over the classes.
    loss = ce(tf.expand_dims(real[:, :, 0:3], axis=-1), tf.expand_dims(output, axis=-1)) # Example shape (32, 864, 3)

    # A position counts only if it is both valid and unmasked.
    mask = tf.math.multiply(real[:, :, 3], real[:, :, 4]) # Example shape (32, 864)
    mask = tf.cast(mask, dtype=loss.dtype)
    # Repeat the mask for each of the 3 classes so it lines up with `loss`.
    mask = tf.tile(tf.expand_dims(mask, axis=-1), multiples=[1, 1, 3]) # Example shape (32, 864, 3)
    loss *= mask

    # divide_no_nan guards against a batch with no valid positions, which
    # would otherwise yield 0/0 = NaN and corrupt the weights on update.
    return tf.math.divide_no_nan(tf.reduce_sum(loss), tf.reduce_sum(mask))

'''
Simple learning rate schedule with warm up steps

'''
        
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Linear warm-up to ``initial_lr`` followed by a constant learning rate.

    Args:
        initial_lr: learning rate reached at (and kept after) ``warmup_steps``.
        warmup_steps: number of steps over which the rate grows linearly from 0.
    """

    def __init__(self, initial_lr, warmup_steps=1):
        super(CustomSchedule, self).__init__()

        # Raw constructor arguments, kept so the schedule can be serialized.
        self._config = {'initial_lr': initial_lr, 'warmup_steps': warmup_steps}

        self.initial_lr = tf.cast(initial_lr, tf.float32)
        self.warmup_steps = tf.cast(warmup_steps, tf.float32)

    def __call__(self, step):
        """Return the learning rate for optimizer step ``step``."""
        step = tf.cast(step, tf.float32)
        # initial_lr * step / warmup_steps grows linearly; the minimum caps it
        # at initial_lr once step >= warmup_steps.
        return tf.math.minimum(self.initial_lr, self.initial_lr * (step/self.warmup_steps))

    def get_config(self):
        """Return the constructor arguments so the schedule can be rebuilt via ``from_config``."""
        return dict(self._config)
    

'''
PredictionFnCallback is used for:
1. Loading validation data
2. FOGModel data preparation
3. Prediction
4. Scoring and save

'''

class PredictionFnCallback(tf.keras.callbacks.Callback):
    """Per-epoch validation callback.

    On construction it reads every series listed in the module-level ``val_ids``,
    normalizes the three accelerometer columns, cuts each series into blocks with
    ``get_blocks`` and stacks all blocks into one tensor padded to a multiple of
    the inference batch size. At the end of every epoch it predicts on those
    blocks, averages the block predictions back onto each series' time axis,
    prints average-precision scores and saves the model weights under a file
    name made of epoch, mAP, event mAP and training loss.

    Args:
        model: optional model to attach right away. When omitted, the model is
            expected to be attached by Keras when the callback is passed to ``fit``.
        verbose: 0 hides the progress bars, any other value shows them; the value
            is also forwarded to ``predict``.
    """

    def __init__(self, model=None, verbose=0):
        # Initialize the Keras base class before adding our own attributes.
        super().__init__()

        if model is not None: self.model = model
        self.verbose = verbose

        def init(Id, path):
            """Load one series, normalize it and append its blocks to self.blocks."""
            series = pd.read_csv(path).reset_index(drop=True)
            series['Id'] = Id
            series['AccV'] = sample_normalize(series['AccV'].values)
            series['AccML'] = sample_normalize(series['AccML'].values)
            series['AccAP'] = sample_normalize(series['AccAP'].values)

            # Row-wise max over the three target columns: 1 if any event is active.
            series['Event'] = series[['StartHesitation', 'Turn', 'Walking']].aggregate('max', axis=1)
            # series.head(3) =>
            #    Time      AccV     AccML     AccAP  StartHesitation  Turn  Walking  Id            Event
            # 0     0  0.714180 -0.175363 -2.228391                0     0        0  02edc527c0      0
            # 1     1 -0.821811  0.636446 -2.110201                0     0        0  02edc527c0      0
            # 2     2 -0.171170  0.285309 -1.933505                0     0        0  02edc527c0      0

            series_blocks = []
            for block in get_blocks(series, ['AccV', 'AccML', 'AccAP']): # Example shape (15552, 3)
                values = tf.reshape(block['values'], shape=(CFG['block_size'] // CFG['patch_size'], CFG['patch_size'], 3)) # Example shape (864, 18, 3)
                values = tf.reshape(values, shape=(CFG['block_size'] // CFG['patch_size'], CFG['patch_size']*3)) # Example shape (864, 54)
                values = tf.expand_dims(values, axis=0) # Example shape (1, 864, 54)

                self.blocks.append(values)
                # Remember which global block index covers which [begin, end) span of this series.
                series_blocks.append((self.blocks_counter, block['begin'], block['end']))
                self.blocks_counter += 1

            description = {}
            description['series'] = series
            description['series_blocks'] = series_blocks
            self.descriptions.append(description)

        self.descriptions = [] # Blocks metadata
        self.blocks = [] # Validation data blocks
        self.blocks_counter = 0 # Blocks counter

        tsfog_ids = val_ids
        # Local alternative: f'train/tdcsfog/{tsfog_id}.csv'
        tsfog_paths = [f'/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog/{tsfog_id}.csv' for tsfog_id in tsfog_ids]

        # `disable=True` silences tqdm; `not verbose` keeps the bar visible for every
        # non-zero verbosity (the former `1-verbose` hid it again for verbose=2).
        for tsfog_id, tsfog_path in tqdm(zip(tsfog_ids, tsfog_paths), total=len(tsfog_ids), desc='PredictionFnCallback Initialization', disable=not verbose):
            init(tsfog_id, tsfog_path)

        self.blocks = tf.concat(self.blocks, axis=0) # Example shape (self.blocks_counter, 864, 54)

        # self.blocks is padded so that its length is divisible by the inference batch
        # size, which model.predict needs to run without errors. The padded blocks are
        # never referenced by any series, so they have no effect on the scores.
        batch_size = self._inference_batch_size()
        pad_value = math.ceil(self.blocks_counter / batch_size) * batch_size - self.blocks_counter
        self.blocks = tf.pad(self.blocks,
                             paddings=[[0, pad_value],
                                       [0, 0],
                                       [0, 0],
                                      ]) # Example shape (self.blocks_counter+pad_value, 864, 54)

        print(f'\n[EventPredictionFnCallback Initialization] [Series] {len(self.descriptions)} [Blocks] {self.blocks_counter}\n')

    @staticmethod
    def _inference_batch_size():
        """Global batch size used for padding and prediction.

        Under tpu_strategy.scope() the per-replica batch size has to be multiplied
        by the number of replicas.
        """
        return TPU_BATCH_SIZE*tpu_strategy.num_replicas_in_sync if CFG['TPU'] else GPU_BATCH_SIZE

    def prediction(self):
        """Predict on all validation blocks and map the results back to each series.

        Returns:
            One DataFrame with all validation series concatenated, extended with the
            columns StartHesitation_prediction, Turn_prediction, Walking_prediction,
            Prediction_count and Event_prediction.
        """
        # Use the model attached to this callback (the same one on_epoch_end saves).
        # Fall back to the notebook-level `model` only when none has been attached,
        # so calling prediction() on its own keeps working.
        predictor = self.model if getattr(self, 'model', None) is not None else model

        predictions = predictor.predict(self.blocks, batch_size=self._inference_batch_size(), verbose=self.verbose) # Example shape (self.blocks_counter+pad_value, 864, 3)
        predictions = tf.expand_dims(predictions, axis=-1) # Example shape (self.blocks_counter+pad_value, 864, 3, 1)
        predictions = tf.transpose(predictions, perm=[0, 1, 3, 2]) # Example shape (self.blocks_counter+pad_value, 864, 1, 3)

        # Repeat every patch prediction CFG['patch_size'] times along axis 2 so that
        # each time step inside a patch receives that patch's prediction.
        predictions = tf.tile(predictions, multiples=[1, 1, CFG['patch_size'], 1]) # Example shape (self.blocks_counter+pad_value, 864, 18, 3)
        predictions = tf.reshape(predictions, shape=(predictions.shape[0], predictions.shape[1]*predictions.shape[2], 3)) # Example shape (self.blocks_counter+pad_value, 15552, 3)
        predictions = predictions.numpy()

        def create_target(description):
            """Average the block predictions of one series onto its time axis."""
            series, series_blocks = description['series'].copy(), description['series_blocks']

            # The last block's `end` gives the covered length, e.g. (0, 0, 15552) => 15552.
            # Columns 0:3 accumulate predictions, column 3 counts how many blocks hit a row.
            values = np.zeros((series_blocks[-1][2], 4))

            for series_block in series_blocks:
                i, begin, end = series_block
                values[begin:end, 0:3] += predictions[i]
                values[begin:end, 3] += 1

            # Drop rows beyond the real series length.
            values = values[:len(series)]

            series['StartHesitation_prediction'] = values[:, 0] / values[:, 3]
            series['Turn_prediction'] = values[:, 1] / values[:, 3]
            series['Walking_prediction'] = values[:, 2] / values[:, 3]
            series['Prediction_count'] = values[:, 3]
            series['Event_prediction'] = series[['StartHesitation_prediction', 'Turn_prediction', 'Walking_prediction']].aggregate('max', axis=1)

            return series

        targets = Parallel(n_jobs=-1, verbose=0
                          )(delayed(create_target)(self.descriptions[i]) for i in tqdm(range(len(self.descriptions)), disable=not self.verbose))

        targets = pd.concat(targets)

        return targets

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation set, print the metrics and save the weights.

        Args:
            epoch: zero-based epoch index supplied by Keras.
            logs: metric dict supplied by Keras; its 'loss' entry goes into the
                saved file name. A missing dict or entry falls back to 1.0.
        """
        scores = []
        scores.append(f'{(epoch+1):03d}')

        # `logs` defaults to None, so guard before reading the training loss.
        logs = logs or {}
        loss = logs.get('loss', 1.0) if epoch >= 0 else 1.0

        targets = self.prediction()

        # Score: per-class average precision and their mean.
        StartHesitation_mAP = average_precision_score(targets['StartHesitation'], targets['StartHesitation_prediction'])
        Turn_mAP = average_precision_score(targets['Turn'], targets['Turn_prediction'])
        Walking_mAP = average_precision_score(targets['Walking'], targets['Walking_prediction'])
        mAP = (Walking_mAP+Turn_mAP+StartHesitation_mAP)/3

        print(f'\n\n[0] StartHesitation mAP - {StartHesitation_mAP:.3f}, Turn mAP - {Turn_mAP:.3f}, Walking mAP - {Walking_mAP:.3f}, mAP - {mAP:.3f}')

        scores.append(f'{mAP:.3f}')

        # Score: average precision of "any event" vs. the max class prediction.
        Event_mAP = average_precision_score(targets['Event'], targets['Event_prediction'])
        print(f'[1] Event mAP - {Event_mAP:.3f}\n')
        scores.append(f'{Event_mAP:.3f}')

        # Save: file name is <epoch>_<mAP>_<Event mAP>_<loss>_model.h5
        scores.append(f'{loss:.4f}')

        save_name = '_'.join(scores)
        save_path = f'/kaggle/working/{save_name}_model.h5'
        self.model.save_weights(save_path)
        
'''
Training
        
'''

# Build, compile and (on the TPU path) train the model. Both branches bind the
# notebook-level name `model`.
if CFG['TPU']:
    # Model, optimizer and fit are all created/run inside the strategy scope.
    with tpu_strategy.scope():        
        # Author's note: under tpu_strategy.scope(), FOGModel().model() does not accept the provided
        # batch size (it is adjusted automatically to 32/replicas), so the subclassed model is built directly.
        #model = FOGModel().model()
        model = FOGModel()
        # Input shape: (batch, number of patches per block, patch_size * 3 accelerometer channels).
        model.build(input_shape=(GPU_BATCH_SIZE, CFG['block_size'] // CFG['patch_size'], CFG['patch_size']*3))
        
        model.summary(expand_nested=True)
        # WEIGHTS is a path; an empty string means "start from scratch".
        if len(WEIGHTS): model.load_weights(WEIGHTS)
        model.compile(loss=loss_function, optimizer=tf.keras.optimizers.Adam(learning_rate=CustomSchedule(LEARNING_RATE, WARMUP_STEPS), beta_1=0.9, beta_2=0.98, epsilon=1e-9))
        # Optional cleanup of earlier checkpoints (disabled):
        #!rm -r /kaggle/working/*
        # PredictionFnCallback scores the validation series and saves weights after every epoch.
        model.fit(dataset, epochs=EPOCHS, steps_per_epoch=STEPS_PER_EPOCH, callbacks=[PredictionFnCallback()])
else:
    model = FOGModel()    
    # Rebind `model` to whatever FOGModel.model() returns (used here for the nested summary).
    model= model.model()
    ##model.build(input_shape=(GPU_BATCH_SIZE, CFG['block_size'] // CFG['patch_size'], CFG['patch_size']*3))
    
    # Print the nested layer summary; the graphical plot below is disabled.
    model.summary(expand_nested=True)
    #tf.keras.utils.plot_model(model, to_file="model.jpg", expand_nested=True, show_shapes=True)
    
    # WEIGHTS is a path; an empty string means "start from scratch".
    if len(WEIGHTS): model.load_weights(WEIGHTS)
    model.compile(loss=loss_function, optimizer=tf.keras.optimizers.Adam(learning_rate=CustomSchedule(LEARNING_RATE, WARMUP_STEPS), beta_1=0.9, beta_2=0.98, epsilon=1e-9),run_eagerly=True) # eager execution enabled on this path
    
    # Optional cleanup of earlier checkpoints (disabled):
    #!rm -r /kaggle/working/* # comment by me
    # NOTE(review): training is disabled on the non-TPU path, so this branch only builds and
    # compiles the model. Re-enable the line below to train locally.
#     model.fit(dataset, epochs=EPOCHS, steps_per_epoch=STEPS_PER_EPOCH, callbacks=[PredictionFnCallback()])

Model: "fog_model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 fog_encoder_1 (FOGEncoder)  multiple                  17744000  
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| dense_12 (Dense)          multiple                  17600     |
|                                                               |
| add_6 (Add)               multiple                  0         |
|                                                               |
| dropout_11 (Dropout)      multiple                  0         |
|                                                               |
| sequential_12 (Sequential)  (16, 864, 320)          13348800  |
||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯||
|| encoder_layer_5 (EncoderLay  (16, 864, 320)       2669760   ||
|| er)                                                         ||
|||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

2023-07-14 14:49:37.413378: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2023-07-14 14:49:37.790369: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.




2023-07-14 14:50:25.039426: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2023-07-14 14:50:25.150528: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.




[0] StartHesitation mAP - 0.024, Turn mAP - 0.514, Walking mAP - 0.085, mAP - 0.208
[1] Event mAP - 0.572

Epoch 2/50

[0] StartHesitation mAP - 0.353, Turn mAP - 0.752, Walking mAP - 0.116, mAP - 0.407
[1] Event mAP - 0.868

Epoch 3/50

[0] StartHesitation mAP - 0.431, Turn mAP - 0.820, Walking mAP - 0.080, mAP - 0.444
[1] Event mAP - 0.904

Epoch 4/50

[0] StartHesitation mAP - 0.047, Turn mAP - 0.782, Walking mAP - 0.076, mAP - 0.302
[1] Event mAP - 0.886

Epoch 5/50

[0] StartHesitation mAP - 0.141, Turn mAP - 0.784, Walking mAP - 0.103, mAP - 0.343
[1] Event mAP - 0.904

Epoch 6/50

[0] StartHesitation mAP - 0.131, Turn mAP - 0.836, Walking mAP - 0.143, mAP - 0.370
[1] Event mAP - 0.901

Epoch 7/50

[0] StartHesitation mAP - 0.150, Turn mAP - 0.806, Walking mAP - 0.125, mAP - 0.360
[1] Event mAP - 0.912

Epoch 8/50

[0] StartHesitation mAP - 0.285, Turn mAP - 0.844, Walking mAP - 0.161, mAP - 0.430
[1] Event mAP - 0.895

Epoch 9/50

[0] StartHesitation mAP - 0.264, Turn mAP - 0.

KeyboardInterrupt: 

In [16]:
# Inspection only: build a fresh EncoderLayer, wrap it with its model() helper and
# print the layer summary with nested layers expanded. Nothing here is used later.
EncoderLayer().model().summary(expand_nested=True)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(16, 864, 320)]     0           []                               
                                                                                                  
 multi_head_attention_10 (Multi  (16, 864, 320)      2463680     ['input_1[0][0]',                
 HeadAttention)                                                   'input_1[0][0]',                
                                                                  'input_1[0][0]']                
                                                                                                  
 add_12 (Add)                   (16, 864, 320)       0           ['input_1[0][0]',                
                                                                  'multi_head_attention_10[0][

In [17]:
# Inspection only: build a fresh FOGEncoder, wrap it with its model() helper and
# print the layer summary with nested layers expanded. Nothing here is used later.
FOGEncoder().model().summary(expand_nested=True)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(16, 864, 54)]           0         
                                                                 
 tf.math.truediv (TFOpLambda  (16, 864, 54)            0         
 )                                                               
                                                                 
 dense_26 (Dense)            (16, 864, 320)            17600     
                                                                 
 add_13 (Add)                (16, 864, 320)            0         
                                                                 
 dropout_24 (Dropout)        (16, 864, 320)            0         
                                                                 
 sequential_20 (Sequential)  (16, 864, 320)            13348800  
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

In [None]:
#####

In [16]:
# for element in dataset:
#     # element[0].shape, element[1].shape => (4, 864, 54) (4, 864, 5)
#     model.fit(element[0],element[1], callbacks=[PredictionFnCallback()])
#     break


(16, 864, 54) (16, 864, 5)


In [None]:
## convert to onnx so that graph can be viewed at https://netron.app/.
# import tf2onnx
# import onnxruntime as rt

# spec = (tf.TensorSpec((None, CFG['block_size'] // CFG['patch_size'], CFG['patch_size']*3), tf.float32, name="input"),)
# output_path = "FOGModel.onnx"

# model_proto, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13, output_path=output_path)
# output_names = [n.name for n in model_proto.graph.output]

**Search models**

In [None]:
'''
Search for saved models in the working directory and sort them

'''

def parse_model_filename(fname, directory='/kaggle/working/'):
    """Turn a saved-weights file name into one table row.

    The name is split on '_' and each piece is stored under its 1-based position,
    as a float when it parses as a number and as text otherwise. For the names
    written during training (<epoch>_<mAP>_<Event mAP>_<loss>_model.h5) this gives
    1 -> epoch, 2 -> mAP, 3 -> Event mAP, 4 -> loss, 5 -> 'model.h5'.

    Args:
        fname: file name without directory.
        directory: directory prefix stored in the 'Path' entry.

    Returns:
        dict with 'Path' plus one integer key per name piece.
    """
    m = {'Path': directory + fname}
    for i, elem in enumerate(fname.split('_')):
        try:
            m[i+1] = float(elem)
        except ValueError:
            # Non-numeric piece (e.g. the trailing 'model.h5'): keep it as text.
            # Only ValueError is caught so interrupts and real errors still surface.
            m[i+1] = elem
    return m


# Collect every saved checkpoint; a missing working directory simply means none exist yet.
models = []
if os.path.isdir('/kaggle/working/'):
    models = [parse_model_filename(fname)
              for fname in os.listdir('/kaggle/working/')
              if 'model.h5' in fname]

if len(models): 
    models = pd.DataFrame(models)
    # mAP (column 2) in epoch order (column 1).
    plot(models.sort_values(1)[2].values)
    # Best checkpoints first.
    models = models.sort_values(2, ascending=False)
    display(models.head(15))