# SimCLR Contrastive Training Tutorial

In [1]:
# Enable IPython's autoreload extension (mode 2) so that edits to the imported
# library scripts are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Imports

In [2]:
import datetime
import os
import pickle

import scipy
import scipy.constants
import tensorflow as tf

In [3]:
# Library scripts
import raw_data_processing
import data_pre_processing
import simclr_models
import simclr_utitlities
import transformations

In [4]:
# Directory that receives the downloaded archive, the intermediate pickle and
# the saved models. Paths below are built by plain string concatenation, so the
# trailing slash is required.
working_directory = 'test_run/'
# The parsed dataset is read back from the same directory it was written to.
dataset_save_path = working_directory


## MotionSense Dataset

In this section, the MotionSense dataset is downloaded and parsed. The result is then saved to a Python pickle file, which the Pre-processing section loads.
(Note: This section only needs to be run once; later runs can start from the saved pickle file.)

### Downloading & Unzipping

In [5]:
import requests
import zipfile

In [6]:
dataset_url = 'https://github.com/mmalekzadeh/motion-sense/blob/master/data/B_Accelerometer_data.zip?raw=true'

# open() does not create missing parent directories, so make sure the working
# directory exists before writing into it (a fresh checkout has no test_run/).
os.makedirs(working_directory, exist_ok=True)

# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(dataset_url, allow_redirects=True, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the ".zip",
# which would only surface later as a confusing BadZipFile error.
r.raise_for_status()
with open(working_directory + 'B_Accelerometer_data.zip', 'wb') as f:
    f.write(r.content)

In [7]:

# Unpack the downloaded archive into the working directory.
with zipfile.ZipFile(f"{working_directory}B_Accelerometer_data.zip", mode='r') as archive:
    archive.extractall(path=working_directory)

### Data Processing

In [8]:
# Folder produced by extracting the archive; the library script parses its
# contents into `user_datasets`.
accelerometer_data_folder_path = f"{working_directory}B_Accelerometer_data/"
user_datasets = raw_data_processing.process_motion_sense_accelerometer_files(
    accelerometer_data_folder_path
)

test_run/B_Accelerometer_data/dws_1
test_run/B_Accelerometer_data/dws_11
test_run/B_Accelerometer_data/dws_2
test_run/B_Accelerometer_data/jog_16
test_run/B_Accelerometer_data/jog_9
test_run/B_Accelerometer_data/sit_13
test_run/B_Accelerometer_data/sit_5
test_run/B_Accelerometer_data/std_14
test_run/B_Accelerometer_data/std_6
test_run/B_Accelerometer_data/ups_12
test_run/B_Accelerometer_data/ups_3
test_run/B_Accelerometer_data/ups_4
test_run/B_Accelerometer_data/wlk_15
test_run/B_Accelerometer_data/wlk_7
test_run/B_Accelerometer_data/wlk_8


In [9]:
# Persist the parsed recordings under the 'user_split' key so the download and
# parsing steps do not have to be repeated.
with open(f"{working_directory}motion_sense_user_split.pkl", 'wb') as f:
    pickle.dump(dict(user_split=user_datasets), f)

## Pre-processing

In [10]:
# Windowing parameters: every model input holds `window_size` time steps with
# 3 values per step.
window_size = 400
input_shape = (window_size, 3)

# Dataset metadata
transformation_multiple = 1
dataset_name = 'motion_sense.pkl'
dataset_name_user_split = 'motion_sense_user_split.pkl'

# Short activity codes and their human-readable names, index-aligned.
label_list = ['null', 'sit', 'std', 'wlk', 'ups', 'dws', 'jog']
label_list_full_name = ['null', 'sitting', 'standing', 'walking', 'walking upstairs', 'walking downstairs', 'jogging']
has_null_class = True

# Activity code -> integer class index (position in `label_list`).
label_map = {label: index for index, label in enumerate(label_list)}

# One output unit per entry of `label_list` (the 'null' class included).
output_shape = len(label_list)

model_save_name = "motionsense_acc"

sampling_rate = 50.0
# Standard acceleration of gravity as provided by SciPy.
unit_conversion = scipy.constants.g

# a fixed user-split
test_users_fixed = [1, 14, 19, 23, 6]


def get_fixed_split_users(har_users, test_users=None):
    """Split user ids into training and testing users with a fixed test set.

    Args:
        har_users: iterable of all user ids available in the dataset.
        test_users: optional iterable of user ids to hold out for testing.
            Defaults to the module-level `test_users_fixed`.

    Returns:
        A `(train_users, test_users)` tuple of lists. `train_users` keeps the
        order of `har_users` and contains every id not held out for testing.
        `test_users` is returned as given, even if some of its ids do not
        occur in `har_users`.
    """
    # Copy the list so that callers mutating the result cannot silently change
    # the shared `test_users_fixed` default.
    held_out_users = list(test_users_fixed if test_users is None else test_users)
    train_users = [u for u in har_users if u not in held_out_users]
    return (train_users, held_out_users)

In [11]:
# Read back the pickle written by the Data Processing section.
# (pickle.load must only ever be used on files you created yourself.)
with open(f"{dataset_save_path}{dataset_name_user_split}", 'rb') as f:
    dataset_dict = pickle.load(f)
user_datasets = dataset_dict['user_split']


In [12]:
# The keys of `user_datasets` are the user ids; split them into train / test.
har_users = list(user_datasets)
train_users, test_users = get_fixed_split_users(har_users)
print(f'Testing: {test_users}, Training: {train_users}')

Testing: [1, 14, 19, 23, 6], Training: [10, 11, 12, 13, 15, 16, 17, 18, 2, 20, 21, 22, 24, 3, 4, 5, 7, 8, 9]


In [13]:
# Collect the pre-processing settings in one place, then build the
# train / validation / test arrays in a single call.
pre_processing_kwargs = dict(
    user_datasets=user_datasets,
    label_map=label_map,
    output_shape=output_shape,
    train_users=train_users,
    test_users=test_users,
    window_size=window_size,
    shift=window_size // 2,  # shift is half of the window size
    normalise_dataset=True,
    verbose=1,
)
np_train, np_val, np_test = data_pre_processing.pre_process_dataset_composite(**pre_processing_kwargs)



Test
(array(['dws', 'jog', 'sit', 'std', 'ups', 'wlk'], dtype='<U3'), array([112, 133, 360, 335, 148, 331]))
(array([1, 2, 3, 4, 5, 6]), array([360, 335, 331, 148, 112, 133]))
-----------------
Train
(array(['dws', 'jog', 'sit', 'std', 'ups', 'wlk'], dtype='<U3'), array([ 449,  480, 1282, 1146,  546, 1308]))
(array([1, 2, 3, 4, 5, 6]), array([1282, 1146, 1308,  546,  449,  480]))
-----------------
(4168, 400, 3)
(1043, 400, 3)
(1419, 400, 3)


## SimCLR Training

In [14]:
# Hyper-parameters for the SimCLR training cell below.
batch_size = 512
# Passed to the cosine learning-rate schedule in the training cell.
# NOTE(review): with 4168 training windows, batch_size 512 and 200 epochs there
# would be roughly 1800 optimizer steps if one step is taken per batch, i.e.
# more than decay_steps -- confirm that the schedule is meant to finish
# decaying before training ends.
decay_steps = 1000
epochs = 200
# Passed as `temperature` to simclr_utitlities.simclr_train_model.
temperature = 0.1
trasnformation_indices = [1, 2] # Use scaling and rotation transformations (indices into the list below)

# All available vectorized transformations; `trasnformation_indices` selects
# the entries that are combined for training.
trasnform_funcs_vectorized = [
    transformations.noise_transform_vectorized, # index 0
    transformations.scaling_transform_vectorized, # index 1
    transformations.rotation_transform_vectorized, # index 2
    transformations.negate_transform_vectorized, # index 3
    transformations.time_flip_transform_vectorized, # index 4
    transformations.time_segment_permutation_transform_improved, # index 5
    transformations.time_warp_transform_low_cost, # index 6
    transformations.channel_shuffle_transform_vectorized # index 7
]
# Short names listed in the same order as `trasnform_funcs_vectorized`.
transform_funcs_names = ['noised', 'scaled', 'rotated', 'negated', 'time_flipped', 'permuted', 'time_warped', 'channel_shuffled']



In [15]:
# Timestamp of this run; it prefixes the file names of all models saved below
# and in the evaluation cells.
start_time = datetime.datetime.now()
start_time_str = start_time.strftime("%Y%m%d-%H%M%S")
tf.keras.backend.set_floatx('float32')

# SGD optimizer driven by a cosine learning-rate schedule.
lr_decayed_fn = tf.keras.experimental.CosineDecay(
    initial_learning_rate=0.1,
    decay_steps=decay_steps,
)
optimizer = tf.keras.optimizers.SGD(lr_decayed_fn)

# Combine the transformations selected by `trasnformation_indices` into a
# single function.
transformation_function = simclr_utitlities.generate_combined_transform_function(
    trasnform_funcs_vectorized,
    indices=trasnformation_indices,
)

# Base model plus the SimCLR head attached to it.
base_model = simclr_models.create_base_model(input_shape, model_name="base_model")
simclr_model = simclr_models.attach_simclr_head(base_model)
simclr_model.summary()

# Only the input windows (np_train[0]) are passed to the training routine.
trained_simclr_model, epoch_losses = simclr_utitlities.simclr_train_model(
    simclr_model,
    np_train[0],
    optimizer,
    batch_size,
    transformation_function,
    temperature=temperature,
    epochs=epochs,
    is_trasnform_function_vectorized=True,
    verbose=1,
)

# Save the trained model so the evaluation cells can load it from disk.
simclr_model_save_path = "{}{}_simclr.hdf5".format(working_directory, start_time_str)
trained_simclr_model.save(simclr_model_save_path)



<function scaling_transform_vectorized at 0x7f5c6e7aef80>
<function rotation_transform_vectorized at 0x7f5c6e7b4050>
Model: "base_model_simclr"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 400, 3)]          0         
_________________________________________________________________
conv1d (Conv1D)              (None, 377, 32)           2336      
_________________________________________________________________
dropout (Dropout)            (None, 377, 32)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 362, 64)           32832     
_________________________________________________________________
dropout_1 (Dropout)          (None, 362, 64)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 355, 96)           49248    

## Fine-tuning and Evaluation

### Linear Model

In [16]:

# Settings for the linear-evaluation run.
total_epochs = 50
batch_size = 200
tag = "linear_eval"

# Start from the SimCLR model saved by the training cell and build the linear
# evaluation model from it.
simclr_model = tf.keras.models.load_model(simclr_model_save_path)
linear_evaluation_model = simclr_models.create_linear_model_from_base_model(
    simclr_model,
    output_shape,
    intermediate_layer=7,
)

# Checkpoint the full model whenever the validation loss reaches a new minimum.
best_model_file_name = "{}{}_simclr_{}.hdf5".format(working_directory, start_time_str, tag)
best_model_callback = tf.keras.callbacks.ModelCheckpoint(
    best_model_file_name,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=0,
)

training_history = linear_evaluation_model.fit(
    x=np_train[0],
    y=np_train[1],
    validation_data=np_val,
    epochs=total_epochs,
    batch_size=batch_size,
    shuffle=True,
    callbacks=[best_model_callback],
)

best_model = tf.keras.models.load_model(best_model_file_name)

# Report test-set metrics for the best checkpoint and for the final weights.
for eval_title, eval_model in (
    ("Model with lowest validation Loss:", best_model),
    ("Model in last epoch", linear_evaluation_model),
):
    print(eval_title)
    print(simclr_utitlities.evaluate_model_simple(eval_model.predict(np_test[0]), np_test[1], return_dict=True))


Train on 4168 samples, validate on 1043 samples
Epoch 1/2
Epoch 2/2
Model with lowest validation Loss:
{'Confusion Matrix': array([[360,   0,   0,   0,   0,   0],
       [ 98, 233,   4,   0,   0,   0],
       [ 11,   0, 320,   0,   0,   0],
       [ 78,   0,  70,   0,   0,   0],
       [ 24,   0,  88,   0,   0,   0],
       [  8,   0, 125,   0,   0,   0]]), 'F1 Macro': 0.3782497450009292, 'F1 Micro': 0.6434108527131783, 'F1 Weighted': 0.5473728720492812, 'Precision': 0.3581574207646541, 'Recall': 0.44371495994348503, 'Kappa': 0.5295256229896295}
Model in last epoch
{'Confusion Matrix': array([[360,   0,   0,   0,   0,   0],
       [ 98, 233,   4,   0,   0,   0],
       [ 11,   0, 320,   0,   0,   0],
       [ 78,   0,  70,   0,   0,   0],
       [ 24,   0,  88,   0,   0,   0],
       [  8,   0, 125,   0,   0,   0]]), 'F1 Macro': 0.3782497450009292, 'F1 Micro': 0.6434108527131783, 'F1 Weighted': 0.5473728720492812, 'Precision': 0.3581574207646541, 'Recall': 0.44371495994348503, 'Kappa':

### Full HAR Model

In [17]:

# Settings for the full HAR model evaluation run.
total_epochs = 50
batch_size = 200
tag = "full_eval"

# Start from the SimCLR model saved by the training cell and build the full
# classification model from it.
simclr_model = tf.keras.models.load_model(simclr_model_save_path)
full_evaluation_model = simclr_models.create_full_classification_model_from_base_model(
    simclr_model,
    output_shape,
    model_name="TPN",
    intermediate_layer=7,
    last_freeze_layer=4,
)

# Checkpoint the full model whenever the validation loss reaches a new minimum.
best_model_file_name = "{}{}_simclr_{}.hdf5".format(working_directory, start_time_str, tag)
best_model_callback = tf.keras.callbacks.ModelCheckpoint(
    best_model_file_name,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=0,
)

training_history = full_evaluation_model.fit(
    x=np_train[0],
    y=np_train[1],
    validation_data=np_val,
    epochs=total_epochs,
    batch_size=batch_size,
    shuffle=True,
    callbacks=[best_model_callback],
)

best_model = tf.keras.models.load_model(best_model_file_name)

# Report test-set metrics for the best checkpoint and for the final weights.
for eval_title, eval_model in (
    ("Model with lowest validation Loss:", best_model),
    ("Model in last epoch", full_evaluation_model),
):
    print(eval_title)
    print(simclr_utitlities.evaluate_model_simple(eval_model.predict(np_test[0]), np_test[1], return_dict=True))


Train on 4168 samples, validate on 1043 samples
Epoch 1/2
Epoch 2/2
Model with lowest validation Loss:
{'Confusion Matrix': array([[333,  27,   0,   0,   0,   0],
       [  0, 335,   0,   0,   0,   0],
       [  1,  75, 247,   8,   0,   0],
       [ 15,  66,  19,  47,   1,   0],
       [ 17,   9,  66,   4,  14,   2],
       [  6,   1,   5,   0,   0, 121]]), 'F1 Macro': 0.6765570974030605, 'F1 Micro': 0.77307963354475, 'F1 Weighted': 0.7432214445809138, 'Precision': 0.8324672930834801, 'Recall': 0.6705942614354127, 'Kappa': 0.7088707765943619}
Model in last epoch
{'Confusion Matrix': array([[333,  27,   0,   0,   0,   0],
       [  0, 335,   0,   0,   0,   0],
       [  1,  75, 247,   8,   0,   0],
       [ 15,  66,  19,  47,   1,   0],
       [ 17,   9,  66,   4,  14,   2],
       [  6,   1,   5,   0,   0, 121]]), 'F1 Macro': 0.6765570974030605, 'F1 Micro': 0.77307963354475, 'F1 Weighted': 0.7432214445809138, 'Precision': 0.8324672930834801, 'Recall': 0.6705942614354127, 'Kappa': 0.708