<a href="https://colab.research.google.com/github/arbi11/CEFC-2022/blob/main/Timing_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from time import time

It is useful to record which version of TensorFlow produced the results below:

In [None]:
print(tf.version.VERSION)

2.9.2


In [None]:
import logging
tf.get_logger().setLevel(logging.INFO)

In [None]:
!lscpu

Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              2
On-line CPU(s) list: 0,1
Thread(s) per core:  2
Core(s) per socket:  1
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
CPU family:          6
Model:               85
Model name:          Intel(R) Xeon(R) CPU @ 2.00GHz
Stepping:            3
CPU MHz:             2000.204
BogoMIPS:            4000.40
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            1024K
L3 cache:            39424K
NUMA node0 CPU(s):   0,1
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_si

# Data Acquisition and Cleanup

In [None]:
# Published Google Sheets CSV exports, keyed by dataset name: (train_url, test_url).
DATASET_URLS = {
    'M19': (
        'https://docs.google.com/spreadsheets/d/e/2PACX-1vTX1O03TCJYomYg1QU6SpGi-OhR_V3Kkx1Xkc2JUpaWaLlztDSLMn6eFXyFmaVZrmRIJjGoqY5LkENP/pub?gid=1103714291&single=true&output=csv',
        'https://docs.google.com/spreadsheets/d/e/2PACX-1vQQFzZz0My7iSpzHtl2K2m-2oBCHT--nqaXZiPxct4sJO2CPQWiRD6HRA8AeJHHQkY6bcu9lxatiLaP/pub?gid=1992106649&single=true&output=csv',
    ),
}

# Spaced column headers mapped to the compact names 'Hx', 'Bx', 'Hy', 'By', 'Hz', 'Bz'.
# Only the test sheet is renamed; presumably the training sheet already uses the
# compact names (TODO confirm against the published sheet).
TEST_COLUMN_NAMES = {
    'H x': 'Hx', 'B x': 'Bx',
    'H y': 'Hy', 'B y': 'By',
    'H z': 'Hz', 'B z': 'Bz',
}

for name, (train_url, test_url) in DATASET_URLS.items():
  print(name)

  # Skip the 5 unused rows at the top of the sheet and keep columns 1-7 only
  # (column 0 is dropped), then cache the result as a local CSV.
  df = pd.read_csv(train_url, skiprows=5, usecols=range(1, 8))
  df.to_csv(f"{name}_train.csv", index=False)
  print(f"  train set: {df.shape}")

  # Same cleanup for the test sheet, plus the column rename.
  df = pd.read_csv(test_url, skiprows=5, usecols=range(1, 8)).rename(columns=TEST_COLUMN_NAMES)
  df.to_csv(f"{name}_test.csv", index=False)
  print(f"  test set:  {df.shape}")

print("\nDone")

M19
  train set: (2001, 7)
  test set:  (1178, 7)

Done


Next, download the M6 training set (the version with 20000 data points).

In [None]:
# Published Google Sheets CSV export of the M6 training data (20000 data point version).
M6_TRAIN_20000_URL = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vR4v2LZFBH-tn1nSFUkL8M3cOZSh1I1jQzXsoLyGtdt--hAgyfEYYk087pFB7iatP7FvJsdM9fvfzAG/pub?gid=1926006206&single=true&output=csv'

# Skip the first 5 rows, keep columns 1-7, and cache the result as a local CSV.
df = pd.read_csv(M6_TRAIN_20000_URL, skiprows=5, usecols=range(1, 8))
df.to_csv("M6_train20000.csv", index=False)
print(f"M6 (20000 data point version): {df.shape}")

# The frame is not needed after the CSV has been written.
del df

M6 (20000 data point version): (20001, 7)


# Training and Testing Sets

In [None]:
def generate_timesteps(df, window=1):
  """Build sliding-window samples for predicting the current 'Bx' value.

  One sample is produced per row of `df`. Sample `i` ends at row `i`; the
  last position along the window axis is the most recent step:

    x[i, window - 1 - k, 0] = Hx[i - k]        (Hx at that step)
    x[i, window - 1 - k, 1] = Bx[i - k - 1]    (Bx one row earlier)
    y[i, 0]                 = Bx[i]            (target)

  Positions that would reach before the first row are left at zero, so the
  earliest samples are zero-padded at the front of the window. This also
  holds when `window` is larger than `len(df)`.

  Args:
    df: DataFrame with numeric 'Hx' and 'Bx' columns, rows in time order.
    window: number of timesteps per sample; must be positive.

  Returns:
    Tuple `(x, y)` of float arrays with shapes `(len(df), window, 2)` and
    `(len(df), 1)`.
  """
  assert window > 0

  n = len(df)
  # Plain numpy arrays make every slice below purely positional, whatever
  # index the DataFrame carries.
  h = df['Hx'].to_numpy(dtype=float)
  b = df['Bx'].to_numpy(dtype=float)

  x = np.zeros((n, window, 2))
  # Lags of n or more would only touch padding, so stop at min(window, n);
  # this is what keeps window > len(df) from raising a broadcast error.
  for lag in range(min(window, n)):
    step = window - lag - 1
    x[lag:, step, 0] = h[:n - lag]
    # The Bx feature is shifted one extra row so the target never leaks in.
    x[lag + 1:, step, 1] = b[:n - lag - 1]

  # The target is simply Bx at the sample's own row; copy so the result never
  # aliases the DataFrame's storage.
  y = b.reshape(-1, 1).copy()

  return (x, y)

## M19

In [None]:
# Window the cached M19 training CSV into 30-step samples.
M19_train_X, M19_train_Y = generate_timesteps(pd.read_csv('./M19_train.csv'), window=30)

# "Fat" variant: the same two features followed by 4 all-zero feature channels,
# giving the 6 features per step that the 6-input models are declared with.
M19_fat_X = np.pad(M19_train_X, ((0, 0), (0, 0), (0, 4)), mode='constant')

print(
    "Training Set\n"
    f"  X: {M19_train_X.shape} or {M19_fat_X.shape}\n"
    f"  Y: {M19_train_Y.shape}"
)

Training Set
  X: (2001, 30, 2) or (2001, 30, 6)
  Y: (2001, 1)


# ML Models

## Temporal Convolution Network (CNV4)

In [None]:
def new_model_cnv4():
  """Build the (uncompiled) CNV4 temporal convolution model.

  Layout, for an input of 7 timesteps x 2 features:
    * 4 residual stages, each: causal Conv1D(128, k=4) -> causal
      Conv1D(128, k=4) -> Add of the two convolution outputs;
    * 3 stages of Conv1D(128, k=3) + Dense(128). These convolutions pass no
      padding argument, so with Keras' default 'valid' padding each one
      shortens the sequence by two steps (7 -> 5 -> 3 -> 1);
    * Dense(1, linear) followed by Flatten, giving one value per sample.

  Returns:
    A `tf.keras.Model`.
  """
  def causal_conv(tensor):
    # 128 filters, kernel 4, causal padding keeps the sequence length.
    return tf.keras.layers.Conv1D(
        128, kernel_size=(4,), padding='causal', activation='swish')(tensor)

  inputs = tf.keras.layers.Input(shape=(7, 2))

  features = inputs
  for _ in range(4):
    trunk = causal_conv(features)
    branch = causal_conv(trunk)
    features = tf.keras.layers.Add()([trunk, branch])

  for _ in range(3):
    features = tf.keras.layers.Conv1D(128, kernel_size=(3,), activation='swish')(features)
    features = tf.keras.layers.Dense(128, activation='swish')(features)

  features = tf.keras.layers.Dense(1, activation='linear')(features)
  outputs = tf.keras.layers.Flatten()(features)

  return tf.keras.Model(inputs=inputs, outputs=outputs)

new_model_cnv4().summary()

Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_12 (InputLayer)          [(None, 7, 2)]       0           []                               
                                                                                                  
 conv1d_44 (Conv1D)             (None, 7, 128)       1152        ['input_12[0][0]']               
                                                                                                  
 conv1d_45 (Conv1D)             (None, 7, 128)       65664       ['conv1d_44[0][0]']              
                                                                                                  
 add_16 (Add)                   (None, 7, 128)       0           ['conv1d_44[0][0]',              
                                                                  'conv1d_45[0][0]']       

## LSTM

In [None]:
def new_model_lstm():
  """Build the (uncompiled) stacked-LSTM model.

  Two 400-unit LSTM layers followed by a single-unit Dense output. The input
  is declared as (any number of timesteps, 2 features); the first LSTM
  returns the full sequence so the second one can consume it.

  Returns:
    A `tf.keras.Sequential` model.
  """
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.LSTM(400, input_shape=(None, 2), return_sequences=True))
  model.add(tf.keras.layers.LSTM(400))
  model.add(tf.keras.layers.Dense(1))
  return model

new_model_lstm().summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, None, 400)         644800    
                                                                 
 lstm_5 (LSTM)               (None, 400)               1281600   
                                                                 
 dense_22 (Dense)            (None, 1)                 401       
                                                                 
Total params: 1,926,801
Trainable params: 1,926,801
Non-trainable params: 0
_________________________________________________________________


## GRU1

In [None]:
def new_model_gru1():
  """Build the (uncompiled) two-layer GRU model.

  A 32-unit GRU returning the full sequence feeds a single-unit GRU whose
  final state is the model output. The input is declared as (any number of
  timesteps, 2 features).

  Returns:
    A `tf.keras.Model`.
  """
  inputs = tf.keras.Input(shape=(None, 2))
  sequence = tf.keras.layers.GRU(32, return_sequences=True)(inputs)
  outputs = tf.keras.layers.GRU(1)(sequence)
  return tf.keras.Model(inputs, outputs)

new_model_gru1().summary()

Model: "model_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_20 (InputLayer)       [(None, None, 2)]         0         
                                                                 
 gru_26 (GRU)                (None, None, 32)          3456      
                                                                 
 gru_27 (GRU)                (None, 1)                 105       
                                                                 
Total params: 3,561
Trainable params: 3,561
Non-trainable params: 0
_________________________________________________________________


## Bidi GRU

In [None]:
def new_model_bidi():
  """Build the (uncompiled) GRU model with a bidirectional output layer.

  A 32-unit forward GRU returning the full sequence feeds a single-unit GRU
  wrapped in `Bidirectional`; the two directions are merged by averaging
  (`merge_mode='ave'`). The input is declared as (any number of timesteps,
  6 features).

  Returns:
    A `tf.keras.Model`.
  """
  inputs = tf.keras.Input(shape=(None, 6))
  sequence = tf.keras.layers.GRU(32, return_sequences=True)(inputs)

  head = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(1), merge_mode='ave')
  outputs = head(sequence)

  return tf.keras.Model(inputs, outputs)

new_model_bidi().summary()

Model: "model_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_14 (InputLayer)       [(None, None, 6)]         0         
                                                                 
 gru_16 (GRU)                (None, None, 32)          3840      
                                                                 
 bidirectional_5 (Bidirectio  (None, 1)                210       
 nal)                                                            
                                                                 
Total params: 4,050
Trainable params: 4,050
Non-trainable params: 0
_________________________________________________________________


## RNN + Transformer

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class TransformerEncoder(layers.Layer):
    """Transformer encoder block.

    Applies multi-head self-attention and then a two-layer dense projection
    (`dense_dim` units with ReLU, then `embed_dim` units). Each sub-block is
    followed by a residual addition and layer normalization.

    Args:
        embed_dim: size used as the attention `key_dim` and as the width of
            the projection's output layer.
        dense_dim: width of the projection's hidden layer.
        num_heads: number of attention heads.
        **kwargs: forwarded to `layers.Layer`.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        # Hyper-parameters are kept so get_config() can report them.
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential([
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        """Run the block on `inputs`, using `mask` as the attention mask when given."""
        # A new axis is inserted at position 1 of the mask before it is handed
        # to the attention layer (assumes a (batch, steps) mask -- TODO confirm).
        attention_mask = None if mask is None else mask[:, tf.newaxis, :]
        attended = self.attention(inputs, inputs, attention_mask=attention_mask)
        normed = self.layernorm_1(inputs + attended)
        projected = self.dense_proj(normed)
        return self.layernorm_2(normed + projected)

    def get_config(self):
        """Return the base layer config extended with this block's hyper-parameters."""
        return {
            **super().get_config(),
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        }

In [None]:
def new_model_rtr1():
  """Build the (uncompiled) GRU + Transformer-encoder model.

  A 32-unit GRU returning the full sequence is followed by a
  `TransformerEncoder` (embed_dim=32, dense_dim=32, 4 heads) and a
  single-unit GRU whose final state is the model output. The input is
  declared as (any number of timesteps, 6 features).

  Returns:
    A `keras.Model`.
  """
  inputs = tf.keras.Input(shape=(None, 6))
  sequence = layers.GRU(32, return_sequences=True)(inputs)
  encoded = TransformerEncoder(embed_dim=32, dense_dim=32, num_heads=4)(sequence)
  outputs = layers.GRU(1)(encoded)
  return keras.Model(inputs, outputs)

new_model_rtr1().summary()

Model: "model_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_21 (InputLayer)       [(None, None, 6)]         0         
                                                                 
 gru_28 (GRU)                (None, None, 32)          3840      
                                                                 
 transformer_encoder_2 (Tran  (None, None, 32)         19040     
 sformerEncoder)                                                 
                                                                 
 gru_29 (GRU)                (None, 1)                 105       
                                                                 
Total params: 22,985
Trainable params: 22,985
Non-trainable params: 0
_________________________________________________________________


# Utilities

In [None]:
def compile_and_fit(model, train_X, train_Y,
                    epochs=10,
                    patience=10,
                    min_delta=0,
                    batch_size=50,
                    seed=9763):
  """Compile `model` (MSE loss, Adam) and fit it on normalised copies of the data.

  The caller's arrays are not modified. On private copies, feature 1 of
  `train_X` and all of `train_Y` are divided by 1.5, and the targets are then
  clipped to [-1, 1]. Feature 0 of `train_X` is passed through unscaled.

  Args:
    model: Keras model to compile and train.
    train_X: inputs, indexed as [sample, timestep, feature].
    train_Y: targets.
    epochs: maximum number of epochs.
    patience: early-stopping patience on the training loss.
    min_delta: minimum loss change that counts as an improvement.
    batch_size: mini-batch size.
    seed: if not None, seeds both numpy and TensorFlow before training.
      NOTE(review): the model is built by the caller before this runs, so the
      seed presumably does not cover weight initialisation -- confirm.

  Returns:
    Whatever `model.fit` returns (the Keras training history).
  """
  if seed is not None:
    np.random.seed(seed)
    tf.random.set_seed(seed)

  model.compile(loss='mse', optimizer='adam')

  # Normalise on copies so the notebook-level tensors can be reused as-is.
  features = np.array(train_X, copy=True)
  targets = np.array(train_Y, copy=True)
  features[:, :, 1] /= 1.5
  targets /= 1.5
  np.clip(targets, -1, 1, out=targets)

  # Early stopping watches the training loss; no validation split is used.
  return model.fit(
      features, targets,
      epochs=epochs,
      batch_size=batch_size,
      callbacks=[
          tf.keras.callbacks.EarlyStopping(monitor='loss',
                                           mode='auto',
                                           verbose=0,
                                           min_delta=min_delta,
                                           patience=patience),
      ])

In [None]:
def time_model(model, steps, cycles=100, features=2):
  """Call `model.predict` `cycles` times on freshly generated random inputs.

  Each input has shape (1, steps, features) and is drawn uniformly from
  [0, 1). Feature 0 is then mapped to [-800, 800) and feature 1 to [-1, 1);
  any further features stay in [0, 1). Nothing is returned.

  Args:
    model: object exposing `predict(batch)`.
    steps: number of timesteps in each generated input.
    cycles: number of predictions to run.
    features: number of features per timestep; must be at least 2.
  """
  assert features >= 2

  # Per-feature affine map for the first two features: value * scale - offset.
  scale = np.array([1600.0, 2.0])
  offset = np.array([800.0, 1.0])

  for _ in range(cycles):
    sample = np.random.rand(1, steps, features)
    sample[:, :, :2] = sample[:, :, :2] * scale - offset

    model.predict(sample)

In [None]:
def sketchy_timing_stats(output):
  """Print the mean and standard deviation of the timings found in `output`.

  `output` is scanned line by line for the first occurrence of
  "<seconds>s <milliseconds>ms"; each hit is converted to milliseconds.
  Blank lines and lines without such a timing (warnings, log messages, ...)
  are skipped. Presumably `output` is the captured console output of
  `time_model` -- verify against the calling cell.

  Args:
    output: text to scan, with lines separated by '\\n'.

  Prints:
    "avg: <mean>" and "std: <population std>" in milliseconds, or
    "avg: nan" / "std: nan" when no timing was found.
  """
  import re
  # Raw string: '\d' inside a normal string literal is an invalid escape sequence.
  pattern = re.compile(r'(\d+)s (\d+)ms')

  q = []
  for line in output.split('\n'):
    m = pattern.search(line)
    if m is None:
      # Not a timing line; skipping it avoids an AttributeError on m.group().
      continue
    q.append(int(m.group(1)) * 1000 + int(m.group(2)))

  if not q:
    # Report the same text numpy would produce for an empty sample, without
    # triggering its runtime warnings.
    print("avg: nan")
    print("std: nan")
    return

  print(f"avg: {np.average(q)}")
  print(f"std: {np.std(q)}")

# Model Performance

## CNV4

Note that CNV4 takes a fixed input of 7 timesteps, while the training windows contain 30. Only the last 7 timesteps of each window are therefore passed to it.

In [None]:
# Train CNV4 on the last 7 timesteps of every 30-step window (its input is
# declared with exactly 7 steps), save the trained model to disk, and drop the
# in-memory references; the timing cell reloads the model from disk.
model = new_model_cnv4()
history = compile_and_fit(model, M19_train_X[:, -7:, :], M19_train_Y)
model.save('./Model_CNV4_M19')
del model, history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Assets written to: ./Model_CNV4_M19/assets


In [None]:
# Reload the saved CNV4 model and run time_model's default 100 predictions on
# random 7-step, 2-feature inputs.
time_model(tf.keras.models.load_model('./Model_CNV4_M19'), 7)



In [None]:
#...

avg: 20.646464646464647
std: 6.243077995252882


## LSTM

In [None]:
# Train the LSTM on the full 30-step, 2-feature windows, save the trained model
# to disk, and drop the in-memory references; the timing cell reloads it.
model = new_model_lstm()
history = compile_and_fit(model, M19_train_X, M19_train_Y)
model.save('./Model_LSTM_M19')
del model, history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Assets written to: ./Model_LSTM_M19/assets


In [None]:
# Reload the saved LSTM model and run time_model's default 100 predictions on
# random 30-step, 2-feature inputs (30 is the training window length).
time_model(tf.keras.models.load_model('./Model_LSTM_M19'), 30)



In [None]:
#...

avg: 21.414141414141415
std: 7.26971037313695


## GRU1

In [None]:
# Train GRU1 on the full 30-step, 2-feature windows, save the trained model to
# disk, and drop the in-memory references; the timing cell reloads it.
model = new_model_gru1()
history = compile_and_fit(model, M19_train_X, M19_train_Y)
model.save('./Model_GRU1_M19')
del model, history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Assets written to: ./Model_GRU1_M19/assets


In [None]:
# Reload the saved GRU1 model and run time_model's default 100 predictions on
# random 30-step, 2-feature inputs (30 is the training window length).
time_model(tf.keras.models.load_model('./Model_GRU1_M19'), 30)



In [None]:
#...

avg: 17.11111111111111
std: 2.1267364332096235


## Bidi GRU

In [None]:
# Train the bidirectional GRU on M19_fat_X, the zero-padded 6-feature variant of
# the training windows (the model's input is declared with 6 features), then
# save the trained model to disk and drop the in-memory references.
model = new_model_bidi()
history = compile_and_fit(model, M19_fat_X, M19_train_Y)
model.save('./Model_BIDI_M19')
del model, history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Assets written to: ./Model_BIDI_M19/assets


In [None]:
# Reload the saved bidirectional model and run time_model's default 100
# predictions on random 30-step inputs with 6 features, matching its input width.
time_model(tf.keras.models.load_model('./Model_BIDI_M19'), 30, features=6)



In [None]:
#...

avg: 17.97979797979798
std: 1.9897708003626238


## RNN + Transformer

In [None]:
# Train the GRU + Transformer model on M19_fat_X, the zero-padded 6-feature
# variant of the training windows (the model's input is declared with 6
# features), then save the trained model to disk and drop the in-memory references.
model = new_model_rtr1()
history = compile_and_fit(model, M19_fat_X, M19_train_Y)
model.save('./Model_RTR1_M19')
del model, history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Assets written to: ./Model_RTR1_M19/assets


In [None]:
# Reload the saved GRU + Transformer model and run time_model's default 100
# predictions on random 30-step inputs with 6 features, matching its input width.
time_model(tf.keras.models.load_model('./Model_RTR1_M19'), 30, features=6)



In [None]:
#...

avg: 18.555555555555557
std: 2.016208173386055


# The End