In [14]:
import pandas as pd
import numpy as np


# Load the two datasets into pandas dataframes

In [15]:
data = np.loadtxt("data/HT_Sensor_dataset.dat", skiprows=1)
columns = ['id', 'time', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'temp', 'humidity']
dataset = pd.DataFrame(data, columns=columns)
dataset['id'] = dataset['id'].astype(int)


In [16]:
metadata = np.loadtxt("data/HT_Sensor_metadata.dat", skiprows=1, dtype=str)
metadata_columns = ['id', 'date', 'class', 'start_time_hours', 'duration_hours']
metadataset = pd.DataFrame(metadata, columns=metadata_columns)
metadataset['id'] = metadataset['id'].astype(int)


## Inner Join the tables to get our main dataset

In [17]:
joined_df = pd.merge(dataset, metadataset, left_on='id', right_on='id', how='inner')
joined_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 928991 entries, 0 to 928990
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                928991 non-null  int64  
 1   time              928991 non-null  float64
 2   r1                928991 non-null  float64
 3   r2                928991 non-null  float64
 4   r3                928991 non-null  float64
 5   r4                928991 non-null  float64
 6   r5                928991 non-null  float64
 7   r6                928991 non-null  float64
 8   r7                928991 non-null  float64
 9   r8                928991 non-null  float64
 10  temp              928991 non-null  float64
 11  humidity          928991 non-null  float64
 12  date              928991 non-null  object 
 13  class             928991 non-null  object 
 14  start_time_hours  928991 non-null  object 
 15  duration_hours    928991 non-null  object 
dtypes: float64(11), int6

In [25]:
## Set all records with time less than 0 equal to a background class
joined_df.loc[joined_df['time'] < 0, 'class'] = 'background'

## For each grouping/chunk, the last hour of data is also background
joined_df['duration_hours'] = joined_df['duration_hours'].astype(float)
joined_df.loc[joined_df['time'] > joined_df['duration_hours'], 'class'] = 'background'


In [26]:
X = []
y = []
## How many rows will be used for the prediction
snippet_size = 120
expected_rows_per_minute = 60
expected_snippet_time_gap = 1.05 * snippet_size / expected_rows_per_minute / 60
## How many rows will be skipped between each snippet
gap_size = 12
for start_index in range(0, len(joined_df) - snippet_size, gap_size):
    end_index = start_index + snippet_size
    ## Skip if the data spans across segments
    if joined_df.iloc[end_index,0] != joined_df.iloc[start_index,0]:
        continue
    ## If the time difference from beginning to end is larger than expected
    ## there must be some data missing or this chunk spans across tests
    if joined_df.iloc[end_index,1] - joined_df.iloc[start_index,1] > expected_snippet_time_gap:
        continue
    snippet = joined_df.iloc[start_index:end_index, 2:11].values
    X.append(snippet)
    y.append(joined_df.iloc[end_index,13])
    ## 5x SAMPLE ALL ACTIVE POINTS TO PROMOTE HIGHER ACCURACY
    if joined_df.iloc[end_index,13] != "background":
        X.append(snippet)
        y.append(joined_df.iloc[end_index,13])
        X.append(snippet)
        y.append(joined_df.iloc[end_index,13])
        X.append(snippet)
        y.append(joined_df.iloc[end_index,13])
        X.append(snippet)
        y.append(joined_df.iloc[end_index,13])


X = np.array(X)
y = np.array(y)


In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

print(X_train.shape)
print(y_train)


(109330, 120, 10)
['wine' 'wine' 'banana' ... 'background' 'background' 'banana']


In [23]:
from sklearn.metrics import classification_report

from sktime.classification.deep_learning.lstmfcn import LSTMFCNClassifier

model = LSTMFCNClassifier(n_epochs=100, verbose=1)
model.fit(X_train, y_train)
print(model.score(X_test, y_test))




Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 10, 120)]            0         []                            
                                                                                                  
 conv1d_6 (Conv1D)           (None, 10, 128)              123008    ['input_3[0][0]']             
                                                                                                  
 batch_normalization_6 (Bat  (None, 10, 128)              512       ['conv1d_6[0][0]']            
 chNormalization)                                                                                 
                                                                                                  
 activation_6 (Activation)   (None, 10, 128)              0         ['batch_normalization_6[

2023-10-23 17:24:26.798785: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-10-23 17:24:27.372692: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-10-23 17:24:31.664944: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


2023-10-23 19:42:46.535968: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-10-23 19:42:46.872508: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


0.973438700471957


In [24]:
model.save("trained_sktime_model")


INFO:tensorflow:Assets written to: trained_sktime_model/keras/assets


INFO:tensorflow:Assets written to: trained_sktime_model/keras/assets


<zipfile.ZipFile filename='trained_sktime_model.zip' mode='r'>