In [27]:
import pandas as pd
import numpy as np


# Load the two datasets into pandas dataframes

In [28]:
data = np.loadtxt("data/HT_Sensor_dataset.dat", skiprows=1)
columns = ['id', 'time', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'temp', 'humidity']
dataset = pd.DataFrame(data, columns=columns)
dataset['id'] = dataset['id'].astype(int)


In [29]:
metadata = np.loadtxt("data/HT_Sensor_metadata.dat", skiprows=1, dtype=str)
metadata_columns = ['id', 'date', 'class', 'start_time_hours', 'duration_hours']
metadataset = pd.DataFrame(metadata, columns=metadata_columns)
metadataset['id'] = metadataset['id'].astype(int)


## Inner Join the tables to get our main dataset

In [30]:
joined_df = pd.merge(dataset, metadataset, left_on='id', right_on='id', how='inner')
joined_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 928991 entries, 0 to 928990
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                928991 non-null  int64  
 1   time              928991 non-null  float64
 2   r1                928991 non-null  float64
 3   r2                928991 non-null  float64
 4   r3                928991 non-null  float64
 5   r4                928991 non-null  float64
 6   r5                928991 non-null  float64
 7   r6                928991 non-null  float64
 8   r7                928991 non-null  float64
 9   r8                928991 non-null  float64
 10  temp              928991 non-null  float64
 11  humidity          928991 non-null  float64
 12  date              928991 non-null  object 
 13  class             928991 non-null  object 
 14  start_time_hours  928991 non-null  object 
 15  duration_hours    928991 non-null  object 
dtypes: float64(11), int6

In [31]:
## Set all records with time less than 0 equal to a background class
joined_df.loc[joined_df['time'] < 0, 'class'] = 'background'

## For each grouping/chunk, the last hour of data is also background
joined_df['duration_hours'] = joined_df['duration_hours'].astype(float)
joined_df.loc[joined_df['time'] > joined_df['duration_hours'], 'class'] = 'background'


In [37]:
X = []
y = []
## How many rows will be used for the prediction
snippet_size = 120
expected_rows_per_minute = 60
expected_snippet_time_gap = 1.05 * snippet_size / expected_rows_per_minute / 60
## How many rows will be skipped between each snippet
gap_size = 12
for start_index in range(0, len(joined_df) - snippet_size, gap_size):
    end_index = start_index + snippet_size
    ## Skip if the data spans across segments
    if joined_df.iloc[end_index,0] != joined_df.iloc[start_index,0]:
        continue
    ## If the time difference from beginning to end is larger than expected
    ## there must be some data missing or this chunk spans across tests
    if joined_df.iloc[end_index,1] - joined_df.iloc[start_index,1] > expected_snippet_time_gap:
        continue
    snippet = joined_df.iloc[start_index:end_index, 2:12].values
    X.append(snippet)
    y.append(joined_df.iloc[end_index,13])
    ## 5x SAMPLE ALL ACTIVE POINTS TO PROMOTE HIGHER ACCURACY
    if joined_df.iloc[end_index,13] != "background":
        X.append(snippet)
        y.append(joined_df.iloc[end_index,13])
        X.append(snippet)
        y.append(joined_df.iloc[end_index,13])
        X.append(snippet)
        y.append(joined_df.iloc[end_index,13])
        X.append(snippet)
        y.append(joined_df.iloc[end_index,13])


X = np.array(X)
y = np.array(y)


In [38]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

print(X_train.shape)
print(X[0][0])
print(y_train)


(109330, 120, 10)
[12.9403  10.501   10.5709  11.822   13.6899  13.4415   8.05928  8.77322
 26.1826  58.9305 ]
['wine' 'wine' 'banana' ... 'background' 'background' 'banana']


In [34]:
from sklearn.metrics import classification_report

from sktime.classification.deep_learning.lstmfcn import LSTMFCNClassifier

model = LSTMFCNClassifier(n_epochs=10, verbose=1)
model.fit(X_train, y_train)
print(model.score(X_test, y_test))




Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 9, 120)]             0         []                            
                                                                                                  
 conv1d_9 (Conv1D)           (None, 9, 128)               123008    ['input_4[0][0]']             
                                                                                                  
 batch_normalization_9 (Bat  (None, 9, 128)               512       ['conv1d_9[0][0]']            
 chNormalization)                                                                                 
                                                                                                  
 activation_9 (Activation)   (None, 9, 128)               0         ['batch_normalization_9[

2023-10-23 22:07:22.139889: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-10-23 22:07:23.615194: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-10-23 22:07:28.974775: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




KeyboardInterrupt: 

In [None]:
model.save("trained_sktime_model")
