In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sktime.classification.deep_learning.inceptiontime import InceptionTimeClassifier
from sklearn.model_selection import GridSearchCV

# Load the two datasets into pandas dataframes

In [2]:
## Loading the dataset
data = np.loadtxt("data/HT_Sensor_dataset.dat", skiprows=1)

dataset = pd.DataFrame(
    {'id': data[:, 0],
     'time': data[:, 1],
     'r1': data[:, 2],
     'r2': data[:, 3],
     'r3': data[:, 4],
     'r4': data[:, 5],
     'r5': data[:, 6],
     'r6': data[:, 7],
     'r7': data[:, 8],
     'r8': data[:, 9],
     'temp': data[:, 10],
     'humidity': data[:, 11]
    })
dataset['id'] = dataset['id'].astype(int)

In [3]:
## Importing dataset
metadata = np.loadtxt("data/HT_Sensor_metadata.dat", skiprows=1, dtype=str)

metadataset = pd.DataFrame(
    {'id': metadata[:, 0],
     'date': metadata[:, 1],
     'class': metadata[:, 2],
     'start_time_hours': metadata[:, 3],
     'duration_hours': metadata[:, 4]
    })
metadataset['id'] = metadataset['id'].astype(int)

#Apparently no data
metadataset = metadataset[metadataset['id'] != 95]

In [4]:
joined_df = pd.merge(dataset, metadataset, left_on='id', right_on='id', how='inner')
joined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 928991 entries, 0 to 928990
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                928991 non-null  int64  
 1   time              928991 non-null  float64
 2   r1                928991 non-null  float64
 3   r2                928991 non-null  float64
 4   r3                928991 non-null  float64
 5   r4                928991 non-null  float64
 6   r5                928991 non-null  float64
 7   r6                928991 non-null  float64
 8   r7                928991 non-null  float64
 9   r8                928991 non-null  float64
 10  temp              928991 non-null  float64
 11  humidity          928991 non-null  float64
 12  date              928991 non-null  object 
 13  class             928991 non-null  object 
 14  start_time_hours  928991 non-null  object 
 15  duration_hours    928991 non-null  object 
dtypes: float64(11), int6

In [5]:
def add_prefix(row):
    if row['time'] < 0:
        return 'background'
    else:
        return row['class']

joined_df['class'] = joined_df.apply(add_prefix, axis=1)
joined_df.head()

Unnamed: 0,id,time,r1,r2,r3,r4,r5,r6,r7,r8,temp,humidity,date,class,start_time_hours,duration_hours
0,0,-0.99975,12.8621,10.3683,10.4383,11.6699,13.4931,13.3423,8.04169,8.73901,26.2257,59.0528,07-04-15,background,13.49,1.64
1,0,-0.999472,12.8617,10.3682,10.4375,11.6697,13.4927,13.3412,8.04133,8.73908,26.2308,59.0299,07-04-15,background,13.49,1.64
2,0,-0.999194,12.8607,10.3686,10.437,11.6696,13.4924,13.3405,8.04101,8.73915,26.2365,59.0093,07-04-15,background,13.49,1.64
3,0,-0.998916,12.8602,10.3686,10.437,11.6697,13.4921,13.3398,8.04086,8.73936,26.2416,58.9905,07-04-15,background,13.49,1.64
4,0,-0.998627,12.8595,10.3688,10.4374,11.6699,13.4919,13.339,8.04087,8.73986,26.2462,58.9736,07-04-15,background,13.49,1.64


In [6]:
X = []
y = []
snippet_size = 480
gap_size = 30
for id in joined_df["id"].unique():
    temp_df = joined_df[joined_df["id"] == id]
    for start_index in range(0, len(temp_df) - snippet_size, gap_size):
        if temp_df.iloc[start_index,1] < 0 && temp_df.iloc[end_index,1] > 0:
            break
        end_index = start_index + snippet_size
        snippet = temp_df.iloc[start_index:end_index, 2:12].values
        X.append(snippet)
        y.append(temp_df.iloc[end_index,13])
    
X = np.array(X)
y = np.array(y)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

print(X_train.shape)
print(y_train)

(20601, 480, 10)
['background' 'background' 'background' ... 'banana' 'background'
 'background']


In [None]:
from sktime.classification.kernel_based import Arsenal
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
kernel = KNeighborsTimeSeriesClassifier()
kernel.fit(X_train, y_train)
kernel.score(X_test, y_test)

In [None]:
# import pickle

# pickle.dump(kernel, open("trained_model", "wb"))

In [None]:
# loaded = pickle.load(open("trained_model", "rb"))
# test_data = []
# test_data = X[60]
# test_data = test_data[np.newaxis, ...]
# result = loaded.predict(np.nan_to_num(test_data))
# print(result[0])