In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tslearn
import lightgbm as lgb
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tslearn.shapelets import LearningShapelets, grabocka_params_to_shapelet_size_dict

In [12]:
# Load the pre-split arrays from the working directory: inputs (X_*) first,
# then the matching labels (y_*), each in train / test / val order.
X_train, X_test, X_val = (
    np.load(path) for path in ('X_train.npy', 'X_test.npy', 'X_val.npy')
)

y_train, y_test, y_val = (
    np.load(path) for path in ('y_train.npy', 'y_test.npy', 'y_val.npy')
)

In [15]:
# Work out how many shapelets to learn per shapelet length, using tslearn's
# grabocka_params_to_shapelet_size_dict helper. The result maps
# {shapelet length: number of shapelets of that length}.

# Training-set dimensions: first axis = series (trials), second axis = samples per series
n_ts, ts_sz = X_train.shape[:2]
n_classes = 2  # Hard-coded: assumes a two-class problem (e.g., ADL and fall)

# Heuristic parameters
l = 0.15  # Base shapelet length, as a fraction of the series length
r = 3     # Number of distinct shapelet lengths (multiples 1..r of the base length)

shapelet_dict = grabocka_params_to_shapelet_size_dict(
    n_ts=n_ts,
    ts_sz=ts_sz,
    n_classes=n_classes,
    l=l,
    r=r,
)

# Show the resulting {length: count} mapping
print(shapelet_dict)

{360: 6, 720: 6, 1080: 6}


In [None]:
import datetime

# Set the target size for one instance
instance_size = 2400  # Number of rows per trial
n_features = 6  # Number of features (sensor columns) iterated over below

# Labels used to supervise shapelet learning (alias of y_train)
train_y_learning_shapelets = y_train

# Seed for the shapelet learner so that a Restart & Run All reproduces the
# same shapelets (same value as the LightGBM random_state used later).
SHAPELET_SEED = 8


def make_shapelet_learner():
    """Return a fresh, unfitted LearningShapelets estimator.

    A new estimator *and* a new Adam optimizer are created on every call.
    Each sensor axis is fitted as an independent model; sharing one Keras
    optimizer instance across several models can fail on recent Keras
    versions, because the optimizer stays bound to the variables of the
    first model it trained.

    weight_regularizer adds regularization to limit overfitting, and
    scale=True asks tslearn to scale the input series.

    NOTE(review): total_lengths=5 differs from r = 3 used to print
    shapelet_dict in the earlier cell, and that dictionary is not passed
    here (n_shapelets_per_size) -- confirm which configuration is intended.
    """
    return LearningShapelets(
        weight_regularizer=0.001,
        batch_size=256,
        max_iter=1500,
        total_lengths=5,
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.003),
        scale=True,
        random_state=SHAPELET_SEED,
    )


# Per-sensor results, keyed by the sensor index as a string
shapelets_learned = {}  # learned shapelets
lhist = {}              # training history
shapelet_models = {}    # fitted estimators, kept so new data can be transformed later

# Per-sensor pieces of the transformed datasets; stacked once after the loop
train_parts = []
valid_parts = []
test_parts = []
shapelet_ts_parts = []

# Iterate through each sensor axis
for SENSOR in range(n_features):
    # Extract the univariate series of the current sensor
    train_data_sensor = X_train[:, :, SENSOR]
    val_data_sensor = X_val[:, :, SENSOR]
    test_data_sensor = X_test[:, :, SENSOR]

    print(f"Now finding shapelets from sensor {SENSOR} ---- {datetime.datetime.now().time()}")

    # Fit an independent shapelet learner on the training data of this sensor.
    # `trans` keeps referring to the most recently fitted learner, as before.
    trans = make_shapelet_learner()
    trans.fit(train_data_sensor, train_y_learning_shapelets)

    # Store the fitted model, the learned shapelets and the history
    shapelet_models[f'{SENSOR}'] = trans
    shapelets_learned[f'{SENSOR}'] = trans.shapelets_
    lhist[f'{SENSOR}'] = trans.history_

    # Shapelet-transform every split with the model fitted on the training split
    train_parts.append(trans.transform(train_data_sensor))
    valid_parts.append(trans.transform(val_data_sensor))
    test_parts.append(trans.transform(test_data_sensor))
    shapelet_ts_parts.append(trans.shapelets_as_time_series_)

# Concatenate the per-sensor features column-wise (one group of columns per
# sensor, in sensor order) and stack the learned shapelets row-wise.
train_shapelets = np.hstack(train_parts)
valid_shapelets = np.hstack(valid_parts)
test_shapelets = np.hstack(test_parts)
shapelets_learned_as_ts = np.vstack(shapelet_ts_parts)

print("DONE", datetime.datetime.now().time())

Now finding shapelets from sensor 0 ---- 19:57:13.938791




In [None]:
# Train a LightGBM classifier on the shapelet-transform features.
# Uses the `lgb` alias from the import cell and the label arrays loaded as
# y_train / y_val. The validation split is supplied as eval_set so log-loss
# is evaluated on it during training; no early-stopping callback is
# configured here.
model = lgb.LGBMClassifier(n_estimators=3500, random_state=8)
model.fit(
    train_shapelets,
    y_train,
    eval_set=[(valid_shapelets, y_val)],
    eval_metric='logloss',
)

# Make predictions on the validation set
valid_predictions = model.predict(valid_shapelets)

In [None]:
# Generate the confusion matrix of validation labels (y_val, as loaded in the
# data cell) against the model's validation predictions, and wrap it in a
# display object labelled with the classes the model was trained on.
cm = confusion_matrix(y_val, valid_predictions)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)

In [None]:
# Plot confusion matrix
# Create the axes explicitly and hand it to disp.plot(): without `ax`, the
# display opens its own figure, which leaves an empty 8x6 figure behind and
# ignores the requested size.
fig, ax = plt.subplots(figsize=(8, 6))
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
# Print the classification report for the validation split, comparing the
# validation labels (y_val, as loaded in the data cell) with the predictions.
report = classification_report(y_val, valid_predictions)
print("Classification Report:")
print(report)