In [37]:
from preprocess import get_prepared_data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

In [None]:
# Fetch the prepared dataset, keeping the original class labels
# (no oversampling, no relabelling) so the task stays a 5-class problem.
X, y = get_prepared_data(oversample=False, relabel=False)

# Append a trailing channel axis: (n_samples, series_length) -> (n_samples, series_length, 1).
# NOTE(review): the series length is presumably 178 -- confirm against get_prepared_data.
X = X.reshape(X.shape[:2] + (1,))

# Stratified 80/20 hold-out split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [41]:
class ImprovedShapeletClassifier:
    """Nearest-shapelet classifier for time series.

    Shapelets are (sub)sequences of time series whose pattern is meant to be
    characteristic of a class. This implementation does not search for the
    most discriminative subsequences: ``fit`` draws ``n_shapelets`` whole
    training series at random and uses them, together with their labels, as
    shapelets. ``predict`` assigns each sample the label of the shapelet it
    is closest to in Euclidean distance.

    Parameters
    ----------
    n_shapelets : int, default=10
        Number of training series to keep as shapelets.
    random_state : int or None, default=42
        Seed for the private random generator used to draw the shapelets.
        The default reproduces the selection previously obtained with
        ``np.random.seed(42)``; ``None`` gives a different draw on each fit.

    Attributes
    ----------
    shapelets : ndarray of shape (n_shapelets, series_length, n_channels) or None
        Selected shapelets; ``None`` until ``fit`` has been called.
    shapelet_labels : ndarray of shape (n_shapelets,) or None
        Class label attached to each shapelet.
    classes_ : ndarray or None
        Sorted unique class labels seen during ``fit``.
    """

    def __init__(self, n_shapelets=10, random_state=42):
        self.n_shapelets = n_shapelets       # number of shapelets to draw
        self.random_state = random_state     # seed of the private RNG used in fit
        self.shapelets = None                # selected shapelets (set by fit)
        self.shapelet_labels = None          # label of each selected shapelet
        self.classes_ = None                 # unique class labels (set by fit)

    def fit(self, X, y):
        """Select random training series as shapelets and remember their labels.

        :param X: Training data with shape (n_samples, series_length, n_channels).
        :param y: Labels with length n_samples (array-like or pandas Series).
        :return: The fitted classifier (``self``).
        :raises ValueError: If ``X`` is not 3-dimensional, if ``X`` and ``y``
            disagree on the number of samples, or if ``n_shapelets`` is not
            in ``[1, n_samples]``.
        """
        X = np.asarray(X)
        if X.ndim != 3:
            raise ValueError(
                f"X must have shape (n_samples, series_length, n_channels); got {X.shape}"
            )

        # Convert to a plain array so the integer indices below are positional.
        # Indexing a pandas Series with them would be label-based and break on
        # the shuffled index produced by train_test_split.
        labels = np.asarray(y)
        n_samples = X.shape[0]
        if len(labels) != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {len(labels)}"
            )
        if not 1 <= self.n_shapelets <= n_samples:
            raise ValueError(
                f"n_shapelets must be between 1 and n_samples={n_samples}; "
                f"got {self.n_shapelets}"
            )

        # Private generator: same draw as the legacy np.random.seed(seed) +
        # np.random.choice(...), but without reseeding the global NumPy RNG.
        rng = np.random.RandomState(self.random_state)
        shapelet_indices = rng.choice(n_samples, self.n_shapelets, replace=False)

        self.classes_ = np.unique(labels)
        self.shapelets = X[shapelet_indices]
        self.shapelet_labels = labels[shapelet_indices]
        return self

    def _distance_to_shapelet(self, X, shapelet):
        """Return the smallest Euclidean distance between a series and a shapelet.

        The shapelet is slid along the time axis of the series and the distance
        of the best-matching window is returned. When both have the same length
        there is a single window, so the result is the plain Euclidean distance
        between the two series.

        :param X: One series with shape (series_length, n_channels) or (series_length,).
        :param shapelet: Shapelet with shape (shapelet_length, n_channels) or
            (shapelet_length,), with shapelet_length <= series_length.
        :return: Minimum window distance as a float.
        :raises ValueError: If the shapelet is empty or longer than the series.
        """
        series = np.asarray(X, dtype=float)
        pattern = np.asarray(shapelet, dtype=float)
        if series.ndim == 1:
            series = series.reshape(-1, 1)
        if pattern.ndim == 1:
            pattern = pattern.reshape(-1, 1)

        width = pattern.shape[0]
        n_windows = series.shape[0] - width + 1
        if width == 0 or n_windows < 1:
            raise ValueError(
                "shapelet must be non-empty and no longer than the series "
                f"(shapelet length {width}, series length {series.shape[0]})"
            )

        # Sum squared differences over time AND channels for each alignment.
        # Reducing over the channel axis alone would yield point-wise gaps
        # whose minimum is close to zero for almost any pair of series.
        best_sq = min(
            np.sum((series[start:start + width] - pattern) ** 2)
            for start in range(n_windows)
        )
        return float(np.sqrt(best_sq))

    def predict(self, X):
        """Predict a label for each sample from its closest shapelet.

        :param X: Test data with shape (n_samples, series_length, n_channels).
        :return: Predictions with shape (n_samples,).
        :raises RuntimeError: If the classifier has not been fitted.
        :raises ValueError: If ``X`` is not 3-dimensional.
        """
        if self.shapelets is None or self.shapelet_labels is None:
            raise RuntimeError("This classifier is not fitted yet; call fit() first.")

        X = np.asarray(X)
        if X.ndim != 3:
            raise ValueError(
                f"X must have shape (n_samples, series_length, n_channels); got {X.shape}"
            )

        predictions = []
        for sample in X:
            # Distance of this sample to every stored shapelet.
            distances = [
                self._distance_to_shapelet(sample, shapelet)
                for shapelet in self.shapelets
            ]
            # The label of the nearest shapelet becomes the prediction.
            closest_shapelet_idx = int(np.argmin(distances))
            predictions.append(self.shapelet_labels[closest_shapelet_idx])

        return np.array(predictions)


In [43]:
# Build a classifier that keeps 50 randomly drawn training series as shapelets.
shapelet_clf = ImprovedShapeletClassifier(n_shapelets=50)

# Fit on the training split.
shapelet_clf.fit(X=X_train, y=y_train)

# Label the held-out split.
y_pred = shapelet_clf.predict(X=X_test)



In [None]:
# Evaluation of the results
# Use one explicit label order for both the confusion matrix and the heatmap
# ticks, so rows/columns cannot drift from their captions if a class is
# missing from y_test / y_pred.
class_labels = shapelet_clf.classes_

accuracy = accuracy_score(y_test, y_pred)
# zero_division=0: a class that is never predicted contributes 0 to the
# weighted average instead of triggering an UndefinedMetricWarning.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))
print()
print(conf_matrix)

# Explicit figure/axes interface; rows are true labels, columns are predictions.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
plt.show()
