## ROCKET model in this file

In [None]:
import numpy as np
import pandas as pd
from sktime.transformations.panel.rocket import Rocket
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, log_loss, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

#### Load the data

In [None]:
def prep_data_for_rocket(df, pad_length=250):
    """Build fixed-length per-vehicle feature sequences and their class labels.

    Rows are grouped by ``vehicle_id`` (in order of first appearance) and
    sorted by ``time_step``. Every column other than ``vehicle_id``,
    ``time_step`` and ``class`` is treated as a feature. Sequences shorter
    than ``pad_length`` are zero-padded at the end; longer sequences keep
    only their first ``pad_length`` steps.

    Args:
        df: DataFrame with the columns ``vehicle_id``, ``time_step`` and
            ``class`` plus one column per feature.
        pad_length: Number of time steps in every returned sequence.

    Returns:
        A tuple ``(X_data, y_data)`` where ``X_data`` has shape
        ``(num_samples, pad_length, num_features)`` and ``y_data`` has shape
        ``(num_samples,)``. Each label is the ``class`` value of the vehicle's
        last time step, read before any truncation.
    """
    # NOTE(review): the result is laid out as (samples, time, features).
    # sktime's 3D numpy convention appears to be (instances, channels,
    # timepoints) -- confirm whether callers should transpose before Rocket.

    # The feature columns are the same for every vehicle, so compute them once.
    excluded = {"vehicle_id", "time_step", "class"}
    feature_columns = [col for col in df.columns if col not in excluded]

    X_data = []
    y_data = []

    # A single groupby pass replaces one boolean-mask scan of the whole frame
    # per vehicle; sort=False keeps the vehicles in first-appearance order.
    for _, vehicle_data in df.groupby("vehicle_id", sort=False):
        vehicle_data = vehicle_data.sort_values(by="time_step")
        features = vehicle_data[feature_columns].values

        # Label of the sequence: class at the last time step.
        y_data.append(vehicle_data["class"].iloc[-1])

        missing_steps = pad_length - len(features)
        if missing_steps > 0:
            # Pad the end of the time axis with zeros.
            features = np.pad(
                features, ((0, missing_steps), (0, 0)), constant_values=0
            )
        else:
            # Truncate sequences that are too long.
            features = features[:pad_length]

        X_data.append(features)

    X_data = np.array(X_data)  # (num_samples, pad_length, num_features)
    y_data = np.array(y_data)  # (num_samples,)

    return X_data, y_data
        

In [None]:
# Training split of dataset 1.
dataset_1_train = pd.read_csv("datasets/train_dataset_1.csv")
X_train_1, y_train_1 = prep_data_for_rocket(dataset_1_train)

# Training split of dataset 2. This must read train_dataset_2.csv (as the
# validation and test cells do for their splits); reading dataset 1's file
# here would make both training sets identical.
dataset_2_train = pd.read_csv("datasets/train_dataset_2.csv")
X_train_2, y_train_2 = prep_data_for_rocket(dataset_2_train)

# Sanity-check the resulting array shapes.
print("Dataset 1")
print("X: ", X_train_1.shape)
print("y: ", y_train_1.shape)

print("Dataset 2")
print("X: ", X_train_2.shape)
print("y: ", y_train_2.shape)

In [None]:
# Validation split of dataset 1: load, convert to sequences, report shapes.
dataset_1_val = pd.read_csv(filepath_or_buffer="datasets/val_dataset_1.csv")
X_val_1, y_val_1 = prep_data_for_rocket(df=dataset_1_val)

# Validation split of dataset 2.
dataset_2_val = pd.read_csv(filepath_or_buffer="datasets/val_dataset_2.csv")
X_val_2, y_val_2 = prep_data_for_rocket(df=dataset_2_val)

# Shapes of the padded sequences and their labels.
print("Dataset 1")
print("X: ", X_val_1.shape)
print("y: ", y_val_1.shape)

print("Dataset 2")
print("X: ", X_val_2.shape)
print("y: ", y_val_2.shape)

In [None]:
# Test split of dataset 1: load, convert to sequences, report shapes.
dataset_1_test = pd.read_csv(filepath_or_buffer="datasets/test_dataset_1.csv")
X_test_1, y_test_1 = prep_data_for_rocket(df=dataset_1_test)

# Test split of dataset 2.
dataset_2_test = pd.read_csv(filepath_or_buffer="datasets/test_dataset_2.csv")
X_test_2, y_test_2 = prep_data_for_rocket(df=dataset_2_test)

# Shapes of the padded sequences and their labels.
print("Dataset 1")
print("X: ", X_test_1.shape)
print("y: ", y_test_1.shape)

print("Dataset 2")
print("X: ", X_test_2.shape)
print("y: ", y_test_2.shape)

#### Transform the features with the ROCKET model for two datasets

In [None]:
# Fit the ROCKET transformer on dataset 1's training split only, then apply
# the same fitted transformer to the train, validation and test splits.
rocket_1 = Rocket(
    num_kernels=10_000,
    random_state=50,
)
rocket_1.fit(X=X_train_1)

X_train_transformed_1 = rocket_1.transform(X=X_train_1)
X_val_transformed_1 = rocket_1.transform(X=X_val_1)
X_test_transformed_1 = rocket_1.transform(X=X_test_1)

In [None]:
# Fit a separate ROCKET transformer on dataset 2's training split only, then
# apply it to that dataset's train, validation and test splits.
rocket_2 = Rocket(
    num_kernels=10_000,
    random_state=50,
)
rocket_2.fit(X=X_train_2)

X_train_transformed_2 = rocket_2.transform(X=X_train_2)
X_val_transformed_2 = rocket_2.transform(X=X_val_2)
X_test_transformed_2 = rocket_2.transform(X=X_test_2)

In [None]:
# Shapes of the transformed train / val / test features for dataset 1,
# printed one per line.
print(
    X_train_transformed_1.shape,
    X_val_transformed_1.shape,
    X_test_transformed_1.shape,
    sep="\n",
)

In [None]:
# Shapes of the transformed train / val / test features for dataset 2,
# printed one per line.
print(
    X_train_transformed_2.shape,
    X_val_transformed_2.shape,
    X_test_transformed_2.shape,
    sep="\n",
)

#### Train the classifier on the transformed features

In [None]:
def train_and_evaluate_logisctic_regression(X_train, y_train, X_val, y_val, c_values):
    """Fit one logistic-regression model per ``C`` value and score each one.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_val: Validation feature matrix.
        y_val: Validation labels.
        c_values: Iterable of values passed as ``C`` to ``LogisticRegression``.

    Returns:
        A dict of parallel lists, one entry per value in ``c_values``, under
        the keys ``"c_values"``, ``"train_accuracy"``, ``"val_accuracy"``,
        ``"train_loss"``, ``"val_loss"`` and ``"models"`` (the fitted
        estimators).
    """
    results = {
        "c_values": [],
        "train_accuracy": [],
        "val_accuracy": [],
        "train_loss": [],
        "val_loss": [],
        "models": [],
    }

    for c in c_values:
        # Initialize and train the model
        model = LogisticRegression(C=c, solver="newton-cg", max_iter=1000, random_state=50)
        model.fit(X_train, y_train)

        # Predictions
        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        # Probabilities for log loss
        y_train_proba = model.predict_proba(X_train)
        y_val_proba = model.predict_proba(X_val)

        # Calculate accuracy and log loss. The probability columns follow
        # model.classes_, so pass that ordering explicitly: without it,
        # log_loss infers the classes from the labels it is given and fails
        # (or misaligns columns) when a split lacks one of the trained classes.
        train_acc = accuracy_score(y_train, y_train_pred)
        val_acc = accuracy_score(y_val, y_val_pred)
        train_loss = log_loss(y_train, y_train_proba, labels=model.classes_)
        val_loss = log_loss(y_val, y_val_proba, labels=model.classes_)

        # Store results
        results["c_values"].append(c)
        results["train_accuracy"].append(train_acc)
        results["val_accuracy"].append(val_acc)
        results["train_loss"].append(train_loss)
        results["val_loss"].append(val_loss)
        results["models"].append(model)

    return results


In [None]:
# Values of C (passed to LogisticRegression) to sweep, from 1e-7 to 1e-2.
C_VALUES = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2]

# Sweep on dataset 1: train on the transformed training features and score
# on the transformed validation features.
model_1 = train_and_evaluate_logisctic_regression(
    X_train=X_train_transformed_1,
    y_train=y_train_1,
    X_val=X_val_transformed_1,
    y_val=y_val_1,
    c_values=C_VALUES,
)
model_1

In [None]:
# Sweep on dataset 2: train on the transformed training features and score
# on the transformed validation features.
model_2 = train_and_evaluate_logisctic_regression(
    X_train=X_train_transformed_2,
    y_train=y_train_2,
    X_val=X_val_transformed_2,
    y_val=y_val_2,
    c_values=C_VALUES,
)
model_2

In [None]:
def eval_on_test_set(model, X, y_true):
    """Print test metrics for ``model`` and plot its confusion matrix.

    Prints the accuracy and a classification report, then shows a heatmap of
    the confusion matrix. Every metric is computed against ``y_true``.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Feature matrix to predict on.
        y_true: Ground-truth labels for ``X``.

    Returns:
        None.
    """
    # Make predictions
    y_test_pred = model.predict(X)

    # Compute metrics
    accuracy = accuracy_score(y_true, y_test_pred)

    # Use the labels that actually occur (sorted) for both the matrix and the
    # tick labels, so the axes always line up with the matrix rows/columns.
    labels = np.unique(
        np.concatenate([np.asarray(y_true).ravel(), np.asarray(y_test_pred).ravel()])
    )
    tick_labels = labels.tolist()

    # Confusion matrix against the labels passed in, not a notebook global.
    conf_matrix = confusion_matrix(y_true, y_test_pred, labels=labels)

    # Print Metrics
    print(f"Accuracy: {accuracy:.4f}")

    # Detailed Classification Report
    print("\nClassification Report:\n", classification_report(y_true, y_test_pred))

    # Plot Confusion Matrix
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=tick_labels,
        yticklabels=tick_labels,
    )
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title("Confusion Matrix")
    plt.show()

In [None]:
# Take the model at index 2 of the sweep (the third entry of C_VALUES) and
# evaluate it on dataset 1's test split.
final_model_1 = model_1["models"][2]
print(final_model_1)
eval_on_test_set(
    model=final_model_1,
    X=X_test_transformed_1,
    y_true=y_test_1,
)

In [None]:
# Take the model at index 2 of the sweep (c=1e-05) and evaluate it on
# dataset 2's test split.
final_model_2 = model_2["models"][2]
print(final_model_2)
eval_on_test_set(
    model=final_model_2,
    X=X_test_transformed_2,
    y_true=y_test_2,
)