In [1]:
import pandas as pd
import numpy as np

boning_activity_class_names = ['Idle', 'Walking', 'Steeling', 'Reaching', 'Cutting', 'Dropping']
slicing_activity_class_names = ['Idle', 'Walking', 'Steeling', 'Reaching', 'Cutting', 'Slicing', 'Pulling', 'Placing/Manipulation', 'Dropping']

# **Reading the CSVs**

In [2]:
boning_df_resampled = pd.read_csv('boning_df_resampled.csv')
slicing_df_resampled = pd.read_csv('slicing_df_resampled.csv')

# **Model Comparison**

### **Random Forest Classifier**

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def RFM(df):
    X = df.iloc[:, :df.shape[1] - 2]
    y = df.iloc[:, -2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

    clf = RandomForestClassifier(n_estimators=50, 
                                 random_state=42,
                                 n_jobs=-1)

    skf = StratifiedKFold(
        n_splits=5,
        shuffle=True,
        random_state=42
    )

    scores = cross_val_score(
        clf,
        X,
        y,
        cv=skf,
        scoring='accuracy',
        n_jobs=-1
    )

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print("Test accuracy: {:.2f}%".format(accuracy * 100))
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(report)
    print("CV accuracies:", scores)
    print("Mean CV accuracy: {:.2f}%".format(scores.mean()*100))
    print("Std  CV accuracy: {:.2f}%".format(scores.std()*100))

    return accuracy, report

In [5]:
RFM_slicing_accuracy, RFM_slicing_report = RFM(slicing_df_resampled)

Test accuracy: 81.99%
Confusion Matrix:
[[21503     8   140    93   701   103     0     3    39]
 [   12 11285     5     0     1     0     0     0     0]
 [  293    48 20481   242  1273   206     3     9    21]
 [  148    11    85 21044  1041   191     0     3    47]
 [  981    33   440   926 29106  2461    32    33   357]
 [  466    13   405   690 12126 10635    15    19   141]
 [   38     2    22   252  2230   248 14063    23    36]
 [   56     1     9   114  2148   172    10 14396    18]
 [  556    19    72   371  4430   290     7     6 16797]]
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.95      0.92     22590
           1       0.99      1.00      0.99     11303
           2       0.95      0.91      0.93     22576
           3       0.89      0.93      0.91     22570
           4       0.55      0.85      0.67     34369
           5       0.74      0.43      0.55     24510
           6       1.00      0.83      0.91 

In [6]:
RFM_boning_accuracy, RFM_boning_report = RFM(boning_df_resampled)

Test accuracy: 93.85%
Confusion Matrix:
[[21657   363   533    44   566     1]
 [  246 19537   173    31   273     0]
 [  736   328 21332   102   661     3]
 [   94    94   109 22470   393     2]
 [  825   468   756   495 23938    17]
 [  320   281    56    26   401 19185]]
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.93      0.92     23164
           1       0.93      0.96      0.95     20260
           2       0.93      0.92      0.93     23162
           3       0.97      0.97      0.97     23162
           4       0.91      0.90      0.91     26499
           5       1.00      0.95      0.97     20269

    accuracy                           0.94    136516
   macro avg       0.94      0.94      0.94    136516
weighted avg       0.94      0.94      0.94    136516

CV accuracies: [0.94429245 0.94281947 0.94363257 0.94414899 0.9438743 ]
Mean CV accuracy: 94.38%
Std  CV accuracy: 0.05%


### **Logistic Regression**

In [7]:
from sklearn.linear_model import LogisticRegression

def LRM(df):
    X = df.iloc[:, :df.shape[1] - 2]
    y = df.iloc[:, -2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    log_reg = LogisticRegression(max_iter=500)

    skf = StratifiedKFold(
        n_splits=5,
        shuffle=True,
        random_state=42
    )

    scores = cross_val_score(
        log_reg,
        X,
        y,
        cv = skf,
        scoring='accuracy',
        n_jobs=-1
    )
    
    log_reg.fit(X_train, y_train)
    
    y_pred = log_reg.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print("Test accuracy: {:.2f}%".format(accuracy * 100))
    print("Confusion Matrix:")
    print(cm)
    print("Logistic Regression:")
    print(report)
    print("Mean CV accuracy: {:.2f}%".format(scores.mean()*100))
    print("Std  CV accuracy: {:.2f}%".format(scores.std()*100))

    return accuracy, report

In [8]:
LRM_boning_accuracy, LRM_boning_report = LRM(boning_df_resampled)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Test accuracy: 36.99%
Confusion Matrix:
[[  497  2777 10442  2463  2402  4378]
 [   88  7389   360  2954   695  8938]
 [  237  1026  8175  1846  3714  8324]
 [   57  2152   262  6333  1445 12757]
 [  426  2100  2855  1923 12322  6771]
 [  104  1180   242  1821  1276 15785]]
Logistic Regression:
              precision    recall  f1-score   support

           0       0.35      0.02      0.04     22959
           1       0.44      0.36      0.40     20424
           2       0.37      0.35      0.36     23322
           3       0.37      0.28      0.31     23006
           4       0.56      0.47      0.51     26397
           5       0.28      0.77      0.41     20408

    accuracy                           0.37    136516
   macro avg       0.39      0.37      0.34    136516
weighted avg       0.40      0.37      0.34    136516

Mean CV accuracy: 36.89%
Std  CV accuracy: 1.61%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [9]:
LRM_slicing_accuracy, LRM_slicing_report = LRM(slicing_df_resampled)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Test accuracy: 22.59%
Confusion Matrix:
[[   40  1226  6436  2807  1525    16     7   268 10157]
 [    2  3906  1048  1472   119     0     0   395  4342]
 [    4   409  9388  1341  1121    36    19   180 10138]
 [    1   488   738  3659  1639     7    10   177 16023]
 [   30  1709 10046  2269 11446   177    59   147  8546]
 [   13   792  5310  2139  5824   124    54   112 10063]
 [    1   207   756  1958  1639    15    13    72 12221]
 [    5   398   623  2166  1402     4     6   303 12070]
 [    6   531  1981  2503  2133    10     5   250 15022]]
Logistic Regression:
              precision    recall  f1-score   support

           0       0.39      0.00      0.00     22482
           1       0.40      0.35      0.37     11284
           2       0.26      0.41      0.32     22636
           3       0.18      0.16      0.17     22742
           4       0.43      0.33      0.37     34429
           5       0.32      0.01      0.01     24431
           6       0.08      0.00      0.00   

### **Support Vector Machine**

In [26]:
from sklearn.svm import LinearSVC

def fast_linear_svm(df):
    X = df.iloc[:, :df.shape[1] - 2]
    y = df.iloc[:, -2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    clf = make_pipeline(
    StandardScaler(),
    LinearSVC(dual=False,
              C=1.0,
              max_iter=10_000,
              random_state=42)
    )

    skf = StratifiedKFold(
        n_splits=5,
        shuffle=True,
        random_state=42
    )    

    scores = cross_val_score(
        clf,
        X,
        y,
        cv=skf,
        scoring='accuracy',
        n_jobs=-1
    )

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    print("Test Accuracy: {:.2f}%".format(accuracy_score(y_test, y_pred) * 100))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Mean CV accuracy: {:.2f}%".format(scores.mean()*100))
    print("Std  CV accuracy: {:.2f}%".format(scores.std()*100))


    return accuracy, report

In [27]:
svm_boning_accuracy, svm_boning_report = fast_linear_svm(boning_df_resampled)

KeyboardInterrupt: 

In [13]:
svm_slicing_accuracy, svm_slicing_report = fast_linear_svm(slicing_df_resampled)

# **LSTM-CNN Hybrid Initialization**

In [16]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import TimeDistributed, Attention, Add, Input, Bidirectional, Dropout, LSTM, Conv1D, MaxPool1D, GlobalAveragePooling1D, BatchNormalization, Dense, Activation, Reshape
from tensorflow.keras.regularizers import l2

def model_init(time_steps: int, num_classes: int):
    inp = Input(shape=(time_steps,1))

    # first Bi‐LSTM → 256 channels
    x1 = Bidirectional(LSTM(128, return_sequences=True))(inp)
    x1 = Dropout(0.2)(x1)          # (batch, time_steps, 256)

    # second Bi‐LSTM → 128 channels
    x2 = Bidirectional(LSTM(64, return_sequences=True))(x1)
    x2 = Dropout(0.2)(x2)          # (batch, time_steps, 128)

    # project x2 → 256 channels
    x2_proj = TimeDistributed(Dense(256))(x2)

    # now you can add
    res = Add()([x1, x2_proj])     # (batch, time_steps, 256)

    # self‐attention
    attn = Attention()([res, res])

    # … your Conv blocks, pooling, final Dense …
    c1 = Conv1D(128, 3, padding='same', activation='relu')(attn)
    c1 = BatchNormalization()(c1)
    c1 = MaxPool1D(2)(c1)
    c1 = Dropout(0.2)(c1)

    c2 = Conv1D(256, 3, padding='same', activation='relu')(c1)
    c2 = BatchNormalization()(c2)
    c2 = MaxPool1D(2)(c2)
    c2 = Dropout(0.2)(c2)

    # ── Final pooling & output ───────────────────────────────────────────
    gap = GlobalAveragePooling1D()(c2)
    out = Dense(num_classes, activation='softmax')(gap)

    model = Model(inputs=inp, outputs=out)

    model.summary()
    
    model.compile(
        optimizer=Adam(learning_rate=1e-4, clipnorm=1.0),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [19]:
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

def train_on_LSTMCNN(df, *,
                              col='Label',
                              test_size=0.2,
                              val_split=0.2,
                              epochs=150,
                              batch_size=128,
                              patience=4,
                              random_state=42,
                              model_name="unnamed"):
    
    train_df, test_df = train_test_split(
        df, 
        test_size=test_size,
        stratify=df[col],
        random_state=random_state
    )
    
    # not counting 'Label' and 'sharpness' columns into the time_steps
    time_steps = train_df.shape[1] - 2

    X_train_raw = train_df.drop(columns=['sharpness', 'Label']).values
    y_train_raw = train_df[col].values

    X_test_raw = test_df.drop(columns=['sharpness', 'Label']).values
    y_test_raw = test_df[col].values

    # label encoding
    le = LabelEncoder().fit(y_train_raw)
    y_train = le.transform(y_train_raw)
    y_test = le.transform(y_test_raw)
    num_classes = len(le.classes_)

    # scaling features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_raw)
    X_test_scaled = scaler.fit_transform(X_test_raw)

    # Reshape to 3D for LSTM-CNN model
    time_steps = X_train_scaled.shape[1]
    X_train = X_train_scaled.reshape(-1, time_steps, 1)
    X_test = X_test_scaled.reshape(-1, time_steps, 1)

    print(time_steps, num_classes)

    assert np.isfinite(X_train_scaled).all()
    assert not np.isnan(y_train).any()

    model = model_init(time_steps, num_classes)
    
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True
    )

    rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
    
    # fit with validation split
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=val_split,
        callbacks=[early_stopping, rlr],
        verbose=1
    )

    # evaluate on test set
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {test_acc*100:.2f}%")

    model.save(f"{model_name}.keras")

    return history, X_test, y_test, test_loss, test_acc
    

# **Activity Recognition Models**

### **Boning**

In [20]:
boning_activity_history, boning_activity_X_test, boning_activity_y_test, boning_activity_test_loss, boning_activity_test_acc = train_on_LSTMCNN(
    boning_df_resampled,
    col='Label',
    model_name="boning_activity_recognition_LSTMCNN"
)

139 6


Epoch 1/150
[1m 173/2276[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1:29[0m 43ms/step - accuracy: 0.2489 - loss: 1.7697

KeyboardInterrupt: 

### **Slicing**

In [21]:
slicing_activity_history, slicing_activity_X_test, slicing_activity_y_test, slicing_activity_test_loss, slicing_activity_test_acc = train_on_LSTMCNN(
    slicing_df_resampled,
    col='Label',
    model_name="slicing_activity_recognition_LSTMCNN"
)

139 9


Epoch 1/150
[1m 480/3239[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1:59[0m 43ms/step - accuracy: 0.1820 - loss: 2.1578

KeyboardInterrupt: 

# **Sharpness Classification Model**

### **Boning**


In [22]:
boning_sharpness_history, boning_sharpness_X_test, boning_sharpness_y_test, boning_sharpness_test_loss, boning_sharpness_test_acc = train_on_LSTMCNN(
    boning_df_resampled,
    col='sharpness',
    model_name="boning_sharpness_classification_LSTMCNN"
)

139 8


Epoch 1/150
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 46ms/step - accuracy: 0.1974 - loss: 2.0164 - val_accuracy: 0.2003 - val_loss: 2.0308 - learning_rate: 1.0000e-04
Epoch 2/150
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 46ms/step - accuracy: 0.2272 - loss: 1.9420 - val_accuracy: 0.2338 - val_loss: 1.9542 - learning_rate: 1.0000e-04
Epoch 3/150
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 46ms/step - accuracy: 0.2464 - loss: 1.9027 - val_accuracy: 0.2454 - val_loss: 1.9130 - learning_rate: 1.0000e-04
Epoch 4/150
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 47ms/step - accuracy: 0.2643 - loss: 1.8688 - val_accuracy: 0.2572 - val_loss: 1.8876 - learning_rate: 1.0000e-04
Epoch 5/150
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 47ms/step - accuracy: 0.2801 - loss: 1.8360 - val_accuracy: 0.2831 - val_loss: 1.8266 - learning_rate: 1.0000e-04
Epoch 6/150
[1m2276

KeyboardInterrupt: 

### **Slicing**

In [23]:
slicing_sharpness_history, slicing_sharpness_X_test, slicing_sharpness_y_test, slicing_sharpness_test_loss, slicing_sharpness_test_acc = train_on_LSTMCNN(
    slicing_df_resampled,
    col='sharpness',
    model_name="slicing_sharpness_classification_LSTMCNN"
)

139 4


Epoch 1/150
[1m 353/3239[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m2:05[0m 44ms/step - accuracy: 0.2893 - loss: 1.4121

KeyboardInterrupt: 