In [1]:
import pandas as pd
import numpy as np

In [2]:
BONING_PATHS = [
    'Theme2/P1/Boning/MVN-J-Boning-64-001.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-90-003.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-90-002.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-90-004.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-64-006.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-64-004.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-64-002.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-90-001.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-64-003.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-79-001.xlsx', 
    'Theme2/P1/Boning/MVN-J-Boning-64-005.xlsx',
    'Theme2/P2/Boning/MVN-S-Boning-89-001.xlsx', 
    'Theme2/P2/Boning/MVN-S-Boning-89-002.xlsx', 
    'Theme2/P2/Boning/MVN-S-Boning-89-003.xlsx', 
    'Theme2/P2/Boning/MVN-S-Boning-76-001.xlsx', 
    'Theme2/P2/Boning/MVN-S-Boning-63-003.xlsx', 
    'Theme2/P2/Boning/MVN-S-Boning-63-001.xlsx', 
    'Theme2/P2/Boning/MVN-S-Boning-76-002.xlsx', 
    'Theme2/P2/Boning/MVN-S-Boning-63-002.xlsx', 
    'Theme2/P2/Boning/MVN-S-Boning-89-004.xlsx'
]

SLICING_PATHS = [
    'Theme2/P1/Slicing/MVN-J-Slicing-64-001.xlsx', 
    'Theme2/P1/Slicing/MVN-J-Slicing-87-001.xlsx', 
    'Theme2/P1/Slicing/MVN-J-Slicing-73-001.xlsx',
    'Theme2/P2/Slicing/MVN-S-Slicing-87-001.xlsx', 
    'Theme2/P2/Slicing/MVN-S-Slicing-73-001.xlsx', 
    'Theme2/P2/Slicing/MVN-S-Slicing-63-001.xlsx'
]

ACCELERATION_SHEETS = [
    'Segment Acceleration', 
    'Segment Angular Acceleration'
]

boning_class_names = ['Idle', 'Walking', 'Steeling', 'Reaching', 'Cutting', 'Dropping']
slicing_class_names = ['Idle', 'Walking', 'Steeling', 'Reaching', 'Cutting', 'Slicing', 'Pulling', 'Placing/Manipulation', 'Dropping']

# **Merging the Files into a DataFrame**

- merge boning and slicing separately

In [3]:
# from collections import defaultdict

# sharpness_levels = [64, 79, 90, 87, 73, 63, 76, 89]

# def process_acceleration_data(file_paths, sharpness_levels):
#     """Process acceleration data from multiple Excel files with different sharpness levels."""
#     # Dictionary to store processed data by sheet type
#     sheet_data = defaultdict(list)
    
#     # Process all files and extract data by sheet type
#     for file_path, sharpness in zip(file_paths, sharpness_levels):
#         try:
#             xls = pd.ExcelFile(file_path)
            
#             for sheet_name in xls.sheet_names:
#                 if sheet_name not in ACCELERATION_SHEETS:
#                     continue
                
#                 try:
#                     # Read the sheet
#                     df = pd.read_excel(xls, sheet_name=sheet_name)
#                     print(f"{df.shape}")
                    
#                     if df.empty:
#                         continue

#                     if 'Label' not in df.columns:
#                         print(f"Skipping {sheet_name} since it doesn't have a label column")
#                         continue
                    
#                     # Add sharpness column
#                     df['sharpness'] = sharpness
                    
#                     # Add sheet name as a column to differentiate data source
#                     df['sheet_type'] = sheet_name
                    
#                     # Add to our collection
#                     sheet_data[sheet_name].append(df)
                
#                 except Exception as e:
#                     print(f"Error processing sheet {file_path} {sheet_name}: {e}")
        
#         except Exception as e:
#             print(f"Error processing file {file_path}: {e}")
    
#     # Combine data from all files
#     combined_dfs = []
    
#     for sheet_name, dfs in sheet_data.items():
#         if dfs:
#             # Concatenate all data for this sheet type
#             sheet_combined_df = pd.concat(dfs, ignore_index=True)
#             combined_dfs.append(sheet_combined_df)
    
#     # Return combined data frame
#     if combined_dfs:
#         final_df = pd.concat(combined_dfs, ignore_index=True)
#         return final_df
    
#     return None

# def main():
#     # Process the data
#     merged_df = process_acceleration_data(
#         file_paths=SLICING_PATHS,
#         sharpness_levels=sharpness_levels
#     )
    
#     if merged_df is None or merged_df.empty:
#         print("No results generated!")
#         return
    
#     # Save the final merged dataframe
#     output_file = "slicing_acceleration_data.csv"
#     merged_df.to_csv(output_file, index=False)
#     print(f"Saved data to {output_file} ({merged_df.shape[0]} rows, {merged_df.shape[1]} columns)")

# if __name__ == "__main__":
#     main()

In [4]:
boning_df = pd.read_csv('boning_acceleration_data.csv')
slicing_df = pd.read_csv('slicing_acceleration_data.csv')

# **Creating composite features based off XYZ**

In [5]:
def create_composite_features(df):
    """
    Create composite features from motion capture data - optimized for performance
    """
    # Columns to preserve
    preserved_columns = {}
    if 'Label' in df.columns:
        preserved_columns['Label'] = df['Label']
    if 'sharpness' in df.columns:
        preserved_columns['sharpness'] = df['sharpness']
    
    # Get all unique body parts
    body_parts = set()
    for col in df.columns:
        if col.endswith(' x') or col.endswith(' y') or col.endswith(' z'):
            body_part = col[:-2]  # Remove the ' x', ' y', or ' z' suffix
            body_parts.add(body_part)
    
    # Dictionary to collect all features
    all_features = {}
    
    # Create aggregated features for each body part
    for body_part in body_parts:
        # Extract x, y, z components
        x_col = f"{body_part} x"
        y_col = f"{body_part} y"
        z_col = f"{body_part} z"
        
        # Skip if any component is missing
        if not (x_col in df.columns and y_col in df.columns and z_col in df.columns):
            continue
        
        # RMS of x and y ("mean")
        all_features[f"{body_part}_mean"] = np.sqrt((df[x_col]**2 + df[y_col]**2) / 2)
        
        # RMS of y and z ("standard deviation")
        all_features[f"{body_part}_std"] = np.sqrt((df[y_col]**2 + df[z_col]**2) / 2)
        
        # RMS of z and x ("min")
        all_features[f"{body_part}_min"] = np.sqrt((df[z_col]**2 + df[x_col]**2) / 2)
        
        # RMS of x, y, and z ("max")
        all_features[f"{body_part}_max"] = np.sqrt((df[x_col]**2 + df[y_col]**2 + df[z_col]**2) / 3)
        
        # roll calculation ("Area under the curve")
        denominator = np.sqrt(df[x_col]**2 + df[z_col]**2)
        denominator = np.where(denominator == 0, 1e-10, denominator)
        all_features[f"{body_part}_AUC"] = 180 * np.arctan2(df[y_col], denominator) / np.pi
        
        # pitch calculation ("peaks")
        denominator = np.sqrt(df[y_col]**2 + df[z_col]**2)
        denominator = np.where(denominator == 0, 1e-10, denominator)
        all_features[f"{body_part}_peaks"] = 180 * np.arctan2(df[x_col], denominator) / np.pi
    
    # Combine preserved columns and features
    all_features.update(preserved_columns)
    
    # Create DataFrame in one go
    aggregated_features = pd.DataFrame(all_features)
    
    return aggregated_features

In [6]:
boning_df = create_composite_features(boning_df)
slicing_df = create_composite_features(slicing_df)

In [7]:
print(slicing_df.shape)
print(boning_df.shape)

(357114, 140)
(151158, 140)


In [8]:
slicing_df['Label'].unique()

array([ 0.,  4.,  8.,  2.,  1.,  3.,  5., nan,  7.,  6.])

#### Converting the label values into int

In [9]:
slicing_df.dropna(inplace=True)
slicing_df['Label'] = slicing_df['Label'].round().astype(int)

In [10]:
boning_df['sharpness'].unique()

array([64, 90, 87, 73, 63, 76, 89, 79])

# **Implementing an Over and Under sampling pipeline**

In [11]:
import numpy as np
import pandas as pd
import tensorflow as tf
from collections import Counter

from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split

from imblearn.under_sampling import TomekLinks
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTENC

def oversample_data(df):
    # 1. Split off train / (val+test)
    train_df, temp_df = train_test_split(df, test_size=0.3, stratify=df['Label'], random_state=42)
    
    # 2. Further split val + test 50/50
    val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['Label'], random_state=42)
    
    # 3. Prepare X and combined target
    numeric_cols = train_df.select_dtypes(include=[np.number]).columns.tolist()
    X = train_df[numeric_cols].copy()
    y_combined = (X['Label'].astype(str) + "_" + X['sharpness'].astype(str)).values
    X = X.drop(columns=['sharpness'])
    
    # 4. Scale numeric features
    numeric_feats = [c for c in X.columns if c != 'Label']
    scaler = RobustScaler()
    X[numeric_feats] = scaler.fit_transform(X[numeric_feats])
    
    # 5. Build sampling strategy
    counts = Counter(y_combined)
    majority = max(counts.values())
    target_smote = int(0.5 * majority)
    smote_strategy = {cls: target_smote for cls, cnt in counts.items() if cnt < target_smote}
    
    # 6. Set up SMOTENC + Tomek pipeline
    categorical_features = [X.columns.get_loc('Label')]
    smote_nc = SMOTENC(categorical_features=categorical_features,
                       random_state=42, k_neighbors=15,
                       sampling_strategy=smote_strategy)
    tomek = TomekLinks(sampling_strategy='all')
    pipeline = ImbPipeline([('smote', smote_nc),
                            ('undersample', tomek)])
    
    # 7. Run oversampling
    X_resampled, y_resampled = pipeline.fit_resample(X.values, y_combined)
    
    # 8. Extract just the activity label (before the underscore)
    y_activity = np.array([int(lbl.split('_')[0]) for lbl in y_resampled])
    
    # 9. Build final balanced DataFrame
    balanced_df = pd.DataFrame(X_resampled, columns=X.columns)
    balanced_df['Label'] = y_activity
    
    print("Balanced class counts:", Counter(balanced_df['Label']))
    return balanced_df, y_resampled


2025-04-25 15:51:28.204734: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-25 15:51:28.211174: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745560288.216795   20692 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745560288.218466   20692 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745560288.223748   20692 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [12]:
slicing_df_resampled, slicing_df_target = oversample_data(slicing_df)

Balanced class counts: Counter({4: 114565, 5: 81699, 0: 75300, 2: 75252, 3: 75233, 8: 75160, 7: 56412, 6: 56382, 1: 37676})


In [13]:
boning_df_resampled, boning_df_target= oversample_data(boning_df)

Balanced class counts: Counter({4: 88331, 0: 77213, 3: 77205, 2: 77205, 5: 67564, 1: 67533})


In [14]:
slicing_df_resampled

Unnamed: 0,Left Upper Leg_mean,Left Upper Leg_std,Left Upper Leg_min,Left Upper Leg_max,Left Upper Leg_AUC,Left Upper Leg_peaks,Left Shoulder_mean,Left Shoulder_std,Left Shoulder_min,Left Shoulder_max,...,Right Lower Leg_max,Right Lower Leg_AUC,Right Lower Leg_peaks,Right Upper Leg_mean,Right Upper Leg_std,Right Upper Leg_min,Right Upper Leg_max,Right Upper Leg_AUC,Right Upper Leg_peaks,Label
0,0.284726,-0.231925,0.089892,-0.018087,0.035955,-1.569853,-0.130567,-0.426333,-0.054528,-0.202912,...,-0.297593,1.530643,-0.151742,-0.360314,-0.277217,-0.177269,-0.265644,-0.022232,1.629289,5
1,-0.109006,-0.103915,-0.233245,-0.181927,1.476828,-0.402963,-0.040601,-0.190285,-0.059109,-0.133258,...,-0.361415,1.539473,-0.047313,0.077673,-0.227627,0.005196,-0.093121,-0.058947,1.582495,4
2,0.646308,-0.188521,0.225204,0.120314,-0.221487,-1.549615,-0.341190,0.627203,0.620207,0.356889,...,2.969601,-1.529203,-0.119420,0.146027,1.127173,1.136548,0.998823,-0.225158,-0.158989,4
3,-0.182241,-0.229628,-0.133951,-0.210490,0.589330,1.272732,0.192008,0.480613,0.726121,0.426837,...,-0.379448,1.049589,0.723446,0.411960,0.045373,-0.051458,0.024701,1.115945,-0.760511,3
4,-0.605534,-0.279872,-0.266250,-0.355731,-0.528122,0.486907,-0.236980,-0.186746,-0.360922,-0.279895,...,-0.273899,-1.329104,-0.489319,-0.491241,-0.268551,-0.227822,-0.311845,-0.230050,1.349534,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647674,0.308058,0.235534,0.311056,0.223663,0.195181,0.680874,2.040735,2.222736,1.497207,1.803593,...,0.162290,-0.003999,0.339099,0.356142,1.231473,1.253713,1.120655,-0.304147,-0.049683,8
647675,-0.220607,-0.171947,-0.194995,-0.222728,-1.098756,-0.715022,-0.344311,-0.351304,-0.221700,-0.331752,...,-0.284679,-1.645514,-0.032064,-0.643393,-0.283297,-0.295606,-0.371018,0.856277,-0.496406,8
647676,0.772762,2.349450,2.259739,2.127046,0.012713,-0.125224,1.954268,1.762731,1.306186,1.583650,...,0.489713,-0.016629,0.613186,-0.054921,0.046583,0.102161,0.027234,0.232923,-0.539752,8
647677,0.876826,0.741835,0.837264,0.737750,-0.019240,0.580315,0.580503,0.763853,0.479396,0.549978,...,0.728343,0.389096,-0.131148,0.060713,-0.021909,-0.132707,-0.077750,-1.211637,-0.435618,8


In [15]:
sharpness_resampled = np.array([int(lbl.split('_')[1]) for lbl in boning_df_target])
boning_df_resampled['sharpness'] = sharpness_resampled

print("Now balanced_df columns:", boning_df_resampled.columns)

Now balanced_df columns: Index(['Left Upper Leg_mean', 'Left Upper Leg_std', 'Left Upper Leg_min',
       'Left Upper Leg_max', 'Left Upper Leg_AUC', 'Left Upper Leg_peaks',
       'Left Shoulder_mean', 'Left Shoulder_std', 'Left Shoulder_min',
       'Left Shoulder_max',
       ...
       'Right Lower Leg_AUC', 'Right Lower Leg_peaks', 'Right Upper Leg_mean',
       'Right Upper Leg_std', 'Right Upper Leg_min', 'Right Upper Leg_max',
       'Right Upper Leg_AUC', 'Right Upper Leg_peaks', 'Label', 'sharpness'],
      dtype='object', length=140)


In [16]:
sharpness_resampled = np.array([int(lbl.split('_')[1]) for lbl in slicing_df_target])
slicing_df_resampled['sharpness'] = sharpness_resampled

print("Now balanced_df columns:", slicing_df_resampled.columns)

Now balanced_df columns: Index(['Left Upper Leg_mean', 'Left Upper Leg_std', 'Left Upper Leg_min',
       'Left Upper Leg_max', 'Left Upper Leg_AUC', 'Left Upper Leg_peaks',
       'Left Shoulder_mean', 'Left Shoulder_std', 'Left Shoulder_min',
       'Left Shoulder_max',
       ...
       'Right Lower Leg_AUC', 'Right Lower Leg_peaks', 'Right Upper Leg_mean',
       'Right Upper Leg_std', 'Right Upper Leg_min', 'Right Upper Leg_max',
       'Right Upper Leg_AUC', 'Right Upper Leg_peaks', 'Label', 'sharpness'],
      dtype='object', length=140)


In [17]:
boning_df_resampled.head()

Unnamed: 0,Left Upper Leg_mean,Left Upper Leg_std,Left Upper Leg_min,Left Upper Leg_max,Left Upper Leg_AUC,Left Upper Leg_peaks,Left Shoulder_mean,Left Shoulder_std,Left Shoulder_min,Left Shoulder_max,...,Right Lower Leg_AUC,Right Lower Leg_peaks,Right Upper Leg_mean,Right Upper Leg_std,Right Upper Leg_min,Right Upper Leg_max,Right Upper Leg_AUC,Right Upper Leg_peaks,Label,sharpness
0,0.682552,1.868107,1.921719,1.67774,0.101988,-0.297216,0.030683,0.37783,0.511702,0.279764,...,0.186578,-1.366432,1.507616,0.879217,1.185366,1.024399,0.255722,0.782468,4,87
1,1.505051,1.855258,1.910049,1.724229,-0.323153,0.421996,-0.095532,0.273851,0.162196,0.082272,...,-1.210508,-0.532834,0.096416,0.267077,0.159033,0.148522,0.711889,-0.018835,3,90
2,0.400196,0.00223,-0.036756,0.020019,-1.076554,0.885334,-0.293377,-0.281998,-0.305526,-0.329494,...,1.213678,0.41863,-0.158919,-0.122539,-0.075082,-0.144367,-0.118274,-0.87663,4,90
3,1.05174,0.389138,0.603739,0.514494,-0.419779,0.956311,-0.224322,-0.149276,-0.076307,-0.188293,...,1.691733,-0.122938,0.306982,1.297898,1.340874,1.142689,-0.066496,0.277738,1,90
4,1.656197,1.822196,1.605164,1.594355,0.558811,0.137996,1.900449,1.011318,2.097873,1.595336,...,0.000526,-0.112879,-0.282371,0.525813,0.508743,0.389567,-0.13186,0.09469,4,79


# **Model Comparison**

### **Random Forest Classifier**

In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def RFM(df):
    X = df.iloc[:, :138]
    y = df.iloc[:, -2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    clf = RandomForestClassifier(n_estimators=50, random_state=42)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print("Test accuracy: {:.2f}%".format(accuracy * 100))
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(report)

    return accuracy, report

In [19]:
RFM_slicing_accuracy, RFM_slicing_report = RFM(slicing_df_resampled)

Test accuracy: 73.84%
Confusion Matrix:
[[20004    88   507   390   803   221    25   116   328]
 [   54 11216     1     4     4     0     0     2     3]
 [  799    87 18976   475  1379   335    27   168   390]
 [  391    22    46 19490  1270   359    30   504   630]
 [ 1451    79  1034  2011 23107  3541   185  1017  2004]
 [  789    28   730  1091 10434  9517   128   715   999]
 [  135    13    78   537  2317   589 12487   347   379]
 [  202     9    30   628  1919   490    33 13049   617]
 [  828    39   175  1542  2978   606    41   606 15626]]
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.89      0.85     22482
           1       0.97      0.99      0.98     11284
           2       0.88      0.84      0.86     22636
           3       0.74      0.86      0.80     22742
           4       0.52      0.67      0.59     34429
           5       0.61      0.39      0.47     24431
           6       0.96      0.74      0.84 

In [20]:
RFM_boning_accuracy, RFM_boning_report = RFM(boning_df_resampled)

Test accuracy: 89.12%
Confusion Matrix:
[[19718   819  1239   331   623   229]
 [  391 19111   134   313   283   192]
 [  861   498 20266   502   967   228]
 [  196   405   278 21380   639   108]
 [  631   509  1043  1012 22766   436]
 [  366   459   163   288   714 18418]]
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.86      0.87     22959
           1       0.88      0.94      0.91     20424
           2       0.88      0.87      0.87     23322
           3       0.90      0.93      0.91     23006
           4       0.88      0.86      0.87     26397
           5       0.94      0.90      0.92     20408

    accuracy                           0.89    136516
   macro avg       0.89      0.89      0.89    136516
weighted avg       0.89      0.89      0.89    136516



### **Logistic Regression**

In [21]:
from sklearn.linear_model import LogisticRegression

def LRM(df):
    X = df.iloc[:, :138]
    y = df.iloc[:, -2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    log_reg = LogisticRegression(max_iter=500)
    log_reg.fit(X_train, y_train)
    y_pred = log_reg.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print("Test accuracy: {:.2f}%".format(accuracy * 100))
    print("Confusion Matrix:")
    print(cm)
    print("Logistic Regression:")
    print(report)

    return accuracy, report

In [22]:
LRM_boning_accuracy, LRM_boning_report = LRM(boning_df_resampled)

Test accuracy: 46.83%
Confusion Matrix:
[[11202  1900  5837  1495  1341  1184]
 [ 4265 10479  1052  2593   722  1313]
 [ 5202  1104 13381   949  2056   630]
 [ 3349  3039  3186  8975  2702  1755]
 [ 2512   996  4734  2209 14810  1136]
 [ 4343  2891  3031  3123  1943  5077]]
Logistic Regression:
              precision    recall  f1-score   support

           0       0.36      0.49      0.42     22959
           1       0.51      0.51      0.51     20424
           2       0.43      0.57      0.49     23322
           3       0.46      0.39      0.42     23006
           4       0.63      0.56      0.59     26397
           5       0.46      0.25      0.32     20408

    accuracy                           0.47    136516
   macro avg       0.48      0.46      0.46    136516
weighted avg       0.48      0.47      0.46    136516



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [23]:
LRM_slicing_accuracy, LRM_slicing_report = LRM(slicing_df_resampled)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test accuracy: 31.21%
Confusion Matrix:
[[ 8801  1622  5411  1358  3997    30   239   210   814]
 [ 2066  7598  1110   139   268     0    33    48    22]
 [ 3092   389 13238   810  4186    89   224   166   442]
 [ 3572   312  1434  5893  8672    81   430   621  1727]
 [ 3881   530  5055  2327 18294   754  1193   848  1547]
 [ 2603   272  3756  1235 13476   690   936   572   891]
 [ 1727   116  2455  1042  8951   204  1588   345   454]
 [ 1801   150  1048  1525  9068   157   488  1487  1253]
 [ 4183   414  1736  2820  9064    92   507   580  3045]]
Logistic Regression:
              precision    recall  f1-score   support

           0       0.28      0.39      0.32     22482
           1       0.67      0.67      0.67     11284
           2       0.38      0.58      0.46     22636
           3       0.34      0.26      0.30     22742
           4       0.24      0.53      0.33     34429
           5       0.33      0.03      0.05     24431
           6       0.28      0.09      0.14   

### **Support Vector Machine**

In [24]:
from sklearn.svm import LinearSVC

def fast_linear_svm(df):
    X = df.iloc[:, :138]
    y = df.iloc[:, -2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    clf = make_pipeline(
        StandardScaler(),
        LinearSVC(dual=False,
                  C=1.0,
                  max_iter=10_000,
                  random_state=42)
    )

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    print("Test Accuracy: {:.2f}%".format(accuracy_score(y_test, y_pred) * 100))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

    return clf

In [25]:
# svm_model_boning = fast_linear_svm(boning_df_resampled)

In [26]:
# svm_model_slicing = fast_linear_svm(slicing_df_resampled)

### **LSTM-CNN Hybrid**

In [60]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, LSTM, Conv1D, MaxPool1D, GlobalAveragePooling1D, BatchNormalization, Dense, Activation, Reshape
from tensorflow.keras.regularizers import l2

# Title: Human Activity Recognition using LSTM-CNN
# Author: Tanmay Chauhan
# Date: July 9th, 2022
# Availability: https://medium.com/@tanmaychauhan111/human-activity-recognition-using-lstm-cnn-8ccb1a42cb81

def model_init(time_steps: int, num_classes: int):
    model = Sequential([
        LSTM(64, return_sequences=True,
             input_shape=(time_steps, 1),
             activation='relu',
             kernel_regularizer=l2(1e-4)),
        Dropout(0.1),

        LSTM(64, return_sequences=True,
             activation='relu',
             kernel_regularizer=l2(1e-4)),
        Dropout(0.1),

        Conv1D(64, kernel_size=2, strides=2,
               activation='relu',
               kernel_regularizer=l2(1e-4)),
        Dropout(0.1),

        MaxPool1D(pool_size=4, padding='same'),
        Conv1D(192, kernel_size=2, strides=1,
               activation='relu',
               kernel_regularizer=l2(1e-4)),
        GlobalAveragePooling1D(),
        Dropout(0.1),
        BatchNormalization(epsilon=1e-6),

        Dense(num_classes,
              activation='softmax',
              kernel_regularizer=l2(1e-4))
    ])

    model.summary()

    model.compile(
        optimizer=Adam(learning_rate=1e-4, clipnorm=1.0), #added gradient clipping
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [62]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

def train_activity_on_LSTMCNN(df, *,
                              model: tf.keras.Model,
                              test_size=0.2,
                              val_split=0.1,
                              epochs=50,
                              batch_size=128,
                              patience=4,
                              random_state=42):
    train_df, test_df = train_test_split(
        df, 
        test_size=test_size,
        stratify=df['Label'],
        random_state=random_state
    )
    
    # not counting 'Label' and 'sharpness' columns into the time_steps
    time_steps = train_df.shape[1] - 2

    X_train_raw = train_df.drop(columns=['sharpness', 'Label']).values
    y_train = train_df['Label'].values

    X_test_raw = test_df.drop(columns=['sharpness', 'Label']).values
    y_test = test_df['Label'].values

    # scaling features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_raw)
    X_test_scaled = scaler.fit_transform(X_test_raw)

    # Reshape to 3D for LSTM-CNN model
    X_train = X_train_scaled.reshape(-1, time_steps, 1)
    X_test = X_test_scaled.reshape(-1, time_steps, 1)

    assert np.isfinite(X_train_scaled).all()
    assert not np.isnan(y_train).any()

    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True
    )
    
    # fit with validation split
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=val_split,
        callbacks=[early_stopping],
        verbose=1
    )

    # evaluate on test set
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {test_acc*100:.2f}%")

    return history, test_loss, test_acc
    

In [63]:
time_steps = boning_df_resampled.shape[1] - 2
num_classes = len(np.unique(boning_df_resampled['Label']) - 2)

model = model_init(time_steps, num_classes)

boning_history, boning_test_loss, boning_test_acc = train_activity_on_LSTMCNN(
    boning_df_resampled,
    model=model,
    test_size=0.2,
    val_split=0.2,
    epochs=50,
    batch_size=128,
    patience=4
)

  super().__init__(**kwargs)


Epoch 1/50
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 17ms/step - accuracy: 0.2720 - loss: 1.7568 - val_accuracy: 0.3361 - val_loss: 1.6304
Epoch 2/50
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 15ms/step - accuracy: 0.3592 - loss: 1.6054 - val_accuracy: 0.3819 - val_loss: 1.5630
Epoch 3/50
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 15ms/step - accuracy: 0.4012 - loss: 1.5218 - val_accuracy: 0.4113 - val_loss: 1.4794
Epoch 4/50
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 15ms/step - accuracy: 0.4245 - loss: 1.4763 - val_accuracy: 0.4105 - val_loss: 1.4790
Epoch 5/50
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 15ms/step - accuracy: 0.4399 - loss: 1.4424 - val_accuracy: 0.4220 - val_loss: 1.4483
Epoch 6/50
[1m2276/2276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 15ms/step - accuracy: 0.4551 - loss: 1.4143 - val_accuracy: 0.4531 - val_loss: 1.3886
Epoc

KeyboardInterrupt: 