In [None]:
# libraries
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader

# custom
import model
import utilities


In [None]:
# Machine whose dataset is processed: choose between 'BrushlessMotor' and
# 'RoboticArm'. The value is interpolated into the dataset, model and results
# paths built in the cells below.
MACHINE = 'RoboticArm'

In [None]:
# Paths to the training and testing HDF5 dataset files
TRAIN_DATASET_PATH = f'data/{MACHINE}/windowed/train_dataset_window_0.100s.h5'
TEST_DATASET_PATH = f'data/{MACHINE}/windowed/test_dataset_window_0.100s.h5'

# List of sensor names to be extracted from the dataset
SENSORS = [
    'imp23absu_mic',
    'ism330dhcx_acc',
    'ism330dhcx_gyro'
]

# List of label names to be extracted from the dataset
LABEL_NAMES = ['segment_id',
               'split_label',
               'anomaly_label',
               'domain_shift_op',
               'domain_shift_env']

# Compute backend. Picked automatically so the notebook survives
# "Restart & Run All" on machines without a CUDA GPU: CUDA first, then the
# Apple MPS backend, otherwise CPU. To force a backend, overwrite DEVICE with
# one of 'cuda', 'mps' or 'cpu' right after this block.
if torch.cuda.is_available():
    DEVICE = 'cuda'
elif (getattr(torch.backends, 'mps', None) is not None
        and torch.backends.mps.is_available()):
    DEVICE = 'mps'
else:
    DEVICE = 'cpu'

# Hyper-parameters. The whole dict is handed to the model constructor; the
# notebook itself reads 'lr', 'batch_size', 'device', 'normalisation',
# 'valid_size' and 'seed'. The remaining keys are presumably consumed inside
# model.AutoencoderFC -- confirm there.
PARAMS = {
    'layer_dims': [2048, 2048, 2048, 16],
    'lr': 0.0001,
    'criterion': utilities.MSE,
    'batch_size': 1024,
    'num_epochs': 1000,
    # One of 'cuda', 'mps' or 'cpu' (see DEVICE above)
    'device': DEVICE,
    'patience': 3,
    'normalisation': 'std_window',
    # Fraction of the training set held out for validation
    'valid_size': 0.1,
    # Shared by torch seeding and the train/validation split
    'seed': 1995
}

# Set the seed for general torch operations
torch.manual_seed(PARAMS['seed'])

# Additionally seed the accelerator backend that was selected. Nothing extra
# is needed for 'cpu' because the call above already covers it.
if PARAMS['device'] == 'mps':
    # Seed operations that run on the Apple MPS GPU
    torch.mps.manual_seed(PARAMS['seed'])
elif PARAMS['device'] == 'cuda':
    torch.cuda.manual_seed(PARAMS['seed'])
elif PARAMS['device'] != 'cpu':
    # Reject typos and unsupported backends early, before any data is loaded
    raise ValueError(f"Unsupported device type: {PARAMS['device']}")


In [None]:
# Read the raw train/test sensor arrays and their label tables
(X_train_raw, Y_train_raw,
 X_test, Y_test) = utilities.load_dataset(
    TRAIN_DATASET_PATH, TEST_DATASET_PATH, LABEL_NAMES, SENSORS)

# Add a 'combined_label' column (anomaly label + both domain-shift labels) to
# each label table; it is the stratification key for the split below.
# NOTE(review): if these labels are integers, distinct combinations can add up
# to the same value -- confirm the dtype returned by load_dataset.
for label_table in (Y_train_raw, Y_test):
    label_table['combined_label'] = (
        label_table['anomaly_label']
        + label_table['domain_shift_op']
        + label_table['domain_shift_env']
    )

# Stratified train/validation split over row positions. Only the two position
# lists are kept; the split copies of the label table are discarded.
train_indices, valid_indices = train_test_split(
    range(len(Y_train_raw)),
    Y_train_raw,
    test_size=PARAMS['valid_size'],
    random_state=PARAMS['seed'],
    stratify=Y_train_raw['combined_label'],
)[:2]

# Slice every sensor array with the train and validation positions
X_train, X_valid = [], []
for sensor_data in X_train_raw:
    X_train.append(sensor_data[train_indices])
    X_valid.append(sensor_data[valid_indices])

# Slice the label table the same way, with a fresh 0..n-1 index per subset
Y_train, Y_valid = (
    Y_train_raw.iloc[positions].reset_index(drop=True)
    for positions in (train_indices, valid_indices)
)

# Normalise all three splits with the configured normalisation method
X_train, X_valid, X_test = utilities.normalize_data(
    X_train,
    X_valid,
    X_test,
    PARAMS['normalisation'],
)

# Per-sensor channel count (axis 1) and window length (axis 2)
NUM_CHANNELS, WINDOW_LENGTHS = [], []
for sensor_array in X_train:
    NUM_CHANNELS.append(sensor_array.shape[1])
    WINDOW_LENGTHS.append(sensor_array.shape[2])

# Convert every sensor array to a torch tensor on the configured device
X_train_tensor, X_valid_tensor, X_test_tensor = (
    [torch.from_numpy(array).to(PARAMS['device']) for array in split]
    for split in (X_train, X_valid, X_test)
)

# Wrap each split in the project's dataset class
train_dataset, valid_dataset, test_dataset = map(
    utilities.CustomDataset,
    (X_train_tensor, X_valid_tensor, X_test_tensor),
)

# Batch the datasets; only the training loader shuffles
train_data_loader, valid_data_loader, test_data_loader = (
    DataLoader(dataset, batch_size=PARAMS['batch_size'], shuffle=shuffle)
    for dataset, shuffle in (
        (train_dataset, True),
        (valid_dataset, False),
        (test_dataset, False),
    )
)


In [None]:
# Build the model from the per-sensor window lengths and channel counts,
# together with the hyper-parameter dict and the sensor names.
baseline = model.AutoencoderFC(WINDOW_LENGTHS, NUM_CHANNELS, PARAMS, SENSORS)
# Adam optimiser over the model's parameters, using the configured learning rate.
optimizer = torch.optim.Adam(baseline.parameters(), lr=PARAMS['lr'])
# Presumably trains the model on the training loader while monitoring the
# validation loader -- confirm the stopping logic in model.AutoencoderFC.fit.
baseline.fit(train_data_loader, valid_data_loader, optimizer)
# Evaluate on the test loader against the test labels. The returned object is
# treated as a DataFrame of AUC scores by the results cell below.
# NOTE(review): the meaning of 'median' is defined by AutoencoderFC.test --
# confirm there.
AUC_scores = baseline.test(test_data_loader, Y_test, utilities.MSE, 'median')


## Show results table as in paper

In [None]:
# Row key in the AUC table -> label shown in the final table. The dict order
# is the row order of the final table.
row_labels = {
    'total_loss': 'Overall',
    'f_ism330dhcx_acc': 'F-acc',
    's_ism330dhcx_acc': 'S-acc',
    'f_ism330dhcx_gyro': 'F-gyr',
    's_ism330dhcx_gyro': 'S-gyr',
    'f_imp23absu_mic': 'F-mic',
    's_imp23absu_mic': 'S-mic',
}
new_order = list(row_labels)

# Work on a copy so the raw AUC table stays untouched
results = AUC_scores.copy()
# Columns are renamed by position
results.columns = ['S+T', 'Source', 'Target']
# Reorder the rows, then swap the keys for their display labels
results = results.reindex(new_order)
results.index = list(row_labels.values())
# Express the scores as percentages with two decimals
results = results.mul(100).round(2)
results


## Save model and results

In [None]:
import os

# Directory that receives the trained weights for the selected machine
MODEL_PATH = f'models/{MACHINE}'
os.makedirs(MODEL_PATH, exist_ok=True)

# The file name records the seed the model was trained with
model_file = f'{MODEL_PATH}{os.sep}baseline_seed{PARAMS["seed"]}.pth'
torch.save(baseline.state_dict(), model_file)


In [None]:
# Directory that receives the results table for the selected machine
RESULTS_PATH = f'results/{MACHINE}'
os.makedirs(RESULTS_PATH, exist_ok=True)

# Write the formatted AUC table as CSV
results_file = f'{RESULTS_PATH}{os.sep}AUC_scores.csv'
results.to_csv(results_file)
