### Data preprocessing for 2nd proactive strategy

In [1]:
# Importing necessary libraries
import os
import gc
import utils
import numpy as np
import pandas as pd

#### Loading the dataset

In [2]:
# Reading the dataset from its CSV file under data/processed
dataset_path = "data/processed/processed_data.csv"
data = utils.load_data(dataset_path, "csv")
print(f'Dataset loaded. Shape: {data.shape}')

Dataset loaded. Shape: (7409, 38)


In [3]:
# Removing the "Robot Protective Stop", "Grip Lost" and "Tool Current" columns from the dataset
columns_to_drop = ["Robot Protective Stop", "Grip Lost", "Tool Current"]
data = data.drop(columns=columns_to_drop)
print(f'Dataset after dropping column "Robot Protective Stop", "Grip Lost" and "Tool Current". Shape: {data.shape}')

Dataset after dropping column "Robot Protective Stop", "Grip Lost" and "Tool Current". Shape: (7409, 35)


In [4]:
# Listing the column names that remain in the dataset, and how many there are
columns = data.columns.to_list()
print("Features identified:", columns)
print("Features Count:", len(columns))

Features identified: ['Time Phase', 'Cycle Time', 'Current_J0', 'Current_J1', 'Current_J2', 'Current_J3', 'Current_J4', 'Current_J5', 'Current_Direction_J0', 'Current_Direction_J1', 'Current_Direction_J2', 'Current_Direction_J3', 'Current_Direction_J4', 'Current_Direction_J5', 'Speed_J0', 'Speed_J1', 'Speed_J2', 'Speed_J3', 'Speed_J4', 'Speed_J5', 'Speed_Direction_J0', 'Speed_Direction_J1', 'Speed_Direction_J2', 'Speed_Direction_J3', 'Speed_Direction_J4', 'Speed_Direction_J5', 'Temperature_T0', 'Temperature_J1', 'Temperature_J2', 'Temperature_J3', 'Temperature_J4', 'Temperature_J5', 'Average Temperature', 'Gradient Temperature', 'Load Imbalance']
Features Count: 35


#### Splitting the dataset into train, validation and test sets

In [5]:
# Every dataset column is used as a feature; three separate target sets are
# defined, each holding six per-joint column names
features = columns

# Joint currents: Current_J0 ... Current_J5
target_cj = [f'Current_J{joint}' for joint in range(6)]

# Joint speeds: Speed_J0 ... Speed_J5
target_sj = [f'Speed_J{joint}' for joint in range(6)]

# Joint temperatures: the first column is named Temperature_T0, the others Temperature_J1 ... Temperature_J5
target_tj = ['Temperature_T0'] + [f'Temperature_J{joint}' for joint in range(1, 6)]

In [6]:
# Splitting the dataset into train, validation and test sets, once per target set.
# utils.split_data is unpacked in the order: train features, train targets,
# validation features, validation targets, test features, test targets.
# NOTE(review): the recorded split sizes (4772 + 1737 + 899 = 7408) are one row
# short of the 7409 rows loaded — presumably utils.split_data drops a row; confirm.

# Current joints
trd_cj, trl_cj, vad_cj, val_cj, ted_cj, tel_cj = utils.split_data(data, features, target_cj)
print(f'Target variable "Current joints" dataset - Train features Shape: {trd_cj.shape}, Train target Shape: {trl_cj.shape}')
print(f'Target variable "Current joints" dataset - Validation features Shape: {vad_cj.shape}, Validation target Shape: {val_cj.shape}')
print(f'Target variable "Current joints" dataset - Test features Shape: {ted_cj.shape}, Test target Shape: {tel_cj.shape}')

# Speed joints
trd_sj, trl_sj, vad_sj, val_sj, ted_sj, tel_sj = utils.split_data(data, features, target_sj)
print(f'\nTarget variable "Speed joints" dataset - Train features Shape: {trd_sj.shape}, Train target Shape: {trl_sj.shape}')
print(f'Target variable "Speed joints" dataset - Validation features Shape: {vad_sj.shape}, Validation target Shape: {val_sj.shape}')
print(f'Target variable "Speed joints" dataset - Test features Shape: {ted_sj.shape}, Test target Shape: {tel_sj.shape}')

# Temperature joints
trd_tj, trl_tj, vad_tj, val_tj, ted_tj, tel_tj = utils.split_data(data, features, target_tj)
print(f'\nTarget variable "Temperature joints" dataset - Train features Shape: {trd_tj.shape}, Train target Shape: {trl_tj.shape}')
print(f'Target variable "Temperature joints" dataset - Validation features Shape: {vad_tj.shape}, Validation target Shape: {val_tj.shape}')
print(f'Target variable "Temperature joints" dataset - Test features Shape: {ted_tj.shape}, Test target Shape: {tel_tj.shape}')

Target variable "Current joints" dataset - Train feature Shape: (4772, 35), Train target Shape: (4772, 6)
Target variable "Current joints" dataset - Validation features Shape: (1737, 35), Validation target Shape: (1737, 6)
Target variable "Current joints" dataset - Test features Shape: (899, 35), Test target Shape: (899, 6)

Target variable "Speed joints" dataset - Train features Shape: (4772, 35), Train target Shape: (4772, 6)
Target variable "Speed joints" dataset - Validation features Shape: (1737, 35), Validation target Shape: (1737, 6)
Target variable "Speed joints" dataset - Test features Shape: (899, 35), Test target Shape: (899, 6)

Target variable "Temperature joints" dataset - Train features Shape: (4772, 35), Train target Shape: (4772, 6)
Target variable "Temperature joints" dataset - Validation features Shape: (1737, 35), Validation target Shape: (1737, 6)
Target variable "Temperature joints" dataset - Test features Shape: (899, 35), Test target Shape: (899, 6)


In [7]:
# Saving every split (features and targets) as CSV files under
# data/processed/<target set>/<split>
datasets = {
    "cj": {"train": (trd_cj, trl_cj), "valid": (vad_cj, val_cj), "test": (ted_cj, tel_cj)},
    "sj": {"train": (trd_sj, trl_sj), "valid": (vad_sj, val_sj), "test": (ted_sj, tel_sj)},
    "tj": {"train": (trd_tj, trl_tj), "valid": (vad_tj, val_tj), "test": (ted_tj, tel_tj)}
}

# The loop targets deliberately avoid the name `data`: a `for` target stays bound
# after the loop finishes, so reusing `data` here would replace the notebook-level
# dataset DataFrame with the last split that was saved.
for key, splits in datasets.items():
    for split, (split_features, split_labels) in splits.items():
        utils.save_data_csv(split_features, f"data/processed/{key}/{split}", f"{split}_data_{key}.csv")
        utils.save_data_csv(split_labels, f"data/processed/{key}/{split}", f"{split}_labels_{key}.csv")

Dataset saved: data/processed/cj/train\train_data_cj.csv
Dataset saved: data/processed/cj/train\train_labels_cj.csv
Dataset saved: data/processed/cj/valid\valid_data_cj.csv
Dataset saved: data/processed/cj/valid\valid_labels_cj.csv
Dataset saved: data/processed/cj/test\test_data_cj.csv
Dataset saved: data/processed/cj/test\test_labels_cj.csv
Dataset saved: data/processed/sj/train\train_data_sj.csv
Dataset saved: data/processed/sj/train\train_labels_sj.csv
Dataset saved: data/processed/sj/valid\valid_data_sj.csv
Dataset saved: data/processed/sj/valid\valid_labels_sj.csv
Dataset saved: data/processed/sj/test\test_data_sj.csv
Dataset saved: data/processed/sj/test\test_labels_sj.csv
Dataset saved: data/processed/tj/train\train_data_tj.csv
Dataset saved: data/processed/tj/train\train_labels_tj.csv
Dataset saved: data/processed/tj/valid\valid_data_tj.csv
Dataset saved: data/processed/tj/valid\valid_labels_tj.csv
Dataset saved: data/processed/tj/test\test_data_tj.csv
Dataset saved: data/proce

#### Reordering the datasets

In [8]:
# Column layout applied to the feature frames: the two timing columns, then the
# per-joint current, current direction, speed, speed direction and temperature
# columns, then the three remaining columns
new_order = (
    ['Time Phase', 'Cycle Time']
    + [f'Current_J{joint}' for joint in range(6)]
    + [f'Current_Direction_J{joint}' for joint in range(6)]
    + [f'Speed_J{joint}' for joint in range(6)]
    + [f'Speed_Direction_J{joint}' for joint in range(6)]
    + ['Temperature_T0']
    + [f'Temperature_J{joint}' for joint in range(1, 6)]
    + ['Average Temperature', 'Gradient Temperature', 'Load Imbalance']
)

In [9]:
# Selecting the columns of every feature frame in the order given by `new_order`
# (train, validation and test frames of each target set)
trd_cj, vad_cj, ted_cj = (frame[new_order] for frame in (trd_cj, vad_cj, ted_cj))
trd_sj, vad_sj, ted_sj = (frame[new_order] for frame in (trd_sj, vad_sj, ted_sj))
trd_tj, vad_tj, ted_tj = (frame[new_order] for frame in (trd_tj, vad_tj, ted_tj))

In [10]:
# Saving the reordered feature frames of every split as CSV files
# NOTE(review): the files are written with a "scaled_" prefix although no scaling
# step is visible in the cells shown here — presumably the input was scaled
# upstream; confirm.
datasets = {
    "cj": {"train": trd_cj, "valid": vad_cj, "test": ted_cj},
    "sj": {"train": trd_sj, "valid": vad_sj, "test": ted_sj},
    "tj": {"train": trd_tj, "valid": vad_tj, "test": ted_tj}
}

# The loop target is named `frame` rather than `data`: a `for` target stays bound
# after the loop, so reusing `data` would rebind the notebook-level name.
for key, splits in datasets.items():
    for split, frame in splits.items():
        utils.save_data_csv(frame, f"data/processed/{key}/{split}", f"scaled_{split}_data_{key}.csv")

Dataset saved: data/processed/cj/train\scaled_train_data_cj.csv
Dataset saved: data/processed/cj/valid\scaled_valid_data_cj.csv
Dataset saved: data/processed/cj/test\scaled_test_data_cj.csv
Dataset saved: data/processed/sj/train\scaled_train_data_sj.csv
Dataset saved: data/processed/sj/valid\scaled_valid_data_sj.csv
Dataset saved: data/processed/sj/test\scaled_test_data_sj.csv
Dataset saved: data/processed/tj/train\scaled_train_data_tj.csv
Dataset saved: data/processed/tj/valid\scaled_valid_data_tj.csv
Dataset saved: data/processed/tj/test\scaled_test_data_tj.csv


In [11]:
# Reordering the frames held in `datasets` according to `new_order`
# A new mapping is built instead of assigning into the dict while iterating it,
# and the name `data` is not reused as a loop variable.
# NOTE(review): the frames stored in `datasets` appear to have been indexed with
# `new_order` already before being stored, so this pass looks redundant — confirm.
datasets = {
    key: {split: frame[new_order] for split, frame in splits.items()}
    for key, splits in datasets.items()
}

#### Creating sequences from the datasets

In [12]:
# Reading the sequence length from its text file and parsing it as an integer
with open("data/processed/sequence_length.txt", "r") as length_file:
    length_text = length_file.read()
sequence_length = int(length_text.strip())

In [13]:
# Building sequences for the train, validation and test splits of each target set
def _as_sequences(frames, targets):
    """Call utils.create_sequences for one split with `sequence_length` and the "regression" argument."""
    return utils.create_sequences(frames, targets, sequence_length, "regression")


# Current joints
trd_cj, trl_cj = _as_sequences(trd_cj, trl_cj)
vad_cj, val_cj = _as_sequences(vad_cj, val_cj)
ted_cj, tel_cj = _as_sequences(ted_cj, tel_cj)

# Speed joints
trd_sj, trl_sj = _as_sequences(trd_sj, trl_sj)
vad_sj, val_sj = _as_sequences(vad_sj, val_sj)
ted_sj, tel_sj = _as_sequences(ted_sj, tel_sj)

# Temperature joints
trd_tj, trl_tj = _as_sequences(trd_tj, trl_tj)
vad_tj, val_tj = _as_sequences(vad_tj, val_tj)
ted_tj, tel_tj = _as_sequences(ted_tj, tel_tj)

In [14]:
# Printing the shapes of the sequence arrays and their labels for every split
# Each list holds, in order: train data, train labels, valid data, valid labels,
# test data, test labels
datasets = {
    'Current joints': [trd_cj, trl_cj, vad_cj, val_cj, ted_cj, tel_cj],
    'Speed joints': [trd_sj, trl_sj, vad_sj, val_sj, ted_sj, tel_sj],
    'Temperature joints': [trd_tj, trl_tj, vad_tj, val_tj, ted_tj, tel_tj]
}

for label, parts in datasets.items():
    # Even positions hold the data arrays, odd positions the matching labels
    for name, x, y in zip(("Train", "Valid", "Test"), parts[0::2], parts[1::2]):
        print(f"{name} data \"{label}\": {x.shape}, {name} labels \"{label}\": {y.shape}")

Train data "Current joints": (4742, 30, 35), Train labels "Current joints": (4742, 6)
Valid data "Current joints": (1707, 30, 35), Valid labels "Current joints": (1707, 6)
Test data "Current joints": (869, 30, 35), Test labels "Current joints": (869, 6)
Train data "Speed joints": (4742, 30, 35), Train labels "Speed joints": (4742, 6)
Valid data "Speed joints": (1707, 30, 35), Valid labels "Speed joints": (1707, 6)
Test data "Speed joints": (869, 30, 35), Test labels "Speed joints": (869, 6)
Train data "Temperature joints": (4742, 30, 35), Train labels "Temperature joints": (4742, 6)
Valid data "Temperature joints": (1707, 30, 35), Valid labels "Temperature joints": (1707, 6)
Test data "Temperature joints": (869, 30, 35), Test labels "Temperature joints": (869, 6)


In [15]:
# Saving the sequence arrays and their labels for every split under
# data/processed/<target set>/<split>/sequences
arrays = {
    "cj": {"train": (trd_cj, trl_cj), "valid": (vad_cj, val_cj), "test": (ted_cj, tel_cj)},
    "sj": {"train": (trd_sj, trl_sj), "valid": (vad_sj, val_sj), "test": (ted_sj, tel_sj)},
    "tj": {"train": (trd_tj, trl_tj), "valid": (vad_tj, val_tj), "test": (ted_tj, tel_tj)}
}

# Each value is a (sequences, labels) tuple handed to utils.save_sequences as is.
# The loop target is not called `data`, because a `for` target stays bound after
# the loop and would rebind the notebook-level name to a tuple.
for key, splits in arrays.items():
    for split, sequence_pair in splits.items():
        folder = f"data/processed/{key}/{split}/sequences"
        utils.save_sequences(sequence_pair, folder, f"seq_{split}_data_{key}.npy", f"seq_{split}_labels_{key}.npy")

Saved sequences to: data\processed\cj\train\sequences
Saved sequences to: data\processed\cj\valid\sequences
Saved sequences to: data\processed\cj\test\sequences
Saved sequences to: data\processed\sj\train\sequences
Saved sequences to: data\processed\sj\valid\sequences
Saved sequences to: data\processed\sj\test\sequences
Saved sequences to: data\processed\tj\train\sequences
Saved sequences to: data\processed\tj\valid\sequences
Saved sequences to: data\processed\tj\test\sequences


In [16]:
# Running a full garbage collection pass; the number returned is shown as the cell output
gc.collect(generation=2)

20