# In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import yaml
from utils.data_processing import df_rebase, butter_lowpass_filter, split_windows
import numpy as np

# Load configuration
# The config file is looked up one directory above the current working directory.
config_path = os.path.join(os.getcwd(), "..", "config.yml")
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default locale encoding.
with open(config_path, encoding="utf-8") as file:
    # NOTE(review): FullLoader is fine for a trusted project file; switch to
    # yaml.safe_load if this config can ever come from an untrusted source.
    config = yaml.load(file, Loader=yaml.FullLoader)

# Load dataset
# config["data_path"] is resolved against the same parent directory as the config file.
data_path = os.path.join(os.getcwd(), "..", config["data_path"])

def load_data(data_path):
    """Load every recording under ``data_path`` into per-class lists of DataFrames.

    Each immediate sub-directory of ``data_path`` is treated as one class.
    Every regular file inside it is read with ``pandas.read_csv`` and passed
    through ``df_rebase`` together with the module-level ``config['order']``
    and ``config['rename']`` settings.

    Directory listings are sorted because ``os.listdir`` returns entries in
    arbitrary order; sorting keeps the class order and the per-class instance
    order reproducible across runs and platforms.

    Args:
        data_path: Directory containing one sub-directory per class.

    Returns:
        dict mapping each class folder name to the list of DataFrames loaded
        from that folder (an empty list when the folder holds no files).
    """
    data = {}
    for class_folder in sorted(os.listdir(data_path)):
        class_folder_path = os.path.join(data_path, class_folder)
        # Only directories are classes; stray files next to them are ignored.
        if not os.path.isdir(class_folder_path):
            continue
        class_data = []
        for file_name in sorted(os.listdir(class_folder_path)):
            file_path = os.path.join(class_folder_path, file_name)
            # Nested directories inside a class folder are not descended into.
            if not os.path.isfile(file_path):
                continue
            df = pd.read_csv(file_path)
            class_data.append(df_rebase(df, config['order'], config['rename']))
        data[class_folder] = class_data
    return data

data = load_data(data_path)

# Plot the time-length of instances per class
# Row count of every loaded instance, grouped by class.
time_lengths = {
    label: [len(frame) for frame in frames]
    for label, frames in data.items()
}
# Total number of rows collected for each class.
class_lengths = {
    label: sum(lengths)
    for label, lengths in time_lengths.items()
}

plt.bar(class_lengths.keys(), class_lengths.values())
plt.title('Time-length of Collected Instances for Each Class')
plt.ylabel('Time-length (samples)')
plt.show()

# Split data into fixed windows
window_cfg = config["sliding_window"]
window_size = window_cfg["ws"]
overlap = window_cfg["overlap"]

# One split_windows() result per instance, grouped by class.
windowed_data = {
    label: [split_windows(frame, window_size, overlap) for frame in frames]
    for label, frames in data.items()
}
# Number of windows per class, summed over all of that class's instances.
windowed_counts = {
    label: sum(map(len, per_instance))
    for label, per_instance in windowed_data.items()
}

plt.bar(windowed_counts.keys(), windowed_counts.values())
plt.title('Count of Instances After Windowing for Each Class')
plt.ylabel('Count of Instances')
plt.show()

# Filter the data
cutoff = config["filter"]["wn"]
# Sample rate; hard-coded here rather than read from the config.
# NOTE(review): presumably in Hz — confirm against the recording setup.
fs = 50
order = config["filter"]["order"]

# Run butter_lowpass_filter over each accelerometer column and write the result
# back into the same DataFrame, so later steps that read `data` see the
# filtered values.
for instances in data.values():
    for instance in instances:
        for axis in ("acc_x", "acc_y", "acc_z"):
            instance[axis] = butter_lowpass_filter(instance[axis], cutoff, fs, order)

# Transform data into frequency domain
# For every instance: FFT along the rows (axis=0), keep the magnitude of each
# coefficient, and wrap the result in a DataFrame labelled with config['rename'].
data_fft = {
    label: [
        pd.DataFrame(np.abs(np.fft.fft(frame, axis=0)), columns=config['rename'])
        for frame in frames
    ]
    for label, frames in data.items()
}

# Visualize time-series instance after filtering
# Use the first instance of the first class in `data` as the example.
sample_instance = list(data.values())[0][0]
plt.figure(figsize=(12, 6))
# One line per accelerometer axis, labelled with its column name.
for axis_name in ("acc_x", "acc_y", "acc_z"):
    plt.plot(sample_instance[axis_name], label=axis_name)
plt.title('Filtered Signal')
plt.xlabel('Samples')
plt.ylabel('Amplitude')
plt.legend()
plt.show()
