In [21]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt
from scipy.fft import fft, fftfreq
import numpy as np

In [22]:
def sliding_window_pd(data, ws, overlap, w_type=None, w_center=True, print_stats=True):
    step_size = int(ws * (1 - overlap))
    windows = []
    for start in range(0, len(data) - ws + 1, step_size):
        end = start + ws
        window = data[start:end]
        windows.append(window)
    return np.array(windows)

def apply_filter(data, order=5, wn=0.1, filter_type='lowpass'):
    b, a = butter(order, wn, btype=filter_type)
    return filtfilt(b, a, data)

def transform_to_frequency_domain(filtered_data, sampling_rate):
    n = len(filtered_data)
    yf = fft(filtered_data)
    xf = fftfreq(n, 1 / sampling_rate)
    return xf, np.abs(yf)

In [23]:
# Path to the data directory
data_path = os.path.join(os.getcwd(), "data")

# List all class folders
classes_folders_list = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f))]

# Define window size and overlap
window_size = 50  # Window size in samples
overlap = 0.8  # Overlap as a fraction

# Columns to apply the sliding window function
columns = ['x-axis (g)', 'y-axis (g)', 'z-axis (g)', 'x-axis (deg/s)', 'y-axis (deg/s)', 'z-axis (deg/s)']

# Define sampling rate
sampling_rate = 100  # Define sampling rate in Hz

In [24]:
filtered_windowed_data = {}
# Process each file in each class folder
for class_folder in classes_folders_list:
    folder_path = os.path.join(data_path, class_folder)
    files_in_folder = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
    
    window_path = os.path.join(os.getcwd(), "windowed_data", class_folder)
    if os.path.exists(window_path):
                print(f"Skipping {class_folder} as it already exists.")
                continue
    for file_name in files_in_folder:
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_csv(file_path)
        
        window_path = os.path.join(os.getcwd(), "windowed_data", class_folder)
        os.makedirs(window_path, exist_ok=True)
        
        # Apply the sliding window function to each column
        windowed_data = {}
        windowed_filtered_data = {}
        for col in columns:
            windowed_data[col] = sliding_window_pd(df[col].values, ws=window_size, overlap=overlap, w_type=None, w_center=True, print_stats=True)
            windowed_filtered_data[col] = apply_filter(windowed_data[col], order=5, wn=0.1, filter_type='lowpass')
            
        num_windows = len(next(iter(windowed_filtered_data.values())))
       
       

        # Save filtered windows to CSV
        for i in range(num_windows):
              
            window_dict = {col: windowed_filtered_data[col][i] for col in columns}
            filtered_windows_df = pd.DataFrame(window_dict)

            # Construct the file path
            file_name_without_extension = os.path.splitext(file_name)[0]
            filtered_windows_file = os.path.join(window_path, f"{file_name_without_extension}_window{i+1}.csv")

            # Save the DataFrame to CSV
            filtered_windows_df.to_csv(filtered_windows_file, index=False)


        # windowed_data now contains the windowed data for each specified column
        for col, windows in windowed_data.items():
            print(f"Column: {col}, Windows shape: {windows.shape}")

        # Step 3: Barplot of Count of Instances after Windowing
        window_count = len(windows)


        # Transform Data into Frequency Domain
        for col, windows in windowed_filtered_data.items():
            for i, window in enumerate(windows):
                xf, yf = transform_to_frequency_domain(window, sampling_rate)
                plt.plot(xf, yf, label=f'Window {i+1}')
            plt.xlabel('Frequency (Hz)')
            plt.ylabel('Amplitude')
            plt.title(f'Frequency Domain Representation for {col}')
            plt.legend()
            plt.show()

        # Plot Original and Filtered Data for Each Window
        for col in columns:
            for i in range(len(windowed_filtered_data[col])):
                plt.figure(figsize=(12, 6))

                # # Original data
                plt.subplot(2, 1, 1)
                plt.plot(windowed_data[col][i])
                plt.title(f'Original Data for {col} - Window {i+1}')
                plt.xlabel('Sample')
                plt.ylabel('Value')

                # Filtered data
                plt.subplot(2, 1, 2)
                plt.plot(windowed_filtered_data[col][i])
                plt.title(f'Filtered Data for {col} - Window {i+1}')
                plt.xlabel('Sample')
                plt.ylabel('Value')

                plt.tight_layout()
                plt.show()

Column: x-axis (g), Windows shape: (14, 50)
Column: y-axis (g), Windows shape: (14, 50)
Column: z-axis (g), Windows shape: (14, 50)
Column: x-axis (deg/s), Windows shape: (14, 50)
Column: y-axis (deg/s), Windows shape: (14, 50)
Column: z-axis (deg/s), Windows shape: (14, 50)
Column: x-axis (g), Windows shape: (14, 50)
Column: y-axis (g), Windows shape: (14, 50)
Column: z-axis (g), Windows shape: (14, 50)
Column: x-axis (deg/s), Windows shape: (14, 50)
Column: y-axis (deg/s), Windows shape: (14, 50)
Column: z-axis (deg/s), Windows shape: (14, 50)
Column: x-axis (g), Windows shape: (16, 50)
Column: y-axis (g), Windows shape: (16, 50)
Column: z-axis (g), Windows shape: (16, 50)
Column: x-axis (deg/s), Windows shape: (16, 50)
Column: y-axis (deg/s), Windows shape: (16, 50)
Column: z-axis (deg/s), Windows shape: (16, 50)
Column: x-axis (g), Windows shape: (14, 50)
Column: y-axis (g), Windows shape: (14, 50)
Column: z-axis (g), Windows shape: (14, 50)
Column: x-axis (deg/s), Windows shape: (