In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from scipy.fft import fft

In [7]:
# Read the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='Dataset/dataset.csv')
# Preview the first five rows (kept as the cell's last expression so it is displayed).
df.head(5)

Unnamed: 0,X,Y,Z,Mixed,ClassLabel
0,0.125022,0.094986,0.001297,0.157018,1
1,0.15071,0.083282,-0.023514,0.173788,1
2,0.102941,0.111084,0.010075,0.151782,1
3,0.03845,0.049911,0.007511,0.063451,1
4,-0.029148,-0.105423,0.017124,0.110711,1


In [8]:
# One sub-frame per subject: personN holds the rows whose ClassLabel equals N (N = 1..5).
person1, person2, person3, person4, person5 = (
    df.loc[df['ClassLabel'].eq(class_label)] for class_label in range(1, 6)
)

In [11]:
def remove_outliers_iqr(data, column):
    """Keep only the rows whose `column` value lies inside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, where the
    quartiles are computed from ``data[column]`` itself. Values that sit
    exactly on a fence are kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to filter; it is not modified.
    column : label
        Column whose values decide which rows are kept.

    Returns
    -------
    pandas.DataFrame
        The rows of `data` (original index labels preserved) whose `column`
        value is within the fences.
    """
    values = data[column]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    fence_margin = 1.5 * (third_quartile - first_quartile)
    # `between` is inclusive on both ends, matching a >= / <= comparison pair.
    inside_fences = values.between(first_quartile - fence_margin,
                                   third_quartile + fence_margin)
    return data[inside_fences]

def calculate_metrics(window, bins=10):
    """Summarise one window of values with five scalar statistics.

    Parameters
    ----------
    window : array-like with ``mean``/``std`` methods (e.g. numpy.ndarray)
        The values to summarise.
    bins : int, optional
        Number of equal-width histogram bins used for the entropy estimate
        (default 10).

    Returns
    -------
    tuple
        ``(mean, std_dev, energy, entropy, num_peaks)`` where

        * ``mean`` and ``std_dev`` are ``window.mean()`` and ``window.std()``,
        * ``energy`` is the sum of squared values,
        * ``entropy`` is the Shannon entropy (in bits) of the histogram of the
          values, between 0 and ``log2(bins)``,
        * ``num_peaks`` is the number of local maxima with height >= 0 found by
          ``scipy.signal.find_peaks``.
    """
    mean = window.mean()
    std_dev = window.std()
    energy = np.sum(np.square(window))

    # Shannon entropy needs probabilities that sum to 1. A density histogram
    # (density=True) integrates to 1 instead, so its values depend on the bin
    # width and the resulting "entropy" can even be negative. Use raw counts
    # normalised by the total instead.
    counts, _ = np.histogram(window, bins=bins)
    total = counts.sum()
    if total > 0:
        # Empty bins contribute 0 to the entropy; dropping them avoids log2(0).
        probabilities = counts[counts > 0] / total
        # "+ 0.0" turns the -0.0 produced by a single occupied bin into 0.0.
        entropy = -np.sum(probabilities * np.log2(probabilities)) + 0.0
    else:
        # No samples at all: define the entropy as 0 rather than NaN.
        entropy = 0.0

    # height=0 keeps only local maxima whose value is at least 0.
    peaks, _ = find_peaks(window, height=0)
    num_peaks = len(peaks)
    return mean, std_dev, energy, entropy, num_peaks



In [13]:
def apply_fft_and_extract_features(data, window_size=100, columns=('X', 'Y', 'Z', 'Mixed')):
    """Build a table of windowed FFT features for a sequence of DataFrames.

    For every frame in `data`:

    1. rows that are IQR outliers are removed, one column after another, via
       ``remove_outliers_iqr`` (each pass works on the rows left by the
       previous one);
    2. the remaining rows are cut into consecutive, non-overlapping windows of
       `window_size` rows; trailing rows that do not fill a whole window are
       ignored;
    3. for each window and column the FFT magnitude ``np.abs(fft(window))`` is
       passed to ``calculate_metrics`` and the five returned statistics are
       stored as ``Mean_<col>``, ``Std Dev_<col>``, ``Energy_<col>``,
       ``Entropy_<col>`` and ``Peaks_<col>``.

    Parameters
    ----------
    data : iterable of pandas.DataFrame
        One frame per category; each must contain every name in `columns`.
    window_size : int, optional
        Number of rows per window (default 100).
    columns : sequence of str, optional
        Signal columns to process (default ``('X', 'Y', 'Z', 'Mixed')``).

    Returns
    -------
    pandas.DataFrame
        One row per window, feature columns grouped by signal column, plus a
        ``category`` column holding the 1-based position of the source frame
        in `data`. The index is a fresh 0..n-1 range. If `data` is empty, an
        empty frame with the same columns is returned.
    """
    columns = list(columns)
    metric_names = ['Mean', 'Std Dev', 'Energy', 'Entropy', 'Peaks']
    # Column order of the result: all metrics of the first column, then the next, ...
    feature_names = [f'{metric_name}_{col}' for col in columns for metric_name in metric_names]

    all_metrics = []
    for index, person_df in enumerate(data):
        # Remove outliers for each column; every pass filters the already-filtered rows.
        for col in columns:
            person_df = remove_outliers_iqr(person_df, col)

        metrics = {name: [] for name in feature_names}

        # Step by window_size so windows do not overlap; the "+ 1" admits a final
        # window that ends exactly on the last row.
        for i in range(0, len(person_df) - window_size + 1, window_size):
            for col in columns:
                window = person_df[col].iloc[i:i + window_size].values
                window_fft = np.abs(fft(window))
                # calculate_metrics returns its values in the order of metric_names.
                for metric_name, value in zip(metric_names, calculate_metrics(window_fft)):
                    metrics[f'{metric_name}_{col}'].append(value)

        result_df = pd.DataFrame(metrics)
        result_df['category'] = index + 1  # Category is the 1-based position in `data`
        all_metrics.append(result_df)

    if not all_metrics:
        # pd.concat raises ValueError on an empty list; return an empty table instead.
        return pd.DataFrame(columns=feature_names + ['category'])

    # Every per-frame table is indexed from 0, so renumber to avoid duplicate labels.
    combined_df = pd.concat(all_metrics, ignore_index=True)
    return combined_df

In [14]:
# Per-subject frames, listed in class-label order: the feature extractor labels
# each frame with its 1-based position in this list.
personas = [person1, person2, person3, person4, person5]

# Windowed FFT features for every subject, stacked into a single table.
combined_df = apply_fft_and_extract_features(data=personas)

# Write the feature table to disk, leaving out the DataFrame index.
combined_df.to_csv(path_or_buf='Dataset/fft_combined_metrics.csv', index=False)