In [17]:
import glob
import re
import os
import pathlib
import pandas as pd
import matplotlib.pyplot as plt
import scipy.signal
import statistics
import collections

In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import gc
import time
import math
from scipy import signal
from scipy.fftpack import fft, fftfreq
from scipy.stats import iqr
from scipy.stats import pearsonr
from scipy.stats import skew
from scipy.stats import kurtosis
from spectrum import *
from statsmodels.robust import mad as median_deviation
from sklearn.preprocessing import minmax_scale
import os

# feature extraction functions

In [1]:
def sliding_window(data, window, step_size):
    """Cut a 1-D array into (possibly overlapping) fixed-length windows.

    Parameters
    ----------
    data : numpy.ndarray
        Signal to segment; the last axis is the one that is windowed.
    window : int
        Number of samples per window.
    step_size : int
        Number of samples between the starts of consecutive windows.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, window)`` built with ``as_strided``; it
        shares memory with ``data`` and should be treated as read-only.
        ``n_windows`` is 0 when ``data`` is shorter than one window.
    """
    n_samples = data.shape[-1]
    # Count only windows that fit completely inside the buffer. The previous
    # formula (len / step - 1) was correct solely for window == 2 * step and
    # could make as_strided read beyond the end of the array otherwise.
    n_windows = max((n_samples - window) // step_size + 1, 0)
    item_stride = data.strides[-1]
    return np.lib.stride_tricks.as_strided(
        data,
        shape=(n_windows, window),
        strides=(item_stride * step_size, item_stride),
    )


def median_filter(data, f_size=3):
    """Median-filter a signal and prepend a copy of the first filtered sample.

    ``data`` is passed to ``scipy.signal.medfilt`` with kernel size ``f_size``.
    The returned array is one element longer than the filtered signal because
    the first filtered value is inserted again at index 0.
    """
    smoothed = signal.medfilt(data, f_size)
    head = smoothed[0]
    return np.insert(smoothed, 0, head)


def calc_grav_body_signal(t_signal, freq1=0.3, freq2=20.0, fs=100.0):
    """Split a time signal into low-frequency and band-limited components.

    The signal is transformed with an FFT, the bins are masked by absolute
    frequency, and each masked spectrum is transformed back.

    Parameters
    ----------
    t_signal : array-like
        Time-domain samples.
    freq1 : float
        Upper edge (inclusive, Hz) of the low-frequency ("gravity") band and
        lower edge (exclusive) of the body band.
    freq2 : float
        Upper edge (inclusive, Hz) of the body band; everything above is
        discarded.
    fs : float
        Sampling rate in Hz used to label the FFT bins.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(t_grav_signal, t_body_signal)``: real parts of the inverse FFTs of
        the ``|f| <= freq1`` bins and of the ``freq1 < |f| <= freq2`` bins.
    """
    # The notebook's import cell only pulls fft and fftfreq from scipy.fftpack.
    from scipy.fftpack import ifft

    f_signal = fft(t_signal)
    abs_freqs = np.abs(fftfreq(len(t_signal), d=1 / float(fs)))

    # Honour the freq1/freq2 arguments (they used to be ignored in favour of
    # hard-coded 0.3 and 20, which remain the defaults).
    grav_mask = abs_freqs <= freq1
    body_mask = (abs_freqs > freq1) & (abs_freqs <= freq2)

    t_grav_signal = ifft(np.where(grav_mask, f_signal, 0)).real
    t_body_signal = ifft(np.where(body_mask, f_signal, 0)).real

    return t_grav_signal, t_body_signal


def calc_jerk(data, fs=50.0):
    """Forward-difference derivative of ``data`` sampled at ``fs`` Hz.

    Each output value is ``(data[k + 1] - data[k]) * fs``; the first slope is
    repeated at the front so a 1-D input of length n yields n values.
    """
    sample_period = 1.0 / fs
    slopes = []
    for k in range(len(data) - 1):
        slopes.append((data[k + 1] - data[k]) / sample_period)
    derivative = np.array(slopes)

    # Duplicate the first slope to make up for the sample lost by differencing.
    return np.insert(derivative, 0, derivative[0])


def magnitude(x, y, z):
    """Return the Euclidean norm of the vector ``(x, y, z)``."""
    components = [x, y, z]
    return np.linalg.norm(components)


def graph_plot(data):
    """Wrap ``data`` in a DataFrame and draw it with the default pandas plot."""
    frame = pd.DataFrame(data)
    frame.plot()


def calc_fft_signal(data):
    """Return the scaled one-sided amplitude spectrum of ``data``.

    The FFT magnitude of the first ``N // 2`` bins is multiplied by ``2 / N``,
    where ``N`` is the number of samples along the first axis. No window
    function is applied.
    """
    n_samples = data.shape[0]  # number of FFT samples
    first_half = fft(data)[0:n_samples // 2]
    return 2.0 / n_samples * np.abs(first_half)


def min_max(x, axis=None):
    """Rescale ``x`` to the range [0, 1] along ``axis`` (whole array if None)."""
    lower = x.min(axis=axis, keepdims=True)
    upper = x.max(axis=axis, keepdims=True)
    span = upper - lower
    return (x - lower) / span


def zscore(x, axis=None):
    """Standardise ``x``: subtract the mean and divide by the population std."""
    center = x.mean(axis=axis, keepdims=True)
    spread = np.std(x, axis=axis, keepdims=True)
    return (x - center) / spread


def mad(data, axis=None):
    """
    Compute the *Median Absolute Deviation* of ``data`` as a Python float.

    The work is delegated to ``statsmodels.robust.mad`` (imported as
    ``median_deviation``) with its default arguments. ``axis`` is accepted
    but not forwarded to that call.
    """
    return float(median_deviation(data))


def sma(data_x, data_y, data_z):
    """
    Compute the *Signal magnitude area* of a three-axis signal.

    Returns the sum of ``|x| + |y| + |z|`` over all samples divided by the
    number of samples in ``data_x``.
    """
    n_samples = len(data_x)
    total = 0
    for idx in range(n_samples):
        total += (abs(data_x[idx]) + abs(data_y[idx]) + abs(data_z[idx]))
    return total / n_samples


def sma_mag(data_mag):
    """
    Compute the *Signal magnitude area* of a single-channel signal.

    Returns the mean of the absolute sample values.
    """
    n_samples = len(data_mag)
    total = 0
    for idx in range(n_samples):
        total += abs(data_mag[idx])

    return total / n_samples


def energy(data):
    """
    Compute the *Energy measure value*: sum of squared samples over the length.
    """
    squared = data ** 2
    return np.sum(squared) / len(data)


def shannon_entropy(data):
    """Return ``-sum(v * log2(v))`` over the strictly positive values of ``data``.

    Values that are zero or negative are skipped. The input is used as given;
    it is not normalised to a probability distribution first.
    """
    accumulator = 0.0
    for value in data:
        if not value > 0:
            continue
        accumulator += value * math.log(value, 2)

    return -accumulator


def arburg2(X, order):
    """Estimate autoregressive coefficients with Burg's method.

    This version is 10 times faster than arburg, but the output rho is not
    correct.

    Parameters
    ----------
    X : array-like
        Input samples (real or complex).
    order : int
        AR model order; must be greater than 0.

    Returns
    -------
    (a, rho, ref)
        ``a`` is a complex array of length ``order + 1`` whose first element
        is 1 followed by the AR coefficients, ``rho`` is the final value of
        the prediction-error recursion, and ``ref`` holds the ``order``
        reflection coefficients.

    Raises
    ------
    ValueError
        If ``order`` is not greater than 0.
    """
    x = np.asarray(X)
    N = len(x)

    if order <= 0:
        raise ValueError("order must be > 0")

    # Initialisation
    # ------ rho
    rho = np.sum(np.abs(x) ** 2.) / N  # Eq 8.21 [Marple]_

    # ------ backward and forward errors start as copies of the input (eq 8.11)
    ef = x.astype(complex)
    eb = x.astype(complex)

    # AR coefficients, starting from the order-0 model [1]
    a = np.ones(1, dtype=complex)
    # ---- reflection coefficients to be stored
    ref = np.zeros(order, dtype=complex)

    E = np.zeros(order + 1)
    E[0] = rho

    for m in range(0, order):
        # Calculate the next order reflection (parcor) coefficient
        efp = ef[1:]
        ebp = eb[0:-1]
        num = -2. * np.dot(ebp.conj(), efp)
        den = np.dot(efp.conj(), efp) + np.dot(ebp, ebp.conj())
        ref[m] = num / den

        # Update the forward and backward prediction errors
        ef = efp + ref[m] * ebp
        eb = ebp + ref[m].conj() * efp

        # Update the AR coeff. Growing with np.append avoids the in-place
        # ndarray.resize, which raises when the array is referenced elsewhere.
        a = np.append(a, 0)
        a = a + ref[m] * np.flipud(a).conjugate()

        # Update the prediction error
        E[m + 1] = np.real((1 - ref[m].conj() * ref[m])) * E[m]
    return a, E[-1], ref


def arCoeff(data):
    """
    Compute four *autoregression coefficients* of ``data``.

    Signals whose standard deviation is below 0.1 yield four zeros. Otherwise
    the signal is min-max scaled, fitted with ``arburg2`` at order 4, and the
    real parts of the coefficients after the leading 1 are returned as a list.
    """
    if np.std(data) < 0.1:
        return [0.0, 0.0, 0.0, 0.0]

    coefficients = arburg2(minmax_scale(data), 4)[0]
    return list(coefficients.real[1:])


def rms(data):
    """
    Compute the *root mean square* of an array.
    """
    mean_square = np.mean(data ** 2)
    return np.sqrt(mean_square)


def Correlation_2signal(data1, data2):
    """Pearson correlation of two signals as a float, with NaN mapped to 0.0."""
    first = np.array(data1)
    second = np.array(data2)

    coefficient = pearsonr(first, second)[0]

    return 0.0 if np.isnan(coefficient) else float(coefficient)

### time_xyz_feature_extraction

# Column names for time_xyz_feature_extraction, in output order. The numeric
# suffix 1/2/3 distinguishes the three entries of each per-axis statistic;
# SMA is a single value, the AR coefficients are numbered 1-12 and the
# correlations 1-3.
time_xyz_feature_names = (
    ['t-%s-%d' % (stat, axis)
     for stat in ('Mean', 'STD', 'Mad', 'Max', 'Min')
     for axis in (1, 2, 3)]
    + ['t-SMA-1']
    + ['t-%s-%d' % (stat, axis)
       for stat in ('Energy', 'IQR', 'ropy')
       for axis in (1, 2, 3)]
    + ['t-ARCoeff-%d' % k for k in range(1, 13)]
    + ['t-Correlation-%d' % k for k in (1, 2, 3)]
)

len(time_xyz_feature_names)

def time_xyz_feature_extraction(data_x, data_y, data_z):
    """Compute the 40 time-domain features of a three-axis window.

    Output order (matching ``time_xyz_feature_names``): mean, std, MAD, max
    and min per axis; one SMA value; energy, IQR and entropy per axis; four
    AR coefficients per axis (x, then y, then z); and the xy, xz and yz
    Pearson correlations.

    Correlations go through ``Correlation_2signal`` so that an undefined
    correlation (NaN from ``pearsonr``) is reported as 0.0 instead of leaking
    NaN into the feature table.

    Parameters
    ----------
    data_x, data_y, data_z : numpy.ndarray
        Samples of the three axes for one window.

    Returns
    -------
    numpy.ndarray
        1-D array with the 40 feature values.
    """
    axes = (data_x, data_y, data_z)

    f_all = []
    f_all += [axis.mean() for axis in axes]
    f_all += [axis.std() for axis in axes]
    f_all += [mad(axis) for axis in axes]
    f_all += [axis.max() for axis in axes]
    f_all += [axis.min() for axis in axes]
    f_all.append(sma(data_x, data_y, data_z))
    f_all += [energy(axis) for axis in axes]
    f_all += [iqr(axis) for axis in axes]
    f_all += [shannon_entropy(axis) for axis in axes]
    for axis in axes:
        f_all += arCoeff(axis)  # four coefficients per axis
    f_all += [
        Correlation_2signal(data_x, data_y),
        Correlation_2signal(data_x, data_z),
        Correlation_2signal(data_y, data_z),
    ]

    return np.array(f_all)

### time_mag_feature_extraction

# Column names for time_mag_feature_extraction, in output order.
time_mag_feature_names = (
    ['t-%s-mag' % stat
     for stat in ('Mean', 'STD', 'Mad', 'Max', 'Min', 'SMA', 'Energy', 'IQR', 'ropy')]
    + ['t-ARCoeff-mag%d' % k for k in range(1, 5)]
)

def time_mag_feature_extraction(data_mag):
    """Compute the 13 time-domain features of a single-channel window.

    Returns a 1-D array holding mean, std, MAD, max, min, SMA, energy, IQR
    and entropy followed by the four AR coefficients from ``arCoeff``.
    """
    scalar_features = [
        data_mag.mean(),
        data_mag.std(),
        mad(data_mag),
        data_mag.max(),
        data_mag.min(),
        sma_mag(data_mag),
        energy(data_mag),
        iqr(data_mag),
        shannon_entropy(data_mag),
    ]

    return np.array(scalar_features + arCoeff(data_mag))



### frequency_xyz_feature_extraction

# Column names for frequency_xyz_feature_extraction, in output order: the
# per-axis statistics carry suffix 1/2/3, SMA is a single value, and the 42
# band energies are numbered consecutively.
frequency_xyz_feature_names = (
    ['f-%s-%d' % (stat, axis)
     for stat in ('Mean', 'STD', 'Mad', 'Max', 'Min')
     for axis in (1, 2, 3)]
    + ['f-SMA-1']
    + ['f-%s-%d' % (stat, axis)
       for stat in ('Energy', 'IQR', 'ropy', 'MaxInds', 'MeanFreq', 'Skewness', 'Kurtosis')
       for axis in (1, 2, 3)]
    + ['f-BandsEnergyOld-%d' % k for k in range(1, 43)]
)

len(frequency_xyz_feature_names)

def _mean_frequency(freqs, spectrum):
    """Spectrum-weighted mean of ``freqs``; plain mean if the spectrum averages to 0."""
    if np.average(spectrum) == 0:
        return np.average(freqs)
    return np.average(freqs, weights=spectrum)


def _bands_energy(spectrum):
    """Energies of 8, then 4, then 2 equal bands (14 values), each divided by the full length."""
    n_bins = len(spectrum)
    energies = []
    for n_bands in (8, 4, 2):
        # reshape raises ValueError unless n_bins is a multiple of n_bands.
        bands = spectrum.reshape(n_bands, int(n_bins / n_bands))
        for band in bands:
            energies.append(np.sum(band ** 2) / n_bins)
    return energies


def frequency_xyz_feature_extraction(data_x, data_y, data_z, fs=50.0):
    """Compute the 79 frequency-domain features of a three-axis window.

    Output order (matching ``frequency_xyz_feature_names``): mean, std, MAD,
    max and min per axis; one SMA value; energy, IQR, entropy, frequency of
    the largest bin, weighted mean frequency, skewness and kurtosis per axis;
    then 14 band energies for x, 14 for y and 14 for z.

    Parameters
    ----------
    data_x, data_y, data_z : numpy.ndarray
        Spectra of the three axes; their length must be a multiple of 8 for
        the band split.
    fs : float
        Sampling rate in Hz used to label the bins. The frequency axis is the
        first ``len(data_x)`` values of ``fftfreq(2 * len(data_x), 1 / fs)``.

    Returns
    -------
    numpy.ndarray
        1-D array with the 79 feature values.
    """
    axes = (data_x, data_y, data_z)
    n_bins = len(data_x)
    freqs = fftfreq(int(2 * n_bins), d=1.0 / fs)[0:n_bins]

    f_all = []
    f_all += [axis.mean() for axis in axes]
    f_all += [axis.std() for axis in axes]
    f_all += [mad(axis) for axis in axes]
    f_all += [axis.max() for axis in axes]
    f_all += [axis.min() for axis in axes]
    f_all.append(sma(data_x, data_y, data_z))
    f_all += [energy(axis) for axis in axes]
    f_all += [iqr(axis) for axis in axes]
    f_all += [shannon_entropy(axis) for axis in axes]
    f_all += [freqs[axis.argmax()] for axis in axes]
    f_all += [_mean_frequency(freqs, axis) for axis in axes]
    f_all += [skew(axis) for axis in axes]
    f_all += [kurtosis(axis) for axis in axes]
    for axis in axes:
        f_all += _bands_energy(axis)

    return np.array(f_all)

### frequency_mag_feature_extraction
# Column names for frequency_mag_feature_extraction, in output order.
frequency_mag_feature_names = [
    'f-%s-mag' % stat
    for stat in ('Mean', 'STD', 'Mad', 'Max', 'Min', 'SMA', 'Energy', 'IQR',
                 'ropy', 'MaxInds', 'MeanFreq', 'Skewness', 'Kurtosis')
]

def frequency_mag_feature_extraction(data_mag, fs=50.0):
    """Compute the 13 frequency-domain features of a single-channel spectrum.

    Returns a 1-D array with mean, std, MAD, max, min, SMA, energy, IQR,
    entropy, frequency of the largest bin, weighted mean frequency, skewness
    and kurtosis. The frequency axis is the first ``len(data_mag)`` values of
    ``fftfreq(2 * len(data_mag), 1 / fs)``.
    """
    n_bins = len(data_mag)
    freq_axis = pd.DataFrame(fftfreq(int(2 * n_bins), d=1.0 / fs)[0:n_bins])[0]

    peak_frequency = freq_axis[data_mag.argmax()]

    # np.average cannot weight by a spectrum that sums to zero, so fall back
    # to the unweighted mean of the frequency axis in that case.
    if np.average(data_mag) == 0:
        mean_frequency = np.average(freq_axis)
    else:
        mean_frequency = np.average(freq_axis, weights=data_mag)

    features = [
        data_mag.mean(),
        data_mag.std(),
        mad(data_mag),
        data_mag.max(),
        data_mag.min(),
        sma_mag(data_mag),
        energy(data_mag),
        iqr(data_mag),
        shannon_entropy(data_mag),
        peak_frequency,
        mean_frequency,
        skew(data_mag),
        kurtosis(data_mag),
    ]

    return np.array(features)


# generate_all_features_xyz

def generate_all_features_xyz(signal_x, signal_y, signal_z, fs = 100):
    """Build the full feature vector for one three-axis window.

    Each axis is median-filtered (kernel 3), a per-sample magnitude signal is
    derived, and all four signals are converted to amplitude spectra. The
    result concatenates, in order, the time xyz, time magnitude, frequency
    xyz and frequency magnitude features. ``fs`` is forwarded to the two
    frequency-domain extractors.
    """
    # Time-domain signals
    filtered_axes = [median_filter(axis, f_size=3) for axis in (signal_x, signal_y, signal_z)]
    t_x, t_y, t_z = filtered_axes
    t_mag = np.vectorize(magnitude)(t_x, t_y, t_z)

    # Frequency-domain signals
    f_x, f_y, f_z = [calc_fft_signal(axis) for axis in filtered_axes]
    f_mag = calc_fft_signal(t_mag)

    # Feature groups, in the order used by the column-name tables
    feature_groups = [
        time_xyz_feature_extraction(t_x, t_y, t_z),
        time_mag_feature_extraction(t_mag),
        frequency_xyz_feature_extraction(f_x, f_y, f_z, fs),
        frequency_mag_feature_extraction(f_mag, fs),
    ]

    return np.hstack(feature_groups).T

def generate_all_features_single(signal_x, fs = 100):
    """Build the feature vector for one single-channel window.

    The signal is median-filtered (kernel 3) and converted to an amplitude
    spectrum; the time-domain magnitude features are followed by the
    frequency-domain magnitude features. ``fs`` is forwarded to the
    frequency-domain extractor.
    """
    filtered = median_filter(signal_x, f_size=3)
    spectrum = calc_fft_signal(filtered)

    feature_groups = [
        time_mag_feature_extraction(filtered),
        frequency_mag_feature_extraction(spectrum, fs),
    ]

    return np.hstack(feature_groups).T



# Column names for the single-channel feature vector: frequency-domain names
# first, then time-domain names (nine statistics plus four AR coefficients).
frequency_single_feature_names = [
    'f-' + stat
    for stat in ('Mean', 'STD', 'Mad', 'Max', 'Min', 'SMA', 'Energy', 'IQR',
                 'ropy', 'MaxInds', 'MeanFreq', 'Skewness', 'Kurtosis')
]


time_single_feature_names = (
    ['t-' + stat
     for stat in ('Mean', 'STD', 'Mad', 'Max', 'Min', 'SMA', 'Energy', 'IQR', 'ropy')]
    + ['t-ARCoeff%d' % k for k in range(1, 5)]
)

# All 145 column names in the order generate_all_features_xyz stacks its
# groups: time xyz (40), time magnitude (13), frequency xyz (79),
# frequency magnitude (13).
all_feature145_names = (
    time_xyz_feature_names
    + time_mag_feature_names
    + frequency_xyz_feature_names
    + frequency_mag_feature_names
)






In [None]:
# (Removed: an unbalanced fragment of a band-energy expression copied from
# frequency_xyz_feature_extraction. It was a SyntaxError and had no effect.)

In [20]:
def add_acc(n):
    """Return the feature name ``n`` with the ``acc_`` prefix prepended."""
    prefix = "acc_"
    return prefix + n


# The 145 feature names, each prefixed with "acc_"; used as CSV column headers.
acc_feature145_names = [add_acc(name) for name in all_feature145_names]


In [21]:
# Display the working directory before it is changed in the next cell.
os.getcwd()

'/Users/y-nakamura/yugo_workspace/bar_exercise'

In [22]:
# Make the project folder the working directory; every data path below is
# built by appending to os.getcwd().
# NOTE(review): this absolute path is specific to one machine and must be
# edited before the notebook can run anywhere else.
os.chdir('/Users/y-nakamura/yugo_workspace/bar_exercise')

In [23]:
# Confirm the working directory after the chdir above.
os.getcwd()

'/Users/y-nakamura/yugo_workspace/bar_exercise'

In [31]:
# Input and output folders. Both start and end with "/" because they are
# joined by plain string concatenation: os.getcwd() + <dir> + <file name>.
dataset_dir_input = "/data/raw/20210716/"
dataset_dir_output = "/data/features/2021_dpsws/"
# Currently unused alternative output locations:
# dataset_dir_output2 = "/data/2021_dicomo_dataset_feature_par_class/"
# dataset_dir_raw_output = "/data/2021_dicomo_dataset_raw_par_class/"

In [32]:
# Names of the raw files in the input folder, hidden (dot) entries skipped,
# in sorted order.
target_files = sorted(
    entry
    for entry in os.listdir(os.getcwd() + dataset_dir_input)
    if not entry.startswith('.')
)

In [33]:
# Display the list of raw files that will be processed.
target_files

['0721_Bottom_Accelerometer.csv',
 '0721_Middle_Accelerometer.csv',
 '0721_Upper_Accelerometer.csv']

In [44]:
# Quick inspection: for every raw file print its full path, the second
# underscore-separated token of its name (e.g. "Bottom") and the table shape.
for file in target_files:
    file_dir = "{}{}{}".format(os.getcwd(), dataset_dir_input, file)
    print(file_dir)
    name_tokens = file.split("_")
    print(name_tokens[1])
    df_new = pd.read_csv(file_dir)
    print(df_new.shape)

/Users/y-nakamura/yugo_workspace/bar_exercise/data/raw/20210716/0721_Bottom_Accelerometer.csv
Bottom
(161948, 8)
/Users/y-nakamura/yugo_workspace/bar_exercise/data/raw/20210716/0721_Middle_Accelerometer.csv
Middle
(102125, 8)
/Users/y-nakamura/yugo_workspace/bar_exercise/data/raw/20210716/0721_Upper_Accelerometer.csv
Upper
(68122, 10)


In [37]:
# Display the table loaded last by the inspection loop.
df_new 

Unnamed: 0,epoc (ms),timestamp (+0900),elapsed (s),x-axis (g),y-axis (g),z-axis (g),label,user,num_of_set,move_cursor
0,1.630000e+12,2021-07-16T15.17.48.292,0.000,0.012,-0.974,-0.219,0,0,,
1,1.630000e+12,2021-07-16T15.17.48.313,0.021,0.012,-0.982,-0.222,0,0,,
2,1.630000e+12,2021-07-16T15.17.48.332,0.040,0.012,-0.978,-0.221,0,0,,
3,1.630000e+12,2021-07-16T15.17.48.352,0.060,0.011,-0.980,-0.223,0,0,,
4,1.630000e+12,2021-07-16T15.17.48.371,0.079,0.012,-0.980,-0.222,0,0,,
...,...,...,...,...,...,...,...,...,...,...
68117,1.630000e+12,2021-07-16T15.40.22.981,1354.689,0.010,-0.978,-0.227,0,0,,
68118,1.630000e+12,2021-07-16T15.40.23.001,1354.709,0.011,-0.978,-0.224,0,0,,
68119,1.630000e+12,2021-07-16T15.40.23.021,1354.729,0.011,-0.978,-0.224,0,0,,
68120,1.630000e+12,2021-07-16T15.40.23.041,1354.749,0.010,-0.979,-0.225,0,0,,


In [43]:
# Window length and hop between window starts, in samples (50 % overlap).
# Halve both values (64 / 32) for shorter windows.
window = int(128)
slide = int(64)

# Sampling rate passed to the frequency-domain features.
# NOTE(review): the rows displayed for the last loaded file are about 0.02 s
# apart in "elapsed (s)", which suggests roughly 50 Hz; confirm that 100.0 is
# intended, since it scales the f-MaxInds and f-MeanFreq features.
sampling_rate_hz = 100.0

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(os.getcwd() + dataset_dir_output, exist_ok=True)

for file in target_files:
    file_dir = os.getcwd() + dataset_dir_input + file
    print(file_dir)
    df_new = pd.read_csv(file_dir)

    test_acc_x = df_new['x-axis (g)'].values
    test_acc_y = df_new['y-axis (g)'].values
    test_acc_z = df_new['z-axis (g)'].values

    test_label = df_new['label'].values
    test_user = df_new['user'].values

    # Segment every column with the same window/hop so rows stay aligned.
    sliding_test_acc_x = sliding_window(test_acc_x, window, slide)
    sliding_test_acc_y = sliding_window(test_acc_y, window, slide)
    sliding_test_acc_z = sliding_window(test_acc_z, window, slide)

    sliding_test_label = sliding_window(test_label, window, slide)
    sliding_test_user = sliding_window(test_user, window, slide)

    N = len(sliding_test_acc_x)

    all_features_acc = np.asarray([
        generate_all_features_xyz(
            sliding_test_acc_x[i], sliding_test_acc_y[i], sliding_test_acc_z[i], sampling_rate_hz)
        for i in range(N)
    ])

    # Each window is tagged with its most frequent label / user value.
    test_labels = [collections.Counter(action).most_common()[0][0] for action in sliding_test_label]
    test_users = [collections.Counter(user).most_common()[0][0] for user in sliding_test_user]

    df_feature_acc = pd.DataFrame(data=all_features_acc, columns=acc_feature145_names)

    df_feature = df_feature_acc.copy()
    df_feature['label'] = test_labels
    df_feature['user'] = test_users

    # <input name without extension>_features<window>.csv in the output folder
    output_name = os.path.splitext(file)[0] + "_features" + str(window) + ".csv"
    df_feature.to_csv(os.getcwd() + dataset_dir_output + output_name, index=False)
    

/Users/y-nakamura/yugo_workspace/bar_exercise/data/raw/20210716/0721_Bottom_Accelerometer.csv
/Users/y-nakamura/yugo_workspace/bar_exercise/data/raw/20210716/0721_Middle_Accelerometer.csv
/Users/y-nakamura/yugo_workspace/bar_exercise/data/raw/20210716/0721_Upper_Accelerometer.csv
