In [1]:
import numpy as np
import math

### define functions:

In [2]:
def entropy(data, num_short_blocks=10):
    eol = np.sum(np.square(data))
    win_len = len(data)
    sub_win_len = math.floor(win_len / num_short_blocks)

    if win_len != sub_win_len * num_short_blocks:
        data = data[0:sub_win_len * num_short_blocks]
    sub_wins = data.reshape(sub_win_len, num_short_blocks, order='F').copy()
    norm_sub_frame_energies = np.zeros((1, sub_wins.shape[1]))
    for i in range(sub_wins.shape[1]):
        norm_sub_frame_energies[0, i] = np.sum(np.square(sub_wins[:, i])) / (eol + np.spacing(1))
    energy_entropy = 0
    for i in range(norm_sub_frame_energies.shape[1]):
        energy_entropy -= norm_sub_frame_energies[0, i] * math.log(norm_sub_frame_energies[0, i] + np.spacing(1), 2)
    return energy_entropy

In [3]:
def dft(data, f_s = 4000, p=0):
    win_len = len(data)
    fft = np.abs(np.fft.fft(data)) / win_len
    if not p:
        fft = fft[0:math.ceil(win_len)]
        f_req = (f_s / 2) * np.arange(0, np.ceil(win_len / 2) + 1) / np.ceil(win_len / 2)
    else:
        fft = np.fft.fftshift(fft)
        if win_len % 2:
            f_req = np.arange(-(win_len - 1) / 2, (win_len - 1) / 2 + 1)
        else:
            f_req = np.arange(-win_len / 2, win_len / 2)
    fft_1 = np.abs(fft)/win_len
    fft_2 = fft_1[1:(round(win_len / 2) + 1)]
    fft_2 = 2*fft_2
    return fft_2, f_req

In [4]:
def spectral_rolloff(data, c=0.90):
    total_energy = np.sum(np.square(data))
    curr_energy = 0
    count_fft = 0
    fft_len = len(data)
    while curr_energy <= c * total_energy and count_fft <= fft_len:
        curr_energy += data[count_fft] ** 2
        count_fft += 1
    count_fft -= 1
    return (count_fft - 1) / fft_len

In [5]:
def spectral_centroid(data, f_s = 4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c

In [6]:
def spectral_spread(data, f_s=4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return s

### define data

In [7]:
path = "/Users/ecem/Desktop/gyrocardiogram/phase2/"

In [8]:
x = np.load(path + "scg_10sec_x_splitted.npy", allow_pickle= True)
y = np.load(path + "scg_10sec_y_splitted.npy", allow_pickle= True)
z = np.load(path + "scg_10sec_z_splitted.npy", allow_pickle= True)

In [9]:
x.shape

(4414, 2560)

In [10]:
len(x[0])

2560

In [11]:
len(y[0])

2560

In [12]:
len(z[0])

2560

In [13]:
diseased = np.stack((x, y, z), axis = 0)

In [14]:
#sanity check
print(diseased.shape)

(3, 4414, 2560)


## Feature Extraction:


### Spectral Entropy:

In [15]:
dft_ = dft(diseased[0][1])[0]

In [16]:
diseased[0,1].shape

(2560,)

In [17]:
entropy(diseased[0][1])

3.321919324678227

In [18]:
entropy_ = np.ndarray((3, diseased.shape[1]))
for i in range(diseased.shape[1]):
    entropy_[0,i] = entropy(diseased[0,i], num_short_blocks = 10)
    entropy_[1,i] = entropy(diseased[1,i], num_short_blocks = 10)
    entropy_[2,i] = entropy(diseased[2,i], num_short_blocks = 10)            

In [19]:
entropy_.shape

(3, 4414)

### Spectral Rolloff

In [20]:
spectral_rolloff(dft_)

0.25

In [21]:
spec_roll = np.ndarray((3, diseased.shape[1]))
for i in range(diseased.shape[1]):
    spec_roll[0,i] = spectral_rolloff(diseased[0,i])
    spec_roll[1,i] = spectral_rolloff(diseased[1,i])
    spec_roll[2,i] = spectral_rolloff(diseased[2,i])  
    

In [22]:
spec_roll.shape

(3, 4414)

In [23]:
spec_roll

array([[0.90078125, 0.89921875, 0.89921875, ..., 0.89882812, 0.890625  ,
        0.8875    ],
       [0.89804688, 0.89882812, 0.89921875, ..., 0.89960938, 0.88632813,
        0.87890625],
       [0.89960938, 0.89921875, 0.89921875, ..., 0.89960938, 0.89960938,
        0.89960938]])

### Spectral Centroid:

In [24]:
spectral_centroid(dft_)

0.29754440318774195

In [25]:
centr = np.ndarray((3, diseased.shape[1]))
for i in range(diseased.shape[1]):
    centr[0,i] = spectral_centroid(diseased[0,i])
    centr[1,i] = spectral_centroid(diseased[1,i])
    centr[2,i] = spectral_centroid(diseased[2,i])  

  data = data / np.max(data)
  data = data / np.max(data)


In [26]:
centr.shape

(3, 4414)

In [27]:
centr

array([[0.5019101 , 0.49999669, 0.49989894, ..., 0.50603604, 0.50273722,
        0.49873106],
       [0.49895767, 0.4995163 , 0.49997895, ..., 0.50781525, 0.50428544,
        0.49917929],
       [0.50024384, 0.50021557, 0.5001834 , ..., 0.50012387, 0.50016222,
        0.50022746]])

### Spectral Spread

In [28]:
spec_spread = np.ndarray((3, diseased.shape[1]))
for i in range(diseased.shape[1]):
    spec_spread[0,i] = spectral_spread(diseased[0,i])
    spec_spread[1,i] = spectral_spread(diseased[1,i])
    spec_spread[2,i] = spectral_spread(diseased[2,i])  


  data = data / np.max(data)
  data = data / np.max(data)


# DIVIDE BY ZERO HATASINI SOR:

In [29]:
spec_spread.shape

(3, 4414)

In [30]:
spec_spread

array([[0.28869345, 0.28867993, 0.2886001 , ..., 0.28894197, 0.28438344,
        0.2842298 ],
       [0.28863975, 0.28868779, 0.2886001 , ..., 0.29054769, 0.28292054,
        0.28116037],
       [0.28867626, 0.28866833, 0.28866746, ..., 0.28867202, 0.28873285,
        0.28873328]])

### Create DataFrame

In [31]:
import pandas as pd

In [32]:
spec_entropy = pd.DataFrame(entropy_.T, columns =["SE x", "SE y", "SE z"])
specroll = pd.DataFrame(spec_roll.T, columns =["SR x", "SR y", "SR z"])
spec_centr = pd.DataFrame(centr.T, columns =["SC x", "SC y", "SC z"])
spec_spread = pd.DataFrame(spec_spread.T, columns =["SS x", "SS y", "SS z"])

df = pd.concat([spec_entropy, specroll, spec_centr, spec_spread], axis =1)
df

Unnamed: 0,SE x,SE y,SE z,SR x,SR y,SR z,SC x,SC y,SC z,SS x,SS y,SS z
0,3.321794,3.321779,3.321928,0.900781,0.898047,0.899609,0.501910,0.498958,0.500244,0.288693,0.288640,0.288676
1,3.321919,3.321889,3.321928,0.899219,0.898828,0.899219,0.499997,0.499516,0.500216,0.288680,0.288688,0.288668
2,3.321912,3.321909,3.321928,0.899219,0.899219,0.899219,0.499899,0.499979,0.500183,0.288600,0.288600,0.288667
3,3.321833,3.321780,3.321928,0.896484,0.902344,0.899219,0.499137,0.501352,0.500161,0.288375,0.289310,0.288681
4,3.321859,3.321870,3.321928,0.900781,0.899219,0.899219,0.501234,0.499578,0.500206,0.288552,0.288620,0.288674
...,...,...,...,...,...,...,...,...,...,...,...,...
4409,3.315372,3.307817,3.321927,0.907813,0.912891,0.899219,0.507174,0.510179,0.500086,0.291749,0.294270,0.288629
4410,3.316430,3.309940,3.321927,0.907031,0.911719,0.899219,0.507915,0.510864,0.500083,0.290859,0.292761,0.288646
4411,3.317626,3.312692,3.321927,0.898828,0.899609,0.899609,0.506036,0.507815,0.500124,0.288942,0.290548,0.288672
4412,3.317750,3.313168,3.321927,0.890625,0.886328,0.899609,0.502737,0.504285,0.500162,0.284383,0.282921,0.288733


In [33]:
df.isnull().sum()

SE x    0
SE y    0
SE z    0
SR x    0
SR y    0
SR z    0
SC x    7
SC y    2
SC z    0
SS x    7
SS y    2
SS z    0
dtype: int64

In [33]:
df.to_csv(path + "scg_selected_features_10sec.csv")