In [1]:
import numpy as np
import math

### define functions:

In [2]:
def entropy(data, num_short_blocks=10):
    eol = np.sum(np.square(data))
    win_len = len(data)
    sub_win_len = math.floor(win_len / num_short_blocks)

    if win_len != sub_win_len * num_short_blocks:
        data = data[0:sub_win_len * num_short_blocks]
    sub_wins = data.reshape(sub_win_len, num_short_blocks, order='F').copy()
    norm_sub_frame_energies = np.zeros((1, sub_wins.shape[1]))
    for i in range(sub_wins.shape[1]):
        norm_sub_frame_energies[0, i] = np.sum(np.square(sub_wins[:, i])) / (eol + np.spacing(1))
    energy_entropy = 0
    for i in range(norm_sub_frame_energies.shape[1]):
        energy_entropy -= norm_sub_frame_energies[0, i] * math.log(norm_sub_frame_energies[0, i] + np.spacing(1), 2)
    return energy_entropy

In [3]:
def dft(data, f_s = 4000, p=0):
    win_len = len(data)
    fft = np.abs(np.fft.fft(data)) / win_len
    if not p:
        fft = fft[0:math.ceil(win_len)]
        f_req = (f_s / 2) * np.arange(0, np.ceil(win_len / 2) + 1) / np.ceil(win_len / 2)
    else:
        fft = np.fft.fftshift(fft)
        if win_len % 2:
            f_req = np.arange(-(win_len - 1) / 2, (win_len - 1) / 2 + 1)
        else:
            f_req = np.arange(-win_len / 2, win_len / 2)
    fft_1 = np.abs(fft)/win_len
    fft_2 = fft_1[1:(round(win_len / 2) + 1)]
    fft_2 = 2*fft_2
    return fft_2, f_req

In [4]:
def spectral_rolloff(data, c=0.90):
    total_energy = np.sum(np.square(data))
    curr_energy = 0
    count_fft = 0
    fft_len = len(data)
    while curr_energy <= c * total_energy and count_fft <= fft_len:
        curr_energy += data[count_fft] ** 2
        count_fft += 1
    count_fft -= 1
    return (count_fft - 1) / fft_len

In [5]:
def spectral_centroid(data, f_s = 4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c

In [6]:
def spectral_spread(data, f_s=4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return s

### define data

In [7]:
path = "/Users/ecem/Desktop/gyrocardiogram/phase2/"

In [8]:
x = np.load(path + "10sec_x_splitted.npy", allow_pickle= True)
y = np.load(path + "10sec_y_splitted.npy", allow_pickle= True)
z = np.load(path + "10sec_z_splitted.npy", allow_pickle= True)

In [9]:
x.shape

(3917, 2560)

In [10]:
len(x[0])

2560

In [11]:
len(y[0])

2560

In [12]:
len(z[0])

2560

In [13]:
diseased = np.stack((x, y, z), axis = 0)

In [14]:
#sanity check
print(diseased.shape)

(3, 3917, 2560)


## Feature Extraction:


### Spectral Entropy:

In [15]:
dft_ = dft(diseased[0][1])[0]

In [16]:
diseased[0,1].shape

(2560,)

In [17]:
entropy(diseased[0][1])

3.2743116201419538

In [18]:
entropy_ = np.ndarray((3, diseased.shape[1]))
for i in range(diseased.shape[1]):
    entropy_[0,i] = entropy(diseased[0,i], num_short_blocks = 10)
    entropy_[1,i] = entropy(diseased[1,i], num_short_blocks = 10)
    entropy_[2,i] = entropy(diseased[2,i], num_short_blocks = 10)            

In [19]:
entropy_.shape

(3, 3917)

### Spectral Rolloff

In [20]:
spectral_rolloff(dft_)

0.103125

In [21]:
spec_roll = np.ndarray((3, diseased.shape[1]))
for i in range(diseased.shape[1]):
    spec_roll[0,i] = spectral_rolloff(diseased[0,i])
    spec_roll[1,i] = spectral_rolloff(diseased[1,i])
    spec_roll[2,i] = spectral_rolloff(diseased[2,i])  
    

In [22]:
spec_roll.shape

(3, 3917)

In [23]:
spec_roll

array([[0.8       , 0.88945312, 0.94257813, ..., 0.88515625, 0.86171875,
        0.4328125 ],
       [0.8796875 , 0.88476562, 0.88125   , ..., 0.8078125 , 0.85859375,
        0.390625  ],
       [0.87851563, 0.87304688, 0.91328125, ..., 0.91523438, 0.87109375,
        0.45273438]])

### Spectral Centroid:

In [24]:
spectral_centroid(dft_)

0.29175691675502463

In [25]:
centr = np.ndarray((3, diseased.shape[1]))
for i in range(diseased.shape[1]):
    centr[0,i] = spectral_centroid(diseased[0,i])
    centr[1,i] = spectral_centroid(diseased[1,i])
    centr[2,i] = spectral_centroid(diseased[2,i])  

  data = data / np.max(data)
  data = data / np.max(data)


In [26]:
centr.shape

(3, 3917)

In [27]:
centr

array([[ 0.45448364,  0.49707354,  0.51349673, ...,  0.71025242,
         0.58394504, -0.05804273],
       [ 0.93892826,  0.15512827,  0.79594941, ...,  0.62306243,
         0.45629857, -0.07539979],
       [ 0.49429198,  0.49041168,  0.49398174, ...,  0.39691348,
         0.45238621,  0.0583299 ]])

### Spectral Spread

In [28]:
spec_spread = np.ndarray((3, diseased.shape[1]))
for i in range(diseased.shape[1]):
    spec_spread[0,i] = spectral_spread(diseased[0,i])
    spec_spread[1,i] = spectral_spread(diseased[1,i])
    spec_spread[2,i] = spectral_spread(diseased[2,i])  


  data = data / np.max(data)
  data = data / np.max(data)


# DIVIDE BY ZERO HATASINI SOR:

In [29]:
spec_spread.shape

(3, 3917)

In [30]:
spec_spread

array([[ 0.31789196,  0.29211249,  0.29531714, ...,  0.46212766,
         0.52265884, -0.05804273],
       [ 0.93892826,  0.32913979,  0.41978198, ...,  0.504313  ,
         0.50517219, -0.07539979],
       [ 0.29154947,  0.2889934 ,  0.28911898, ...,  0.39691348,
         0.42651615,  0.12907311]])

### Create DataFrame

In [31]:
import pandas as pd

In [32]:
spec_entropy = pd.DataFrame(entropy_.T, columns =["SE x", "SE y", "SE z"])
specroll = pd.DataFrame(spec_roll.T, columns =["SR x", "SR y", "SR z"])
spec_centr = pd.DataFrame(centr.T, columns =["SC x", "SC y", "SC z"])
spec_spread = pd.DataFrame(spec_spread.T, columns =["SS x", "SS y", "SS z"])

df = pd.concat([spec_entropy, specroll, spec_centr, spec_spread], axis =1)
df

Unnamed: 0,SE x,SE y,SE z,SR x,SR y,SR z,SC x,SC y,SC z,SS x,SS y,SS z
0,2.764249,3.213640,3.292419,0.800000,0.879687,0.878516,0.454484,0.938928,0.494292,0.317892,0.938928,0.291549
1,3.274312,3.309540,3.312831,0.889453,0.884766,0.873047,0.497074,0.155128,0.490412,0.292112,0.329140,0.288993
2,3.298964,3.275258,3.308264,0.942578,0.881250,0.913281,0.513497,0.795949,0.493982,0.295317,0.419782,0.289119
3,3.284066,3.275852,3.306214,0.880469,0.871875,0.893359,0.539506,0.638711,0.499485,0.261887,0.408647,0.287926
4,3.280501,3.294480,3.312584,0.899609,0.888672,0.889844,0.521286,0.161650,0.501276,0.292120,0.237132,0.281535
...,...,...,...,...,...,...,...,...,...,...,...,...
3912,3.309207,3.281770,3.289307,0.881641,0.833594,0.882812,-20.216970,2.443424,-0.161938,-20.216970,2.443424,-0.161938
3913,3.275286,3.215557,3.293427,0.911328,0.920703,0.905078,0.328882,0.336628,0.784633,0.467273,0.453857,0.157898
3914,3.209732,3.136212,3.293772,0.885156,0.807813,0.915234,0.710252,0.623062,0.396913,0.462128,0.504313,0.396913
3915,3.174726,3.102717,3.294035,0.861719,0.858594,0.871094,0.583945,0.456299,0.452386,0.522659,0.505172,0.426516


In [33]:
df.to_csv(path + "selected_features_10sec.csv")