In [1]:
import numpy as np
import math

### define functions:

In [2]:
def entropy(data, num_short_blocks=10):
    eol = np.sum(np.square(data))
    win_len = len(data)
    sub_win_len = math.floor(win_len / num_short_blocks)

    if win_len != sub_win_len * num_short_blocks:
        data = data[0:sub_win_len * num_short_blocks]
    sub_wins = data.reshape(sub_win_len, num_short_blocks, order='F').copy()
    norm_sub_frame_energies = np.zeros((1, sub_wins.shape[1]))
    for i in range(sub_wins.shape[1]):
        norm_sub_frame_energies[0, i] = np.sum(np.square(sub_wins[:, i])) / (eol + np.spacing(1))
    energy_entropy = 0
    for i in range(norm_sub_frame_energies.shape[1]):
        energy_entropy -= norm_sub_frame_energies[0, i] * math.log(norm_sub_frame_energies[0, i] + np.spacing(1), 2)
    return energy_entropy

In [3]:
def dft(data, f_s = 4000, p=0):
    win_len = len(data)
    fft = np.abs(np.fft.fft(data)) / win_len
    if not p:
        fft = fft[0:math.ceil(win_len)]
        f_req = (f_s / 2) * np.arange(0, np.ceil(win_len / 2) + 1) / np.ceil(win_len / 2)
    else:
        fft = np.fft.fftshift(fft)
        if win_len % 2:
            f_req = np.arange(-(win_len - 1) / 2, (win_len - 1) / 2 + 1)
        else:
            f_req = np.arange(-win_len / 2, win_len / 2)
    fft_1 = np.abs(fft)/win_len
    fft_2 = fft_1[1:(round(win_len / 2) + 1)]
    fft_2 = 2*fft_2
    return fft_2, f_req

In [4]:
def spectral_rolloff(data, c=0.90):
    total_energy = np.sum(np.square(data))
    curr_energy = 0
    count_fft = 0
    fft_len = len(data)
    while curr_energy <= c * total_energy and count_fft <= fft_len:
        curr_energy += data[count_fft] ** 2
        count_fft += 1
    count_fft -= 1
    return (count_fft - 1) / fft_len

In [5]:
def spectral_centroid(data, f_s = 4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c

In [6]:
def spectral_spread(data, f_s=4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return s

### define data

In [7]:
path = "/Users/ecem/Desktop/gyrocardiogram/s-vs-r/"
diseased_s = np.load(path + "data/diseased-10sec-s.npy", allow_pickle= True)
diseased_r = np.load(path + "data/diseased-10sec-r.npy", allow_pickle= True)

print(diseased_s.shape)

(3, 1070, 2560)


## Feature Extraction:


### Entropy:

In [8]:
entropy_s = np.ndarray((3,diseased_s.shape[1]))
for i in range(diseased_s.shape[1]):
    entropy_s[0,i] = entropy(diseased_s[0,i], num_short_blocks = 10)
    entropy_s[1,i] = entropy(diseased_s[1,i], num_short_blocks = 10)
    entropy_s[2,i] = entropy(diseased_s[2,i], num_short_blocks = 10)

In [9]:
entropy_r = np.ndarray((3,diseased_r.shape[1]))
for i in range(diseased_r.shape[1]):
    entropy_r[0,i] = entropy(diseased_r[0,i], num_short_blocks = 10)
    entropy_r[1,i] = entropy(diseased_r[1,i], num_short_blocks = 10)
    entropy_r[2,i] = entropy(diseased_r[2,i], num_short_blocks = 10)

### Spectral Entropy:

In [10]:
entropy(dft(diseased_r[0][1])[0])

0.802702037807868

In [11]:
spec_ent_s = np.ndarray((3,diseased_s.shape[1]))
for i in range(diseased_s.shape[1]):
    x = dft(diseased_s[0,i])[0]
    y = dft(diseased_s[1,i])[0]
    z = dft(diseased_s[2,i])[0]
    
    spec_ent_s[0,i] = entropy(x, num_short_blocks = 10)
    spec_ent_s[1,i] = entropy(y, num_short_blocks = 10)
    spec_ent_s[2,i] = entropy(z, num_short_blocks = 10)

In [12]:
spec_ent_r = np.ndarray((3,diseased_r.shape[1]))
for i in range(diseased_r.shape[1]):
    x = dft(diseased_r[0,i])[0]
    y = dft(diseased_r[1,i])[0]
    z = dft(diseased_r[2,i])[0]
    
    spec_ent_r[0,i] = entropy(x, num_short_blocks = 10)
    spec_ent_r[1,i] = entropy(y, num_short_blocks = 10)
    spec_ent_r[2,i] = entropy(z, num_short_blocks = 10)

### Spectral Rolloff

In [13]:
spec_roll_s = np.ndarray((3,diseased_s.shape[1]))
for i in range(diseased_s.shape[1]):
    x = dft(diseased_s[0,i])[0]
    y = dft(diseased_s[1,i])[0]
    z = dft(diseased_s[2,i])[0]
    
    spec_roll_s[0,i] = spectral_rolloff(x)
    spec_roll_s[1,i] = spectral_rolloff(y)
    spec_roll_s[2,i] = spectral_rolloff(z)

In [14]:
spec_roll_r = np.ndarray((3,diseased_r.shape[1]))
for i in range(diseased_s.shape[1]):
    x = dft(diseased_r[0,i])[0]
    y = dft(diseased_r[1,i])[0]
    z = dft(diseased_r[2,i])[0]
    
    spec_roll_r[0,i] = spectral_rolloff(x)
    spec_roll_r[1,i] = spectral_rolloff(y)
    spec_roll_r[2,i] = spectral_rolloff(z)

### Spectral Centroid:

In [15]:
centr_s = np.ndarray((3,diseased_s.shape[1]))
for i in range(diseased_s.shape[1]):
    x = dft(diseased_s[0,i])[0]
    y = dft(diseased_s[1,i])[0]
    z = dft(diseased_s[2,i])[0]
                                
    centr_s[0,i] = spectral_centroid(x)
    centr_s[1,i] = spectral_centroid(y)
    centr_s[2,i] = spectral_centroid(z)
    
                                

In [16]:
centr_r = np.ndarray((3,diseased_r.shape[1]))
for i in range(diseased_s.shape[1]):
    x = dft(diseased_r[0,i])[0]
    y = dft(diseased_r[1,i])[0]
    z = dft(diseased_r[2,i])[0]
                                
    centr_r[0,i] = spectral_centroid(x)
    centr_r[1,i] = spectral_centroid(y)
    centr_r[2,i] = spectral_centroid(z)
    

### Spectral Spread

In [17]:
spec_spread_s = np.ndarray((3,diseased_s.shape[1]))
for i in range(diseased_s.shape[1]):
    x = dft(diseased_s[0,i])[0]
    y = dft(diseased_s[1,i])[0]
    z = dft(diseased_s[2,i])[0]
                                
    spec_spread_s[0,i] = spectral_spread(x)
    spec_spread_s[1,i] = spectral_spread(y)
    spec_spread_s[2,i] = spectral_spread(z)
    
                                

In [18]:
spec_spread_r = np.ndarray((3,diseased_r.shape[1]))
for i in range(diseased_r.shape[1]):
    x = dft(diseased_r[0,i])[0]
    y = dft(diseased_r[1,i])[0]
    z = dft(diseased_r[2,i])[0]
                                
    spec_spread_r[0,i] = spectral_spread(x)
    spec_spread_r[1,i] = spectral_spread(y)
    spec_spread_r[2,i] = spectral_spread(z)
    

### Create DataFrame

In [19]:
import pandas as pd

In [20]:
entropy_s.shape

(3, 1070)

### for S:


In [21]:
entropy_s = pd.DataFrame(entropy_s.T, columns =["E x", "E y", "E z"])
spec_entropy_s = pd.DataFrame(spec_ent_s.T, columns =["SE x", "SE y", "SE z"])
specroll_s = pd.DataFrame(spec_roll_s.T, columns =["SR x", "SR y", "SR z"])
spec_centr_s = pd.DataFrame(centr_s.T, columns =["SC x", "SC y", "SC z"])
spec_spread_s = pd.DataFrame(spec_spread_s.T, columns =["SS x", "SS y", "SS z"])

df_s = pd.concat([entropy_s, spec_entropy_s, specroll_s, spec_centr_s, spec_spread_s], axis =1)
df_s

Unnamed: 0,E x,E y,E z,SE x,SE y,SE z,SR x,SR y,SR z,SC x,SC y,SC z,SS x,SS y,SS z
0,3.076283,3.159694,3.216961,1.147296,1.137282,2.720335,0.186719,0.177344,0.810156,0.265644,0.246914,0.415718,0.277956,0.266299,0.311216
1,3.038338,3.238392,3.298141,1.200881,1.497325,2.980266,0.203906,0.246875,0.848437,0.279646,0.276328,0.431023,0.278084,0.277671,0.307812
2,3.057839,3.255923,3.299910,1.192592,1.464650,2.967876,0.227344,0.235156,0.835156,0.297894,0.274229,0.430808,0.285333,0.274223,0.303927
3,2.865379,3.291108,3.304233,1.046914,1.532031,3.041578,0.197656,0.260156,0.847656,0.297172,0.278313,0.436512,0.287122,0.276357,0.302908
4,2.932381,3.215899,3.311308,1.122829,1.396609,3.049081,0.196094,0.217969,0.848437,0.293139,0.268818,0.439668,0.287785,0.271878,0.302744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1065,3.309207,3.281770,3.289307,0.624311,0.510057,1.638396,0.107031,0.070312,0.239063,0.199474,0.195768,0.268959,0.242105,0.231993,0.249236
1066,3.275286,3.215557,3.293427,0.700151,0.474129,1.639190,0.110937,0.056250,0.229687,0.206981,0.192105,0.269199,0.246131,0.233164,0.251797
1067,3.209732,3.136212,3.293772,0.775905,0.544999,1.690644,0.118750,0.071094,0.251563,0.210549,0.204443,0.280597,0.249684,0.240884,0.251890
1068,3.174726,3.102717,3.294035,0.584600,0.384504,1.658883,0.098437,0.023438,0.232813,0.199500,0.194205,0.279846,0.246544,0.239034,0.256621


In [22]:
entropy_r = pd.DataFrame(entropy_r.T, columns =["E x", "E y", "E z"])
spec_entropy_r = pd.DataFrame(spec_ent_r.T, columns =["SE x", "SE y", "SE z"])
specroll_r = pd.DataFrame(spec_roll_r.T, columns =["SR x", "SR y", "SR z"])
spec_centr_r = pd.DataFrame(centr_r.T, columns =["SC x", "SC y", "SC z"])
spec_spread_r = pd.DataFrame(spec_spread_r.T, columns =["SS x", "SS y", "SS z"])

df_r = pd.concat([entropy_r, spec_entropy_r, specroll_r, spec_centr_r, spec_spread_r], axis =1)
df_r

Unnamed: 0,E x,E y,E z,SE x,SE y,SE z,SR x,SR y,SR z,SC x,SC y,SC z,SS x,SS y,SS z
0,2.764249,3.213640,3.292419,0.486410,1.225517,1.816054,0.076563,0.242188,5.398437e-01,0.225210,0.310738,3.653235e-01,0.285369,0.297846,0.310569
1,3.274312,3.309540,3.312831,0.802702,1.252829,1.935522,0.103125,0.232031,5.835938e-01,0.291757,0.327329,3.893269e-01,0.302834,0.300690,0.308906
2,3.298964,3.275258,3.308264,0.769201,1.313210,1.966790,0.103125,0.270313,5.617188e-01,0.270925,0.315554,3.693574e-01,0.297048,0.299074,0.306664
3,3.284066,3.275852,3.306214,0.722574,1.314096,1.977958,0.103125,0.267188,6.117187e-01,0.269714,0.314589,3.759174e-01,0.298395,0.298303,0.310192
4,3.280501,3.294480,3.312584,0.715057,1.237249,1.987793,0.100781,0.260156,5.875000e-01,0.276584,0.311875,3.723285e-01,0.299931,0.300321,0.309093
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012,3.135881,3.175903,3.289960,0.277297,0.689034,1.680623,-0.342749,-1.411189,7.085093e+91,-0.342749,-1.411189,2.146985e-314,0.285787,0.313289,0.317396
2013,1.590117,1.829873,1.345448,1.367344,2.114864,1.476057,0.194755,-1.064346,5.981993e-154,0.194755,-1.064346,1.242809e+151,0.187350,0.270916,0.233114
2014,1.893738,2.026987,2.099313,0.538687,0.704298,0.663752,-0.311291,1.797036,4.056030e+232,-0.311291,1.797036,2.146985e-314,0.168540,0.212103,0.167797
2015,1.966712,1.872642,1.494696,0.590904,0.428958,0.412348,0.464122,1.861138,2.181744e+243,0.464122,1.861138,2.146985e-314,0.214936,0.216680,0.187534


In [23]:
df = pd.concat([df_s, df_r], axis = 0)
df

Unnamed: 0,E x,E y,E z,SE x,SE y,SE z,SR x,SR y,SR z,SC x,SC y,SC z,SS x,SS y,SS z
0,3.076283,3.159694,3.216961,1.147296,1.137282,2.720335,0.186719,0.177344,8.101563e-01,0.265644,0.246914,4.157178e-01,0.277956,0.266299,0.311216
1,3.038338,3.238392,3.298141,1.200881,1.497325,2.980266,0.203906,0.246875,8.484375e-01,0.279646,0.276328,4.310229e-01,0.278084,0.277671,0.307812
2,3.057839,3.255923,3.299910,1.192592,1.464650,2.967876,0.227344,0.235156,8.351563e-01,0.297894,0.274229,4.308083e-01,0.285333,0.274223,0.303927
3,2.865379,3.291108,3.304233,1.046914,1.532031,3.041578,0.197656,0.260156,8.476562e-01,0.297172,0.278313,4.365125e-01,0.287122,0.276357,0.302908
4,2.932381,3.215899,3.311308,1.122829,1.396609,3.049081,0.196094,0.217969,8.484375e-01,0.293139,0.268818,4.396676e-01,0.287785,0.271878,0.302744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012,3.135881,3.175903,3.289960,0.277297,0.689034,1.680623,-0.342749,-1.411189,7.085093e+91,-0.342749,-1.411189,2.146985e-314,0.285787,0.313289,0.317396
2013,1.590117,1.829873,1.345448,1.367344,2.114864,1.476057,0.194755,-1.064346,5.981993e-154,0.194755,-1.064346,1.242809e+151,0.187350,0.270916,0.233114
2014,1.893738,2.026987,2.099313,0.538687,0.704298,0.663752,-0.311291,1.797036,4.056030e+232,-0.311291,1.797036,2.146985e-314,0.168540,0.212103,0.167797
2015,1.966712,1.872642,1.494696,0.590904,0.428958,0.412348,0.464122,1.861138,2.181744e+243,0.464122,1.861138,2.146985e-314,0.214936,0.216680,0.187534


In [24]:
df = df[np.random.permutation(df.columns)]
df

Unnamed: 0,SC x,SC z,SS y,SR y,SS z,SR x,SR z,SE z,SE x,SE y,SC y,E z,SS x,E x,E y
0,0.265644,4.157178e-01,0.266299,0.177344,0.311216,0.186719,8.101563e-01,2.720335,1.147296,1.137282,0.246914,3.216961,0.277956,3.076283,3.159694
1,0.279646,4.310229e-01,0.277671,0.246875,0.307812,0.203906,8.484375e-01,2.980266,1.200881,1.497325,0.276328,3.298141,0.278084,3.038338,3.238392
2,0.297894,4.308083e-01,0.274223,0.235156,0.303927,0.227344,8.351563e-01,2.967876,1.192592,1.464650,0.274229,3.299910,0.285333,3.057839,3.255923
3,0.297172,4.365125e-01,0.276357,0.260156,0.302908,0.197656,8.476562e-01,3.041578,1.046914,1.532031,0.278313,3.304233,0.287122,2.865379,3.291108
4,0.293139,4.396676e-01,0.271878,0.217969,0.302744,0.196094,8.484375e-01,3.049081,1.122829,1.396609,0.268818,3.311308,0.287785,2.932381,3.215899
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012,-0.342749,2.146985e-314,0.313289,-1.411189,0.317396,-0.342749,7.085093e+91,1.680623,0.277297,0.689034,-1.411189,3.289960,0.285787,3.135881,3.175903
2013,0.194755,1.242809e+151,0.270916,-1.064346,0.233114,0.194755,5.981993e-154,1.476057,1.367344,2.114864,-1.064346,1.345448,0.187350,1.590117,1.829873
2014,-0.311291,2.146985e-314,0.212103,1.797036,0.167797,-0.311291,4.056030e+232,0.663752,0.538687,0.704298,1.797036,2.099313,0.168540,1.893738,2.026987
2015,0.464122,2.146985e-314,0.216680,1.861138,0.187534,0.464122,2.181744e+243,0.412348,0.590904,0.428958,1.861138,1.494696,0.214936,1.966712,1.872642


In [25]:
df.to_csv(path + "/feature_extr/spectral_features.csv")