In [1]:
import numpy as np
import math

### define functions:

In [2]:
def entropy(data, num_short_blocks=10):
    eol = np.sum(np.square(data))
    win_len = len(data)
    sub_win_len = math.floor(win_len / num_short_blocks)

    if win_len != sub_win_len * num_short_blocks:
        data = data[0:sub_win_len * num_short_blocks]
    sub_wins = data.reshape(sub_win_len, num_short_blocks, order='F').copy()
    norm_sub_frame_energies = np.zeros((1, sub_wins.shape[1]))
    for i in range(sub_wins.shape[1]):
        norm_sub_frame_energies[0, i] = np.sum(np.square(sub_wins[:, i])) / (eol + np.spacing(1))
    energy_entropy = 0
    for i in range(norm_sub_frame_energies.shape[1]):
        energy_entropy -= norm_sub_frame_energies[0, i] * math.log(norm_sub_frame_energies[0, i] + np.spacing(1), 2)
    return energy_entropy

In [3]:
def dft(data, f_s = 4000, p=0):
    win_len = len(data)
    fft = np.abs(np.fft.fft(data)) / win_len
    if not p:
        fft = fft[0:math.ceil(win_len)]
        f_req = (f_s / 2) * np.arange(0, np.ceil(win_len / 2) + 1) / np.ceil(win_len / 2)
    else:
        fft = np.fft.fftshift(fft)
        if win_len % 2:
            f_req = np.arange(-(win_len - 1) / 2, (win_len - 1) / 2 + 1)
        else:
            f_req = np.arange(-win_len / 2, win_len / 2)
    fft_1 = np.abs(fft)/win_len
    fft_2 = fft_1[1:(round(win_len / 2) + 1)]
    fft_2 = 2*fft_2
    return fft_2, f_req

In [4]:
def spectral_rolloff(data, c=0.90):
    total_energy = np.sum(np.square(data))
    curr_energy = 0
    count_fft = 0
    fft_len = len(data)
    while curr_energy <= c * total_energy and count_fft <= fft_len:
        curr_energy += data[count_fft] ** 2
        count_fft += 1
    count_fft -= 1
    return (count_fft - 1) / fft_len

In [5]:
def spectral_centroid(data, f_s = 4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c

In [6]:
def spectral_spread(data, f_s=4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return s

### define data

In [7]:
path = "/Users/ecem/Desktop/gyrocardiogram/phase2/"

In [8]:
x = np.load(path + "100_x_seismo_splitted.npy", allow_pickle= True)
y = np.load(path + "100_y_seismo_splitted.npy", allow_pickle= True)
z = np.load(path + "100_z_seismo_splitted.npy", allow_pickle= True)

In [9]:
x.shape

(100,)

In [10]:
len(x[0])

44

In [11]:
len(y[0])

44

In [12]:
len(z[0])

44

In [13]:
x[0][0].shape

(2560,)

In [14]:
diseased = np.vstack((x, y, z))

In [15]:
#sanity check
print(diseased.shape)

(3, 100)


## Feature Extraction:


### Spectral Entropy:

In [16]:
dft_ = dft(diseased[0][1][0])[0]

In [17]:
entropy(dft_)

1.0295974286677647

In [18]:
spec_ent = np.ndarray((diseased.shape[1], 3))
for i in range(0, diseased.shape[-1]):
    x_sum, y_sum,z_sum = 0,0,0
    for j in range(0,len(x[i])):
        x_ = dft(diseased[0][i][j])[0] #her 10 sn lik kesitler
        y_ = dft(diseased[1][i][j])[0]
        z_ = dft(diseased[2][i][j])[0]

        x_sum += entropy(x_, num_short_blocks = 10) 
        y_sum += entropy(y_, num_short_blocks = 10)
        z_sum += entropy(z_, num_short_blocks = 10)
    
    spec_ent[i, 0] = x_sum/100
    spec_ent[i, 1] = y_sum/100
    spec_ent[i, 2] = z_sum/100                  

In [19]:
spec_ent.shape

(100, 3)

### Spectral Rolloff

In [20]:
spectral_rolloff(dft_)

0.18203125

In [21]:
spec_roll = np.ndarray((diseased.shape[1],3))
for i in range(0, diseased.shape[-1]):
    x_sum, y_sum, z_sum = 0,0,0
    for j in range(0, len(x[i])):
        x_ = dft(diseased[0][i][j])[0]
        y_ = dft(diseased[1][i][j])[0]
        z_ = dft(diseased[2][i][j])[0]

        x_sum += spectral_rolloff(x_)
        y_sum += spectral_rolloff(y_)
        z_sum += spectral_rolloff(z_)
        
    spec_roll[i,0] = x_sum / diseased.shape[-1]
    spec_roll[i,1] = y_sum / diseased.shape[-1]
    spec_roll[i,2] = z_sum / diseased.shape[-1]
    

In [22]:
spec_roll.shape

(100, 3)

In [23]:
spec_roll

array([[0.11176563, 0.15157071, 0.16840426],
       [0.13051562, 0.12789084, 0.07492769],
       [0.06471094, 0.12080683, 0.148125  ],
       [0.21170312, 0.20717808, 0.13082713],
       [0.10674219, 0.09558355, 0.1186875 ],
       [0.08844531, 0.16766041, 0.24246272],
       [0.14916406, 0.3339675 , 0.15666348],
       [0.146375  , 0.06261019, 0.10067701],
       [0.10557813, 0.0800313 , 0.09456454],
       [0.10084375, 0.06562325, 0.11204206],
       [0.09572656, 0.12895717, 0.14482027],
       [0.17353906, 0.25308867, 0.19601374],
       [0.15794531, 0.17097778, 0.03814121],
       [0.07640625, 0.11254726, 0.11010593],
       [0.16485937, 0.04916058, 0.11791225],
       [0.04774219, 0.07793942, 0.06070018],
       [0.12849219, 0.04026583, 0.07783307],
       [0.09496875, 0.08017908, 0.05019515],
       [0.104     , 0.08022175, 0.16772592],
       [0.06705469, 0.15568421, 0.08564426],
       [0.19929687, 0.15704673, 0.11774979],
       [0.04828906, 0.20462445, 0.10357019],
       [0.

### Spectral Centroid:

In [24]:
spectral_centroid(dft_)

0.22829380941345145

In [25]:
centr = np.ndarray((diseased.shape[1], 3))
for i in range(0, diseased.shape[-1]):
    x_sum, y_sum, z_sum = 0,0,0
    for j in range(0, len(x[i])):
        x_ = dft(diseased[0][i][j])[0]
        y_ = dft(diseased[1][i][j])[0]
        z_ = dft(diseased[2][i][j])[0]

        x_sum += spectral_centroid(x_)
        y_sum += spectral_centroid(y_)
        z_sum += spectral_centroid(z_)

    centr[i,0] = x_sum / diseased.shape[-1]
    centr[i,1] = y_sum / diseased.shape[-1]
    centr[i,2] = z_sum / diseased.shape[-1]

In [26]:
centr.shape

(100, 3)

In [27]:
centr

array([[0.12537383, 0.14073786, 0.14222779],
       [0.13017146, 0.13846713, 0.13091277],
       [0.1190697 , 0.11744482, 0.12362655],
       [0.14549834, 0.14736192, 0.13983504],
       [0.14573703, 0.133073  , 0.13779091],
       [0.13737902, 0.15447612, 0.16409653],
       [0.14687976, 0.20301586, 0.15725795],
       [0.12904906, 0.1133642 , 0.11637813],
       [0.13185736, 0.13081626, 0.13263343],
       [0.1424092 , 0.09956369, 0.12756423],
       [0.11281552, 0.14445275, 0.15551834],
       [0.1757893 , 0.18322605, 0.18496055],
       [0.12926029, 0.13067517, 0.08281613],
       [0.11876141, 0.14270961, 0.13861969],
       [0.12482635, 0.08768551, 0.11118606],
       [0.09718739, 0.10016047, 0.08346585],
       [0.12866397, 0.10898255, 0.11934219],
       [0.09368205, 0.10882178, 0.08208339],
       [0.12194285, 0.1089563 , 0.13957364],
       [0.09454735, 0.12484331, 0.10956609],
       [0.15177711, 0.13773556, 0.1191695 ],
       [0.08991181, 0.13708247, 0.11430442],
       [0.

### Spectral Spread

In [28]:
spec_spread = np.ndarray((diseased.shape[1], 3))
for i in range(diseased.shape[-1]):
    x_sum, y_sum, z_sum = 0,0,0
    for j in range(0, len(x[i])):
        x_ = dft(diseased[0][i][j])[0]
        y_ = dft(diseased[1][i][j])[0]
        z_ = dft(diseased[2][i][j])[0]
        
        x_sum += spectral_spread(x_)
        y_sum += spectral_spread(y_)
        z_sum += spectral_spread(z_)
    
    spec_spread[i,0] =  x_sum / diseased.shape[-1]
    spec_spread[i,1] =  y_sum / diseased.shape[-1]
    spec_spread[i,2] =  z_sum / diseased.shape[-1]


In [29]:
spec_spread.shape

(100, 3)

In [30]:
spec_spread

array([[0.1166652 , 0.12728683, 0.12769354],
       [0.12328143, 0.13454875, 0.13957499],
       [0.10808651, 0.10775312, 0.10430867],
       [0.12155873, 0.12101879, 0.12341705],
       [0.13595637, 0.13516522, 0.13090193],
       [0.13713416, 0.14101418, 0.1408558 ],
       [0.14206811, 0.1567767 , 0.15152372],
       [0.11561258, 0.11911967, 0.10742104],
       [0.1332318 , 0.13360952, 0.13233369],
       [0.13457158, 0.11612296, 0.12338553],
       [0.12214554, 0.14434557, 0.1449715 ],
       [0.16100846, 0.16270489, 0.16560663],
       [0.11511676, 0.11305907, 0.10036379],
       [0.12705538, 0.13430291, 0.13615488],
       [0.10654005, 0.09892543, 0.1071613 ],
       [0.10311231, 0.09929122, 0.09203832],
       [0.10997304, 0.11590841, 0.11599826],
       [0.0917489 , 0.10585752, 0.09425535],
       [0.11779399, 0.11306161, 0.11880033],
       [0.09668966, 0.10781394, 0.10897711],
       [0.13228459, 0.12986774, 0.10829844],
       [0.10368231, 0.11537572, 0.10911787],
       [0.

### Create DataFrame

In [31]:
import pandas as pd

In [32]:
spec_entropy = pd.DataFrame(spec_ent, columns =["SE x", "SE y", "SE z"])
specroll = pd.DataFrame(spec_roll, columns =["SR x", "SR y", "SR z"])
spec_centr = pd.DataFrame(centr, columns =["SC x", "SC y", "SC z"])
spec_spread = pd.DataFrame(spec_spread, columns =["SS x", "SS y", "SS z"])

df = pd.concat([spec_entropy, specroll, spec_centr, spec_spread], axis =1)
df

Unnamed: 0,SE x,SE y,SE z,SR x,SR y,SR z,SC x,SC y,SC z,SS x,SS y,SS z
0,0.756619,0.616820,0.679121,0.111766,0.151571,0.168404,0.125374,0.140738,0.142228,0.116665,0.127287,0.127694
1,0.739110,0.728294,0.388895,0.130516,0.127891,0.074928,0.130171,0.138467,0.130913,0.123281,0.134549,0.139575
2,0.349087,0.520265,0.740179,0.064711,0.120807,0.148125,0.119070,0.117445,0.123627,0.108087,0.107753,0.104309
3,0.981220,0.980852,0.647781,0.211703,0.207178,0.130827,0.145498,0.147362,0.139835,0.121559,0.121019,0.123417
4,0.702113,0.584191,0.736450,0.106742,0.095584,0.118687,0.145737,0.133073,0.137791,0.135956,0.135165,0.130902
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.306076,0.405474,0.382612,0.070000,0.064098,0.049883,0.087609,0.088285,0.071536,0.087140,0.084400,0.076900
96,0.313150,0.218369,0.530677,0.061742,0.028885,0.072128,0.080112,0.065091,0.079113,0.077982,0.071396,0.070935
97,1.041992,1.155469,0.682970,0.164531,0.164874,0.123672,0.269108,0.234729,0.211853,0.306912,0.276146,0.268650
98,0.926349,0.674085,0.601212,0.225430,0.096827,0.067875,0.170623,0.125184,0.145472,0.147028,0.135845,0.164032


In [33]:
df = df[np.random.permutation(df.columns)]
df

Unnamed: 0,SE z,SR z,SR y,SS z,SC x,SS x,SS y,SE y,SC z,SC y,SE x,SR x
0,0.679121,0.168404,0.151571,0.127694,0.125374,0.116665,0.127287,0.616820,0.142228,0.140738,0.756619,0.111766
1,0.388895,0.074928,0.127891,0.139575,0.130171,0.123281,0.134549,0.728294,0.130913,0.138467,0.739110,0.130516
2,0.740179,0.148125,0.120807,0.104309,0.119070,0.108087,0.107753,0.520265,0.123627,0.117445,0.349087,0.064711
3,0.647781,0.130827,0.207178,0.123417,0.145498,0.121559,0.121019,0.980852,0.139835,0.147362,0.981220,0.211703
4,0.736450,0.118687,0.095584,0.130902,0.145737,0.135956,0.135165,0.584191,0.137791,0.133073,0.702113,0.106742
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.382612,0.049883,0.064098,0.076900,0.087609,0.087140,0.084400,0.405474,0.071536,0.088285,0.306076,0.070000
96,0.530677,0.072128,0.028885,0.070935,0.080112,0.077982,0.071396,0.218369,0.079113,0.065091,0.313150,0.061742
97,0.682970,0.123672,0.164874,0.268650,0.269108,0.306912,0.276146,1.155469,0.211853,0.234729,1.041992,0.164531
98,0.601212,0.067875,0.096827,0.164032,0.170623,0.147028,0.135845,0.674085,0.145472,0.125184,0.926349,0.225430


In [34]:
df.to_csv(path + "selected_features_seismo.csv")