In [1]:
import numpy as np
import math

### define functions:

In [2]:
def entropy(data, num_short_blocks=10):
    eol = np.sum(np.square(data))
    win_len = len(data)
    sub_win_len = math.floor(win_len / num_short_blocks)

    if win_len != sub_win_len * num_short_blocks:
        data = data[0:sub_win_len * num_short_blocks]
    sub_wins = data.reshape(sub_win_len, num_short_blocks, order='F').copy()
    norm_sub_frame_energies = np.zeros((1, sub_wins.shape[1]))
    for i in range(sub_wins.shape[1]):
        norm_sub_frame_energies[0, i] = np.sum(np.square(sub_wins[:, i])) / (eol + np.spacing(1))
    energy_entropy = 0
    for i in range(norm_sub_frame_energies.shape[1]):
        energy_entropy -= norm_sub_frame_energies[0, i] * math.log(norm_sub_frame_energies[0, i] + np.spacing(1), 2)
    return energy_entropy

In [3]:
def dft(data, f_s = 4000, p=0):
    win_len = len(data)
    fft = np.abs(np.fft.fft(data)) / win_len
    if not p:
        fft = fft[0:math.ceil(win_len)]
        f_req = (f_s / 2) * np.arange(0, np.ceil(win_len / 2) + 1) / np.ceil(win_len / 2)
    else:
        fft = np.fft.fftshift(fft)
        if win_len % 2:
            f_req = np.arange(-(win_len - 1) / 2, (win_len - 1) / 2 + 1)
        else:
            f_req = np.arange(-win_len / 2, win_len / 2)
    fft_1 = np.abs(fft)/win_len
    fft_2 = fft_1[1:(round(win_len / 2) + 1)]
    fft_2 = 2*fft_2
    return fft_2, f_req

In [4]:
def spectral_rolloff(data, c=0.90):
    total_energy = np.sum(np.square(data))
    curr_energy = 0
    count_fft = 0
    fft_len = len(data)
    while curr_energy <= c * total_energy and count_fft <= fft_len:
        curr_energy += data[count_fft] ** 2
        count_fft += 1
    count_fft -= 1
    return (count_fft - 1) / fft_len

In [5]:
def spectral_centroid(data, f_s = 4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c

In [6]:
def spectral_spread(data, f_s=4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return s

### define data

In [7]:
path = "/Users/ecem/Desktop/gyrocardiogram/phase2/"

In [8]:
x = np.load(path + "100_x_splitted.npy", allow_pickle= True)
y = np.load(path + "100_y_splitted.npy", allow_pickle= True)
z = np.load(path + "100_z_splitted.npy", allow_pickle= True)

In [9]:
x.shape

(100,)

In [10]:
len(x[0])

44

In [11]:
len(y[0])

44

In [12]:
len(z[0])

44

In [13]:
x[0][0].shape

(2560,)

In [14]:
diseased = np.vstack((x, y, z))

In [15]:
#sanity check
print(diseased.shape)

(3, 100)


## Feature Extraction:


### Spectral Entropy:

In [16]:
dft_ = dft(diseased[0][1][0])[0]

In [17]:
entropy(dft_)

0.6457100784593657

In [18]:
spec_ent = np.ndarray((diseased.shape[1], 3))
for i in range(0, diseased.shape[-1]):
    x_sum, y_sum,z_sum = 0,0,0
    for j in range(0,len(x[i])):
        x_ = dft(diseased[0][i][j])[0] #her 10 sn lik kesitler
        y_ = dft(diseased[1][i][j])[0]
        z_ = dft(diseased[2][i][j])[0]

        x_sum += entropy(x_, num_short_blocks = 10) 
        y_sum += entropy(y_, num_short_blocks = 10)
        z_sum += entropy(z_, num_short_blocks = 10)
    
    spec_ent[i, 0] = x_sum/100
    spec_ent[i, 1] = y_sum/100
    spec_ent[i, 2] = z_sum/100                  

In [19]:
spec_ent.shape

(100, 3)

### Spectral Rolloff

In [20]:
spectral_rolloff(dft_)

0.10390625

In [21]:
spec_roll = np.ndarray((diseased.shape[1],3))
for i in range(0, diseased.shape[-1]):
    x_sum, y_sum, z_sum = 0,0,0
    for j in range(0, len(x[i])):
        x_ = dft(diseased[0][i][j])[0]
        y_ = dft(diseased[1][i][j])[0]
        z_ = dft(diseased[2][i][j])[0]

        x_sum += spectral_rolloff(x_)
        y_sum += spectral_rolloff(y_)
        z_sum += spectral_rolloff(z_)
        
    spec_roll[i,0] = x_sum / diseased.shape[-1]
    spec_roll[i,1] = y_sum / diseased.shape[-1]
    spec_roll[i,2] = z_sum / diseased.shape[-1]
    

In [22]:
spec_roll.shape

(100, 3)

In [23]:
spec_roll

array([[0.04815625, 0.04051314, 0.03814143],
       [0.07140625, 0.08113509, 0.0953719 ],
       [0.09050781, 0.04452886, 0.10034198],
       [0.13278125, 0.18767123, 0.03902434],
       [0.10224219, 0.03608089, 0.08017969],
       [0.10211719, 0.25600156, 0.02527708],
       [0.02710156, 0.14572658, 0.03839388],
       [0.10921094, 0.07623032, 0.0960185 ],
       [0.03217188, 0.10753521, 0.07349145],
       [0.07096875, 0.06833333, 0.06964973],
       [0.0680625 , 0.12337058, 0.06448841],
       [0.1775    , 0.23066861, 0.04245751],
       [0.19178906, 0.04212593, 0.02491354],
       [0.05280469, 0.04933171, 0.09963983],
       [0.07015625, 0.02648905, 0.10270802],
       [0.06092188, 0.04952707, 0.02001387],
       [0.16089062, 0.04094385, 0.03512039],
       [0.05272656, 0.0230659 , 0.02115074],
       [0.06605469, 0.03507868, 0.10589166],
       [0.02420313, 0.07121805, 0.03519608],
       [0.08228125, 0.06100935, 0.08081345],
       [0.02777344, 0.08926362, 0.06642801],
       [0.

### Spectral Centroid:

In [24]:
spectral_centroid(dft_)

0.1754576256117317

In [25]:
centr = np.ndarray((diseased.shape[1], 3))
for i in range(0, diseased.shape[-1]):
    x_sum, y_sum, z_sum = 0,0,0
    for j in range(0, len(x[i])):
        x_ = dft(diseased[0][i][j])[0]
        y_ = dft(diseased[1][i][j])[0]
        z_ = dft(diseased[2][i][j])[0]

        x_sum += spectral_centroid(x_)
        y_sum += spectral_centroid(y_)
        z_sum += spectral_centroid(z_)

    centr[i,0] = x_sum / diseased.shape[-1]
    centr[i,1] = y_sum / diseased.shape[-1]
    centr[i,2] = z_sum / diseased.shape[-1]

In [26]:
centr.shape

(100, 3)

In [27]:
centr

array([[0.11888152, 0.12942806, 0.12902428],
       [0.11348473, 0.12705754, 0.1455202 ],
       [0.11583033, 0.10036056, 0.11400725],
       [0.13014111, 0.14749576, 0.1390393 ],
       [0.14012381, 0.1236371 , 0.12907688],
       [0.14200104, 0.17345151, 0.13772633],
       [0.11145591, 0.17894474, 0.12394909],
       [0.11571935, 0.13192231, 0.11460632],
       [0.10639905, 0.13592749, 0.11257278],
       [0.12175884, 0.1096341 , 0.11119383],
       [0.1252797 , 0.15465503, 0.13348121],
       [0.17150657, 0.18435181, 0.14118002],
       [0.14091221, 0.11749316, 0.07032677],
       [0.10650916, 0.12351766, 0.12316984],
       [0.11198211, 0.07278711, 0.11035847],
       [0.08286697, 0.0945628 , 0.06392421],
       [0.13419219, 0.08568834, 0.10774673],
       [0.08277401, 0.08671529, 0.05592985],
       [0.11585612, 0.09726884, 0.13544203],
       [0.0920293 , 0.10528408, 0.09039869],
       [0.12844159, 0.12047375, 0.10517274],
       [0.07939308, 0.11116497, 0.10240588],
       [0.

### Spectral Spread

In [28]:
spec_spread = np.ndarray((diseased.shape[1], 3))
for i in range(diseased.shape[-1]):
    x_sum, y_sum, z_sum = 0,0,0
    for j in range(0, len(x[i])):
        x_ = dft(diseased[0][i][j])[0]
        y_ = dft(diseased[1][i][j])[0]
        z_ = dft(diseased[2][i][j])[0]
        
        x_sum += spectral_spread(x_)
        y_sum += spectral_spread(y_)
        z_sum += spectral_spread(z_)
    
    spec_spread[i,0] =  x_sum / diseased.shape[-1]
    spec_spread[i,1] =  y_sum / diseased.shape[-1]
    spec_spread[i,2] =  z_sum / diseased.shape[-1]


In [29]:
spec_spread.shape

(100, 3)

In [30]:
spec_spread

array([[0.12967717, 0.13257591, 0.13605127],
       [0.1261473 , 0.13502295, 0.15080098],
       [0.11218109, 0.10869039, 0.10705513],
       [0.12598137, 0.13419475, 0.13539032],
       [0.13768329, 0.14162331, 0.13580081],
       [0.14647219, 0.15188112, 0.14846514],
       [0.13802057, 0.16602206, 0.14924693],
       [0.11024661, 0.13469265, 0.11793149],
       [0.13681015, 0.1378114 , 0.12620586],
       [0.13071787, 0.12824851, 0.12458638],
       [0.14211784, 0.15598052, 0.14875319],
       [0.16162294, 0.1733778 , 0.16461279],
       [0.12271527, 0.12404478, 0.09897502],
       [0.12804572, 0.13785807, 0.12961195],
       [0.11422105, 0.09823401, 0.11012269],
       [0.09516035, 0.10404012, 0.08957825],
       [0.12203284, 0.10467163, 0.11988731],
       [0.09729342, 0.1051224 , 0.08438794],
       [0.12510364, 0.11640539, 0.1264114 ],
       [0.10867434, 0.10819502, 0.10530119],
       [0.13211362, 0.13098001, 0.11676302],
       [0.10307954, 0.11331064, 0.1085084 ],
       [0.

### Create DataFrame

In [31]:
import pandas as pd

In [32]:
spec_entropy = pd.DataFrame(spec_ent, columns =["SE x", "SE y", "SE z"])
specroll = pd.DataFrame(spec_roll, columns =["SR x", "SR y", "SR z"])
spec_centr = pd.DataFrame(centr, columns =["SC x", "SC y", "SC z"])
spec_spread = pd.DataFrame(spec_spread, columns =["SS x", "SS y", "SS z"])

df = pd.concat([spec_entropy, specroll, spec_centr, spec_spread], axis =1)
df

Unnamed: 0,SE x,SE y,SE z,SR x,SR y,SR z,SC x,SC y,SC z,SS x,SS y,SS z
0,0.346638,0.301639,0.283458,0.048156,0.040513,0.038141,0.118882,0.129428,0.129024,0.129677,0.132576,0.136051
1,0.518874,0.624197,0.398359,0.071406,0.081135,0.095372,0.113485,0.127058,0.145520,0.126147,0.135023,0.150801
2,0.446310,0.322765,0.493944,0.090508,0.044529,0.100342,0.115830,0.100361,0.114007,0.112181,0.108690,0.107055
3,0.549191,0.611675,0.305170,0.132781,0.187671,0.039024,0.130141,0.147496,0.139039,0.125981,0.134195,0.135390
4,0.583176,0.266599,0.617320,0.102242,0.036081,0.080180,0.140124,0.123637,0.129077,0.137683,0.141623,0.135801
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.394827,0.204549,0.215406,0.117375,0.028969,0.032827,0.100810,0.084148,0.080152,0.092403,0.088180,0.089695
96,0.278854,0.329637,0.263483,0.075867,0.077933,0.084681,0.075995,0.080143,0.093342,0.079619,0.076225,0.085031
97,0.191763,0.215004,0.141804,0.028727,0.032946,0.024837,0.038995,0.047402,0.042540,0.048224,0.057751,0.052510
98,0.042198,0.074269,0.028996,0.008656,0.011459,0.006195,0.016169,0.021791,0.013771,0.022028,0.025692,0.021298


In [33]:
df = df[np.random.permutation(df.columns)]
df

Unnamed: 0,SC x,SS x,SC z,SE x,SR z,SR x,SE z,SC y,SS z,SR y,SS y,SE y
0,0.118882,0.129677,0.129024,0.346638,0.038141,0.048156,0.283458,0.129428,0.136051,0.040513,0.132576,0.301639
1,0.113485,0.126147,0.145520,0.518874,0.095372,0.071406,0.398359,0.127058,0.150801,0.081135,0.135023,0.624197
2,0.115830,0.112181,0.114007,0.446310,0.100342,0.090508,0.493944,0.100361,0.107055,0.044529,0.108690,0.322765
3,0.130141,0.125981,0.139039,0.549191,0.039024,0.132781,0.305170,0.147496,0.135390,0.187671,0.134195,0.611675
4,0.140124,0.137683,0.129077,0.583176,0.080180,0.102242,0.617320,0.123637,0.135801,0.036081,0.141623,0.266599
...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.100810,0.092403,0.080152,0.394827,0.032827,0.117375,0.215406,0.084148,0.089695,0.028969,0.088180,0.204549
96,0.075995,0.079619,0.093342,0.278854,0.084681,0.075867,0.263483,0.080143,0.085031,0.077933,0.076225,0.329637
97,0.038995,0.048224,0.042540,0.191763,0.024837,0.028727,0.141804,0.047402,0.052510,0.032946,0.057751,0.215004
98,0.016169,0.022028,0.013771,0.042198,0.006195,0.008656,0.028996,0.021791,0.021298,0.011459,0.025692,0.074269


In [34]:
df.to_csv(path + "selected_features.csv")