In this notebook we are going to focus on features which will be derived from time domain characteristics.

In [1]:
import numpy as np
import math
from scipy.signal import argrelextrema, find_peaks

## Energy:

In [2]:
def energy(data):
    return (1 / len(data)) * np.sum(np.square(data))

In [3]:
path = "/Users/ecem/Desktop/phonocardiogram/data/AV/absent"

In [4]:
waves = np.load(path + "/absent_AV_waves_10sec.npy", allow_pickle= True)

In [5]:
waves.shape

(6980, 40000)

In [6]:
energy_ = []
for i in range(waves.shape[0]):
    energy_.append(energy(waves[i]))

In [7]:
energy_ = np.array(energy_)

In [8]:
energy_.shape

(6980,)

## Entropy:

Entropy is a thermodynamics concept that measures the molecular disorder in a closed system. This concept is used in nonlinear dynamical systems to quantify the degree of complexity. Entropy is an interesting tool for analyzing time series, as it does not consider any constraints on the probability distribution [7]. Shannon entropy (ShEn) and conditional entropy (ConEn) are the basic measures used for evaluating entropy. ShEn and ConEn measure the amount of information and the rate of information generation, respectively [1]. Based on these measures, other entropy measures have been introduced for evaluating the complexity of time series

In [9]:
def entropy(data, num_short_blocks=10):
    eol = np.sum(np.square(data))
    win_len = len(data)
    sub_win_len = math.floor(win_len / num_short_blocks)

    if win_len != sub_win_len * num_short_blocks:
        data = data[0:sub_win_len * num_short_blocks]
    sub_wins = data.reshape(sub_win_len, num_short_blocks, order='F').copy()
    norm_sub_frame_energies = np.zeros((1, sub_wins.shape[1]))
    for i in range(sub_wins.shape[1]):
        norm_sub_frame_energies[0, i] = np.sum(np.square(sub_wins[:, i])) / (eol + np.spacing(1))
    energy_entropy = 0
    for i in range(norm_sub_frame_energies.shape[1]):
        energy_entropy -= norm_sub_frame_energies[0, i] * math.log(norm_sub_frame_energies[0, i] + np.spacing(1), 2)
    return energy_entropy

# SOR: num_short_blocks ne olmalı ?

In [10]:
entropy(waves[0], num_short_blocks = 10)

2.591618813468979

In [11]:
entropy_ = []
for i in range(waves.shape[0]):
    entropy_.append(entropy(waves[i], num_short_blocks = 10))

In [12]:
entropy_ = np.array(entropy_)
entropy_.shape

(6980,)

## discrete fourier transform

In [13]:
def dft(data, f_s = 4000, p=0):
    win_len = len(data)
    fft = np.abs(np.fft.fft(data)) / win_len
    if not p:
        fft = fft[0:math.ceil(win_len)]
        f_req = (f_s / 2) * np.arange(0, np.ceil(win_len / 2) + 1) / np.ceil(win_len / 2)
    else:
        fft = np.fft.fftshift(fft)
        if win_len % 2:
            f_req = np.arange(-(win_len - 1) / 2, (win_len - 1) / 2 + 1)
        else:
            f_req = np.arange(-win_len / 2, win_len / 2)
    fft_1 = np.abs(fft)/win_len
    fft_2 = fft_1[1:(round(win_len / 2) + 1)]
    fft_2 = 2*fft_2
    return fft_2, f_req

## Spectral Entropy

In [14]:
spec_ent = []
for i in range(waves.shape[0]):
    fft, _ = dft(waves[i])
    spec_ent.append(entropy(fft, 10))

In [15]:
spec_ent = np.array(spec_ent)

In [16]:
spec_ent.shape

(6980,)

## Spectral RollOff

Spectral rolloff is the frequency below which a specified percentage of the total spectral energy, e.g. 85%, lies

In [17]:
def spectral_rolloff(data, c=0.90):
    total_energy = np.sum(np.square(data))
    curr_energy = 0
    count_fft = 0
    fft_len = len(data)
    while curr_energy <= c * total_energy and count_fft <= fft_len:
        curr_energy += data[count_fft] ** 2
        count_fft += 1
    count_fft -= 1
    return (count_fft - 1) / fft_len

In [18]:
fft, _ = dft(waves[0])
spectral_rolloff(fft)

0.05505

In [19]:
rolloff = []
for i in range(waves.shape[0]):
    fft, _ = dft(waves[i])
    rolloff.append(spectral_rolloff(fft))

In [20]:
rolloff = np.array(rolloff)
rolloff.shape

(6980,)

## Spectral Centroid

The spectral centroid (Wikipedia) indicates at which frequency the energy of a spectrum is centered upon. 

## SOR: burada bir pozitif negatif sıkıntısı vardı. Aşağıdaki gibi çözdüm sıkıntı oluyor mu ? 

In [21]:
def spectral_centroid(data, f_s = 4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c

In [22]:
centroid = []
for i in range(waves.shape[0]):
    fft, _ = dft(waves[i])
    centroid.append(spectral_centroid(fft))

In [23]:
centroid = np.array(centroid)
centroid.shape

(6980,)

## Spectral spread:

In [24]:
def spectral_spread(data, f_s=4000):
    fft_len = len(data)
    m = np.transpose((f_s / (2 * fft_len)) * np.arange(1, fft_len+1))
    data = data / np.max(data)
    c = np.sum(np.multiply(m, data)) / (np.sum(data) + np.spacing(1))
    k = np.sum(np.square(m - c) * data)
    l = (np.sum(data) + np.spacing(1))
    
    if k*l <0 :
        s = math.sqrt( -1* k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return c
    else:
        s = math.sqrt( k / l ) / (f_s / 2)
        c = c / (f_s / 2)
        return s

In [25]:
spread = []
for i in range(waves.shape[0]):
    fft, _ = dft(waves[i])
    spread.append(spectral_spread(fft))

In [26]:
spread = np.array( spread)
spread.shape

(6980,)

# Now lets create their dataframes

In [27]:
import pandas as pd

In [28]:
energy_df = pd.DataFrame(energy_, columns =["energy"])
energy_df.head()

Unnamed: 0,energy
0,0.002254
1,0.001892
2,0.00107
3,0.001276
4,0.000687


In [29]:
entropy_df = pd.DataFrame(entropy_, columns =["entropy"])
entropy_df.head()

Unnamed: 0,entropy
0,2.591619
1,2.255495
2,2.356572
3,2.510904
4,2.930359


In [30]:
spec_ent_df = pd.DataFrame(spec_ent, columns = ["spectral entropy"])
spec_ent_df.head()

Unnamed: 0,spectral entropy
0,0.071787
1,0.095646
2,0.049112
3,0.019241
4,0.095409


In [31]:
rolloff_df = pd.DataFrame(rolloff, columns =["spectral rolloff"])
rolloff_df.head()

Unnamed: 0,spectral rolloff
0,0.05505
1,0.0568
2,0.05395
3,0.05245
4,0.055


In [32]:
centroid_df = pd.DataFrame(centroid, columns =["spectral centorid"])
centroid_df.head()

Unnamed: 0,spectral centorid
0,0.047869
1,0.046744
2,0.041128
3,0.041156
4,0.04941


In [33]:
spread_df = pd.DataFrame(spread, columns =["spectral spread"])
spread_df.head()

Unnamed: 0,spectral spread
0,0.056253
1,0.045214
2,0.033497
3,0.05399
4,0.074455


## Now concatenate all features

In [34]:
df = pd.concat([energy_df, entropy_df, spec_ent_df, rolloff_df, centroid_df, spread_df], axis =1)

In [35]:
df

Unnamed: 0,energy,entropy,spectral entropy,spectral rolloff,spectral centorid,spectral spread
0,0.002254,2.591619,0.071787,0.05505,0.047869,0.056253
1,0.001892,2.255495,0.095646,0.05680,0.046744,0.045214
2,0.001070,2.356572,0.049112,0.05395,0.041128,0.033497
3,0.001276,2.510904,0.019241,0.05245,0.041156,0.053990
4,0.000687,2.930359,0.095409,0.05500,0.049410,0.074455
...,...,...,...,...,...,...
6975,0.000623,2.569858,0.595304,0.10840,0.066223,0.045528
6976,0.000287,2.702229,0.311879,0.08725,0.053880,0.044667
6977,0.000201,3.143081,0.308364,0.08030,0.059191,0.081845
6978,0.000250,2.891970,0.168473,0.03455,0.049848,0.075712


In [36]:
df.to_csv("/Users/ecem/Desktop/phonocardiogram/data/AV/absent/AV-time-domain-features.csv" )