In this notebook we are going to focus on features which will be derived from time domain characteristics.

In [1]:
import numpy as np
import math
from scipy.signal import argrelextrema, find_peaks

In [2]:
path = "/Users/ecem/Desktop/phonocardiogram/data/MV/present"

In [3]:
waves = np.load(path + "/present_MV_waves_10sec.npy", allow_pickle= True)

In [4]:
waves.shape

(1080, 40000)

In [5]:
num = 1080

## Mean:

In [6]:
mean_ = np.zeros(num)

In [7]:
for i in range(waves.shape[0]):
    mean_[i] = waves[i].mean()

## Standart deviation:

In [8]:
std_ = np.zeros((num))
for i in range(waves.shape[0]):
    std_[i] = waves[i].std()

## Skewness

In [9]:
from scipy.stats import skew

In [10]:
skew_ = np.zeros((num))
for i in range(waves.shape[0]):
    skew_[i] = skew(waves[i])

## Kurtosis

In [11]:
import scipy.stats

In [12]:
kurt_ = np.zeros((num))
for i in range(waves.shape[0]):
    kurt_[i] =  scipy.stats.kurtosis(waves[i])

## Harmonic Mean

In [13]:
harmonic_ = np.zeros((num))
for i in range(waves.shape[0]):
    harmonic_[i] =  scipy.stats.hmean(waves[i]* waves[i])

## Autocorrelation

In [14]:
from scipy import signal
from scipy.signal import find_peaks

In [15]:
auto_corr = np.zeros((num, 79999))
peaks_ = []
for i in range(waves.shape[0]):
    auto_corr[i,:] = signal.fftconvolve(waves[i], waves[i], mode='full')
    peaks_.append(find_peaks(auto_corr[i], height = 0)[1]["peak_heights"].mean())
    

In [16]:
peaks_ = np.array(peaks_)
peaks_.shape

(1080,)

# Now lets create their dataframes

In [17]:
import pandas as pd

RuntimeError: module compiled against API version 0xe but this version of numpy is 0xd

In [18]:
mean_df = pd.DataFrame(mean_, columns =["mean"])
mean_df.head()

Unnamed: 0,mean
0,-0.001512
1,-0.000412
2,0.000172
3,4.8e-05
4,0.000685


In [19]:
std_df = pd.DataFrame(std_, columns =["std"])
std_df.head()

Unnamed: 0,std
0,0.05967
1,0.019801
2,0.011843
3,0.029366
4,0.015039


In [20]:
kurtosis_df = pd.DataFrame(kurt_, columns = ["kurtosis"])
kurtosis_df.head()

Unnamed: 0,kurtosis
0,92.901332
1,15.586249
2,29.625304
3,385.096582
4,14.906036


In [21]:
skew_df = pd.DataFrame(skew_, columns =["skewness"])
skew_df.head()

Unnamed: 0,skewness
0,0.7519
1,-0.659232
2,0.248045
3,4.424341
4,-1.157463


In [22]:
harmonic_df = pd.DataFrame(harmonic_, columns =["harmonic mean"])
harmonic_df.head()

Unnamed: 0,harmonic mean
0,1.627555e-10
1,3.339466e-11
2,4.229319e-10
3,7.171397e-11
4,1.454162e-09


In [23]:
autocorr_peaks_df = pd.DataFrame(peaks_, columns =["ac peak height mean"])
autocorr_peaks_df.head()

Unnamed: 0,ac peak height mean
0,1.072669
1,0.277197
2,0.109943
3,0.353585
4,0.157553


## Now concatenate all features

In [24]:
df = pd.concat([mean_df, std_df, skew_df, kurtosis_df, harmonic_df, autocorr_peaks_df], axis =1)

In [25]:
df

Unnamed: 0,mean,std,skewness,kurtosis,harmonic mean,ac peak height mean
0,-0.001512,0.059670,0.751900,92.901332,1.627555e-10,1.072669
1,-0.000412,0.019801,-0.659232,15.586249,3.339466e-11,0.277197
2,0.000172,0.011843,0.248045,29.625304,4.229319e-10,0.109943
3,0.000048,0.029366,4.424341,385.096582,7.171397e-11,0.353585
4,0.000685,0.015039,-1.157463,14.906036,1.454162e-09,0.157553
...,...,...,...,...,...,...
1075,-0.000493,0.084828,-0.227853,3.791500,4.501717e-10,12.322025
1076,-0.000356,0.082798,-0.004227,3.171172,2.295126e-09,10.250002
1077,-0.000629,0.082643,-0.107542,3.280987,1.091081e-10,9.873036
1078,0.000074,0.082421,-0.107801,3.698629,4.424084e-08,10.836644


In [26]:
df.to_csv("/Users/ecem/Desktop/phonocardiogram/data/MV/present/MV-statistical-features.csv" )