In this notebook we are going to focus on features which will be derived from time domain characteristics.

In [1]:
import numpy as np
import math
from scipy.signal import argrelextrema, find_peaks

In [2]:
path = "/Users/ecem/Desktop/phonocardiogram/data/PV/present"

In [3]:
waves = np.load(path + "/present_PV_waves_10sec.npy", allow_pickle= True)

In [4]:
waves.shape

(1198, 40000)

In [5]:
num = 1198

## Mean:

In [6]:
mean_ = np.zeros(num)

In [7]:
for i in range(waves.shape[0]):
    mean_[i] = waves[i].mean()

## Standart deviation:

In [8]:
std_ = np.zeros((num))
for i in range(waves.shape[0]):
    std_[i] = waves[i].std()

## Skewness

In [9]:
from scipy.stats import skew

In [10]:
skew_ = np.zeros((num))
for i in range(waves.shape[0]):
    skew_[i] = skew(waves[i])

## Kurtosis

In [11]:
import scipy.stats

In [12]:
kurt_ = np.zeros((num))
for i in range(waves.shape[0]):
    kurt_[i] =  scipy.stats.kurtosis(waves[i])

## Harmonic Mean

In [13]:
harmonic_ = np.zeros((num))
for i in range(waves.shape[0]):
    harmonic_[i] =  scipy.stats.hmean(waves[i]* waves[i])

## Autocorrelation

In [14]:
from scipy import signal
from scipy.signal import find_peaks

In [15]:
auto_corr = np.zeros((num, 79999))
peaks_ = []
for i in range(waves.shape[0]):
    auto_corr[i,:] = signal.fftconvolve(waves[i], waves[i], mode='full')
    peaks_.append(find_peaks(auto_corr[i], height = 0)[1]["peak_heights"].mean())
    

In [16]:
peaks_ = np.array(peaks_)
peaks_.shape

(1198,)

# Now lets create their dataframes

In [17]:
import pandas as pd

RuntimeError: module compiled against API version 0xe but this version of numpy is 0xd

In [18]:
mean_df = pd.DataFrame(mean_, columns =["mean"])
mean_df.head()

Unnamed: 0,mean
0,0.000223
1,-8.8e-05
2,0.000188
3,0.000247
4,-0.00012


In [19]:
std_df = pd.DataFrame(std_, columns =["std"])
std_df.head()

Unnamed: 0,std
0,0.03271
1,0.042258
2,0.042559
3,0.035224
4,0.029231


In [20]:
kurtosis_df = pd.DataFrame(kurt_, columns = ["kurtosis"])
kurtosis_df.head()

Unnamed: 0,kurtosis
0,11.463146
1,7.173682
2,23.290349
3,11.462254
4,10.447444


In [21]:
skew_df = pd.DataFrame(skew_, columns =["skewness"])
skew_df.head()

Unnamed: 0,skewness
0,0.153055
1,0.014827
2,1.388897
3,0.263047
4,-0.198491


In [22]:
harmonic_df = pd.DataFrame(harmonic_, columns =["harmonic mean"])
harmonic_df.head()

Unnamed: 0,harmonic mean
0,5.490334e-09
1,3.548436e-10
2,2.216367e-08
3,5.297716e-09
4,5.462034e-09


In [27]:
autocorr_peaks_df = pd.DataFrame(peaks_, columns =["ac peak height mean"])
autocorr_peaks_df.head()

Unnamed: 0,ac peak height mean
0,1.09845
1,1.39478
2,1.327466
3,1.011723
4,0.754481


## Now concatenate all features

In [28]:
df = pd.concat([mean_df, std_df, skew_df, kurtosis_df, harmonic_df, autocorr_peaks_df], axis =1)

In [29]:
df

Unnamed: 0,mean,std,skewness,kurtosis,harmonic mean,ac peak height mean
0,0.000223,0.032710,0.153055,11.463146,5.490334e-09,1.098450
1,-0.000088,0.042258,0.014827,7.173682,3.548436e-10,1.394780
2,0.000188,0.042559,1.388897,23.290349,2.216367e-08,1.327466
3,0.000247,0.035224,0.263047,11.462254,5.297716e-09,1.011723
4,-0.000120,0.029231,-0.198491,10.447444,5.462034e-09,0.754481
...,...,...,...,...,...,...
1193,-0.000208,0.050081,0.228460,10.491742,1.300526e-08,2.566832
1194,-0.000440,0.044552,0.333995,11.092413,4.201546e-09,2.166088
1195,-0.000188,0.032899,0.288181,10.986533,2.304217e-09,1.027875
1196,0.000025,0.040093,0.883046,13.957108,2.279876e-09,1.525123


In [30]:
df.to_csv("/Users/ecem/Desktop/phonocardiogram/data/PV/present/PV-statistical-features.csv" )