In this notebook we are going to focus on features which will be derived from time domain characteristics.

In [1]:
import numpy as np
import math
from scipy.signal import argrelextrema, find_peaks

In [2]:
path = "/Users/ecem/Desktop/phonocardiogram/data/TV/present"

In [3]:
waves = np.load(path + "/present_TV_waves_10sec.npy", allow_pickle= True)

In [4]:
waves.shape

(1205, 40000)

In [5]:
num = 1205

## Mean:

In [6]:
mean_ = np.zeros(num)

In [7]:
for i in range(waves.shape[0]):
    mean_[i] = waves[i].mean()

## Standart deviation:

In [8]:
std_ = np.zeros((num))
for i in range(waves.shape[0]):
    std_[i] = waves[i].std()

## Skewness

In [9]:
from scipy.stats import skew

In [10]:
skew_ = np.zeros((num))
for i in range(waves.shape[0]):
    skew_[i] = skew(waves[i])

## Kurtosis

In [11]:
import scipy.stats

In [12]:
kurt_ = np.zeros((num))
for i in range(waves.shape[0]):
    kurt_[i] =  scipy.stats.kurtosis(waves[i])

## Harmonic Mean

In [13]:
harmonic_ = np.zeros((num))
for i in range(waves.shape[0]):
    harmonic_[i] =  scipy.stats.hmean(waves[i]* waves[i])

## Autocorrelation

In [14]:
from scipy import signal
from scipy.signal import find_peaks

In [17]:
auto_corr = np.zeros((num, 79999))
peaks_ = []
for i in range(waves.shape[0]):
    auto_corr[i,:] = signal.fftconvolve(waves[i], waves[i], mode='full')
    peaks_.append(find_peaks(auto_corr[i], height = 0)[1]["peak_heights"].mean())
    

In [18]:
peaks_ = np.array(peaks_)
peaks_.shape

(1205,)

# Now lets create their dataframes

In [19]:
import pandas as pd

RuntimeError: module compiled against API version 0xe but this version of numpy is 0xd

In [20]:
mean_df = pd.DataFrame(mean_, columns =["mean"])
mean_df.head()

Unnamed: 0,mean
0,0.00039
1,0.001652
2,0.00189
3,0.00062
4,-0.000184


In [21]:
std_df = pd.DataFrame(std_, columns =["std"])
std_df.head()

Unnamed: 0,std
0,0.040663
1,0.037293
2,0.067046
3,0.059013
4,0.144523


In [22]:
kurtosis_df = pd.DataFrame(kurt_, columns = ["kurtosis"])
kurtosis_df.head()

Unnamed: 0,kurtosis
0,121.147759
1,112.801709
2,114.252388
3,102.665325
4,21.109016


In [23]:
skew_df = pd.DataFrame(skew_, columns =["skewness"])
skew_df.head()

Unnamed: 0,skewness
0,-4.327962
1,-4.281472
2,-1.281737
3,0.07906
4,0.106205


In [24]:
harmonic_df = pd.DataFrame(harmonic_, columns =["harmonic mean"])
harmonic_df.head()

Unnamed: 0,harmonic mean
0,8.753988e-10
1,1.891145e-09
2,0.0
3,1.086456e-09
4,1.717525e-10


In [25]:
autocorr_peaks_df = pd.DataFrame(peaks_, columns =["ac peak height mean"])
autocorr_peaks_df.head()

Unnamed: 0,ac peak height mean
0,0.701984
1,0.684384
2,1.54122
3,1.132707
4,18.222118


## Now concatenate all features

In [26]:
df = pd.concat([mean_df, std_df, skew_df, kurtosis_df, harmonic_df, autocorr_peaks_df], axis =1)

In [27]:
df

Unnamed: 0,mean,std,skewness,kurtosis,harmonic mean,ac peak height mean
0,0.000390,0.040663,-4.327962,121.147759,8.753988e-10,0.701984
1,0.001652,0.037293,-4.281472,112.801709,1.891145e-09,0.684384
2,0.001890,0.067046,-1.281737,114.252388,0.000000e+00,1.541220
3,0.000620,0.059013,0.079060,102.665325,1.086456e-09,1.132707
4,-0.000184,0.144523,0.106205,21.109016,1.717525e-10,18.222118
...,...,...,...,...,...,...
1200,-0.000341,0.222230,0.235907,3.293451,2.340264e-08,94.429661
1201,-0.000602,0.191557,0.023518,4.545471,2.591139e-07,69.914666
1202,0.000464,0.214602,0.145022,4.014102,1.628049e-07,100.846936
1203,-0.000350,0.190516,0.021085,3.707966,3.945123e-07,71.007218


In [28]:
df.to_csv("/Users/ecem/Desktop/phonocardiogram/data/TV/present/TV-statistical-features.csv" )