In [1]:
import pandas as pd 
from scipy.stats import mannwhitneyu
from scipy import stats
import numpy as np

In [2]:
# Folders holding the "present" recordings' feature files, one per heart valve
# (AV, MV, PV, TV).
AV_path, MV_path, PV_path, TV_path = (
    "/Users/ecem/Desktop/phonocardiogram/data/AV/present",
    "/Users/ecem/Desktop/phonocardiogram/data/MV/present",
    "/Users/ecem/Desktop/phonocardiogram/data/PV/present",
    "/Users/ecem/Desktop/phonocardiogram/data/TV/present",
)

# Time domain features

In [3]:
# Load the per-valve feature tables; the first CSV column becomes the row index.
_feature_files = (
    (AV_path, "/AV-mock-features.csv"),
    (MV_path, "/MV-mock-features.csv"),
    (PV_path, "/PV-mock-features.csv"),
    (TV_path, "/TV-mock-features.csv"),
)
av, mv, pv, tv = (
    pd.read_csv(folder + file_name, index_col=0)
    for folder, file_name in _feature_files
)

In [4]:
# Preview the AV feature table; as the cell's last expression it is rendered by the notebook.
av

Unnamed: 0,bandpower,petrosian f.a.,katz f.a.,higuchi f.a.,deterended fluctuations f.a.
0,0.001371,1.002736,1.920990,1.091685,0.921825
1,0.000066,1.002308,2.377508,1.052431,0.904970
2,0.000024,1.002273,2.223252,1.046259,0.982775
3,0.000038,1.002218,2.291023,1.048935,0.942318
4,0.000043,1.002254,2.274571,1.046809,0.918872
...,...,...,...,...,...
1240,0.000084,1.002914,1.958447,1.109790,1.264767
1241,0.000134,1.002629,1.833902,1.087668,1.251488
1242,0.000113,1.002242,1.998854,1.075552,1.257509
1243,0.000171,1.002314,1.838299,1.090169,1.253572


### AV-MV

In [5]:
# Two-sided Mann-Whitney U test (no continuity correction) between the AV and MV
# feature tables. The test is applied per column, so the result carries one
# U statistic and one p-value per feature, in column order.
# NOTE(review): assumes both CSVs list the features in the same column order.
# NOTE(review): six valve pairs x five features are compared at 0.05 with no
# multiple-comparison correction - confirm that this is intended.
test = mannwhitneyu(av, mv,
                    use_continuity=False, alternative = 'two-sided')
# Stored as `u_stats`, not `stats`, so the `scipy.stats` module imported at the
# top of the notebook is not overwritten by a DataFrame.
u_stats = pd.DataFrame(test.statistic)
p = pd.DataFrame(test.pvalue)


In [6]:
# Show the AV-MV p-values as a single row and shade every cell below 0.05 in pink.
p.T.style.apply(
    lambda row: ["background: pink" if cell < 0.05 else "" for cell in row],
    axis=1,
)

Unnamed: 0,0,1,2,3,4
0,0.045227,0.0,0.25104,0.0,0.624027


In [7]:
# Row labels of `p` (feature positions) whose AV-MV p-value is below 0.05.
significant = (p < 0.05).any(axis=1)
av_mv = p.loc[significant].index
av_mv

Int64Index([0, 1, 3], dtype='int64')

### AV-PV

In [8]:
# Two-sided Mann-Whitney U test (no continuity correction) between the AV and PV
# feature tables; one U statistic and one p-value per feature column.
# NOTE(review): assumes both CSVs list the features in the same column order.
test = mannwhitneyu(av, pv, use_continuity=False, alternative = 'two-sided')
# Stored as `u_stats`, not `stats`, so the `scipy.stats` module imported at the
# top of the notebook is not overwritten by a DataFrame.
u_stats = pd.DataFrame(test.statistic)
p = pd.DataFrame(test.pvalue)

# Row labels of `p` (feature positions) whose p-value is below 0.05.
av_pv = p[(p < 0.05).any(axis=1)].index

print(av_pv)

Int64Index([2, 3, 4], dtype='int64')


In [9]:
# Show the AV-PV p-values as a single row and shade every cell below 0.05 in pink.
p.T.style.apply(
    lambda row: ["background: pink" if cell < 0.05 else "" for cell in row],
    axis=1,
)

Unnamed: 0,0,1,2,3,4
0,0.224854,0.061482,0.0,0.000542,0.0


### AV-TV

In [10]:
# Two-sided Mann-Whitney U test (no continuity correction) between the AV and TV
# feature tables; one U statistic and one p-value per feature column.
# NOTE(review): assumes both CSVs list the features in the same column order.
test = mannwhitneyu(av, tv, use_continuity=False, alternative = 'two-sided')
# Stored as `u_stats`, not `stats`, so the `scipy.stats` module imported at the
# top of the notebook is not overwritten by a DataFrame.
u_stats = pd.DataFrame(test.statistic)
p = pd.DataFrame(test.pvalue)

# A Styler expression in the middle of a cell is never rendered (only the last
# expression is), so the highlighted table is produced in its own cell instead.

# Row labels of `p` (feature positions) whose p-value is below 0.05.
av_tv = p[(p < 0.05).any(axis=1)].index

print(av_tv)

Int64Index([1, 2, 3, 4], dtype='int64')


In [11]:
# Show the AV-TV p-values as a single row and shade every cell below 0.05 in pink.
p.T.style.apply(
    lambda row: ["background: pink" if cell < 0.05 else "" for cell in row],
    axis=1,
)

Unnamed: 0,0,1,2,3,4
0,0.761824,0.0,0.0,0.0,0.0


### MV-PV

In [12]:
# Two-sided Mann-Whitney U test (no continuity correction) between the MV and PV
# feature tables; one U statistic and one p-value per feature column.
# NOTE(review): assumes both CSVs list the features in the same column order.
test = mannwhitneyu(mv, pv, use_continuity=False, alternative = 'two-sided')
# Stored as `u_stats`, not `stats`, so the `scipy.stats` module imported at the
# top of the notebook is not overwritten by a DataFrame.
u_stats = pd.DataFrame(test.statistic)
p = pd.DataFrame(test.pvalue)

# Row labels of `p` (feature positions) whose p-value is below 0.05.
mv_pv = p[(p < 0.05).any(axis=1)].index

print(mv_pv)

Int64Index([1, 2, 3, 4], dtype='int64')


In [13]:
# Show the MV-PV p-values as a single row and shade every cell below 0.05 in pink.
p.T.style.apply(
    lambda row: ["background: pink" if cell < 0.05 else "" for cell in row],
    axis=1,
)

Unnamed: 0,0,1,2,3,4
0,0.344443,0.0,7.8e-05,0.0,0.0


### MV-TV

In [14]:
# Two-sided Mann-Whitney U test (no continuity correction) between the MV and TV
# feature tables; one U statistic and one p-value per feature column.
# NOTE(review): assumes both CSVs list the features in the same column order.
test = mannwhitneyu(mv, tv, use_continuity=False, alternative = 'two-sided')
# Stored as `u_stats`, not `stats`, so the `scipy.stats` module imported at the
# top of the notebook is not overwritten by a DataFrame.
u_stats = pd.DataFrame(test.statistic)
p = pd.DataFrame(test.pvalue)

# Row labels of `p` (feature positions) whose p-value is below 0.05.
mv_tv = p[(p < 0.05).any(axis=1)].index

print(mv_tv)

Int64Index([2, 4], dtype='int64')


In [15]:
# Show the MV-TV p-values as a single row and shade every cell below 0.05 in pink.
p.T.style.apply(
    lambda row: ["background: pink" if cell < 0.05 else "" for cell in row],
    axis=1,
)

Unnamed: 0,0,1,2,3,4
0,0.076066,0.237673,0.0,0.114408,0.0


### PV-TV

In [16]:
# Two-sided Mann-Whitney U test (no continuity correction) between the PV and TV
# feature tables; one U statistic and one p-value per feature column.
# NOTE(review): assumes both CSVs list the features in the same column order.
test = mannwhitneyu(pv, tv, use_continuity=False, alternative = 'two-sided')
# Stored as `u_stats`, not `stats`, so the `scipy.stats` module imported at the
# top of the notebook is not overwritten by a DataFrame.
u_stats = pd.DataFrame(test.statistic)
p = pd.DataFrame(test.pvalue)

# Row labels of `p` (feature positions) whose p-value is below 0.05.
pv_tv = p[(p < 0.05).any(axis=1)].index

print(pv_tv)

Int64Index([1, 3, 4], dtype='int64')


In [17]:
# Show the PV-TV p-values as a single row and shade every cell below 0.05 in pink.
p.T.style.apply(
    lambda row: ["background: pink" if cell < 0.05 else "" for cell in row],
    axis=1,
)

Unnamed: 0,0,1,2,3,4
0,0.303923,0.0,0.052608,0.0,0.017737


## As a result, we will exclude only `bandpower` (it is significant at 0.05 for just one of the six valve pairs, AV-MV). Let's build the final dataframe.

In [18]:
# Stack the four valve tables row-wise into one feature table and preview it.
# NOTE(review): each table keeps its own row labels, so labels repeat across
# valves in the combined frame - confirm nothing downstream needs unique labels.
valve_frames = [av, mv, pv, tv]
df = pd.concat(valve_frames, axis=0)
df.head()

Unnamed: 0,bandpower,petrosian f.a.,katz f.a.,higuchi f.a.,deterended fluctuations f.a.
0,0.001371,1.002736,1.92099,1.091685,0.921825
1,6.6e-05,1.002308,2.377508,1.052431,0.90497
2,2.4e-05,1.002273,2.223252,1.046259,0.982775
3,3.8e-05,1.002218,2.291023,1.048935,0.942318
4,4.3e-05,1.002254,2.274571,1.046809,0.918872


In [19]:
# Remove the `bandpower` feature from the combined table. The result is rebound
# to `df` instead of mutating the frame with `inplace=True`, and the column is
# named through `columns=` rather than a positional list plus `axis=1`.
# This cell expects `df` to still contain `bandpower`; re-run the concat cell
# before re-running it.
df = df.drop(columns=["bandpower"])
df

Unnamed: 0,petrosian f.a.,katz f.a.,higuchi f.a.,deterended fluctuations f.a.
0,1.002736,1.920990,1.091685,0.921825
1,1.002308,2.377508,1.052431,0.904970
2,1.002273,2.223252,1.046259,0.982775
3,1.002218,2.291023,1.048935,0.942318
4,1.002254,2.274571,1.046809,0.918872
...,...,...,...,...
1200,1.001457,1.917979,1.016475,1.278429
1201,1.001736,1.853328,1.022238,1.283995
1202,1.001800,1.901451,1.022018,1.306491
1203,1.002260,1.815818,1.054990,1.273177


In [20]:
# Write the final feature table (row labels included) to a single CSV file.
output_csv = "/Users/ecem/Desktop/phonocardiogram/data/mock-features.csv"
df.to_csv(output_csv)