In [1]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
import sys
sys.path.append("../src/data") 

In [3]:
from wav_data_reader import WavDataReader
from signal_dataset import SignalDataset
from independent_wav_feature_extractor import IndependentWavFeatureExtractor
from features_dataset import FeaturesDataset

In [4]:
test_file= r"..\data\interim\testing\#2_#12_230429_SK1_M3_BYB_Recording_2023-04-29_14.42.37.wav"

In [5]:
sample_rate= 10000
reader1= WavDataReader(filename= test_file, sample_rate= sample_rate, extract_key= True)
reader1.get_data()

{12: array([-1.4953613e-03, -1.4953613e-03, -1.4953613e-03, ...,
        -9.1552734e-05, -9.1552734e-05, -9.1552734e-05], dtype=float32)}

In [6]:
signal_dataset1= SignalDataset(signals= reader1.get_values(), labels= reader1.get_keys(), sample_rate= 10000)
signal_dataset1.get_data()

([array([-1.4953613e-03, -1.4953613e-03, -1.4953613e-03, ...,
         -9.1552734e-05, -9.1552734e-05, -9.1552734e-05], dtype=float32)],
 [12])

In [7]:
# You can choose 'peak', 'zscore', or 'min_max'
signal_dataset1.standardize_signals("zscore") 
signal_dataset1.get_datapoint_by_key(12)

(array([-1.4210881 , -1.4210881 , -1.4210881 , ..., -0.02705929,
        -0.02705929, -0.02705929], dtype=float32),
 12)

In [8]:
print(len(signal_dataset1.signals[0]/sample_rate))
time_intervals = [(10, 80)] 
signal_dataset1.trim_signals(time_intervals)
print(signal_dataset1.get_datapoint_by_index(0))
print(len(signal_dataset1.signals[0]/sample_rate))

1255080
(array([-0.20888914, -0.20888914, -0.20888914, ..., -0.11797421,
       -0.11797421, -0.11797421], dtype=float32), 12)
700000


In [9]:
extractor= IndependentWavFeatureExtractor(sample_rate= sample_rate, n_mfcc= 13, n_fft= 2000, hop_length= 500)
features,labels= extractor.extract_all_features(signal_dataset1.signals[0])

for value, label in zip(features, labels):
    print(label,": ",value)

mfcc_1_avg :  -311.14487
mfcc_2_avg :  75.68898
mfcc_3_avg :  30.484838
mfcc_4_avg :  21.789635
mfcc_5_avg :  17.456608
mfcc_6_avg :  13.6166525
mfcc_7_avg :  11.4525175
mfcc_8_avg :  10.0104685
mfcc_9_avg :  8.984079
mfcc_10_avg :  8.19236
mfcc_11_avg :  7.463864
mfcc_12_avg :  6.9191155
mfcc_13_avg :  6.4879026
mfcc_1_std :  30.411032
mfcc_2_std :  18.401423
mfcc_3_std :  7.6227803
mfcc_4_std :  3.681503
mfcc_5_std :  3.4055789
mfcc_6_std :  2.5050993
mfcc_7_std :  2.229995
mfcc_8_std :  2.0488129
mfcc_9_std :  1.9217545
mfcc_10_std :  1.829828
mfcc_11_std :  1.7571698
mfcc_12_std :  1.6609108
mfcc_13_std :  1.6457919
zero_crossing_rate :  1.7142857142857142e-05
root_mean_square_energy :  1.0654374
slope_sign_changes_ratio :  0.012814285714285714
duration_seconds :  70.0
flatness_ratio_10000 :  0.0
flatness_ratio_5000 :  0.016454285714285713
flatness_ratio_1000 :  0.11142142857142857
flatness_ratio_500 :  0.25638285714285713
flatness_ratio_100 :  0.7721085714285715
hjorth_mobility : 

In [10]:
print(labels)

['mfcc_1_avg', 'mfcc_2_avg', 'mfcc_3_avg', 'mfcc_4_avg', 'mfcc_5_avg', 'mfcc_6_avg', 'mfcc_7_avg', 'mfcc_8_avg', 'mfcc_9_avg', 'mfcc_10_avg', 'mfcc_11_avg', 'mfcc_12_avg', 'mfcc_13_avg', 'mfcc_1_std', 'mfcc_2_std', 'mfcc_3_std', 'mfcc_4_std', 'mfcc_5_std', 'mfcc_6_std', 'mfcc_7_std', 'mfcc_8_std', 'mfcc_9_std', 'mfcc_10_std', 'mfcc_11_std', 'mfcc_12_std', 'mfcc_13_std', 'zero_crossing_rate', 'root_mean_square_energy', 'slope_sign_changes_ratio', 'duration_seconds', 'flatness_ratio_10000', 'flatness_ratio_5000', 'flatness_ratio_1000', 'flatness_ratio_500', 'flatness_ratio_100', 'hjorth_mobility', 'hjorth_complexity', 'hurst', 'hurst_r2', 'mean', 'variance', 'standard_deviation', 'interquartile_range', 'skewness', 'kurtosis', 'dfa']


In [11]:
feat_dataset1= FeaturesDataset(features= [features], targets=signal_dataset1.get_labels(),feature_labels=labels)
feat_dataset1.get_data()

([[-311.14487,
   75.68898,
   30.484838,
   21.789635,
   17.456608,
   13.6166525,
   11.4525175,
   10.0104685,
   8.984079,
   8.19236,
   7.463864,
   6.9191155,
   6.4879026,
   30.411032,
   18.401423,
   7.6227803,
   3.681503,
   3.4055789,
   2.5050993,
   2.229995,
   2.0488129,
   1.9217545,
   1.829828,
   1.7571698,
   1.6609108,
   1.6457919,
   1.7142857142857142e-05,
   1.0654374,
   0.012814285714285714,
   70.0,
   0.0,
   0.016454285714285713,
   0.11142142857142857,
   0.25638285714285713,
   0.7721085714285715,
   2.7356301028631535e-06,
   515604.27273836103,
   nan,
   nan,
   0.11915098,
   1.1209601,
   1.0587541,
   1.3940288424491882,
   0.08042018943693519,
   -0.2291486995625478,
   1.6919341097154146]],
 [12])

In [12]:
feat_dataset1.remove_nan_columns()
feat_dataset1.get_data()

Removed columns with NaNs: ['hurst', 'hurst_r2']


([[-311.1448669433594,
   75.68898010253906,
   30.484838485717773,
   21.789634704589844,
   17.456607818603516,
   13.616652488708496,
   11.45251750946045,
   10.010468482971191,
   8.984079360961914,
   8.192359924316406,
   7.463863849639893,
   6.9191155433654785,
   6.487902641296387,
   30.41103172302246,
   18.40142250061035,
   7.6227803230285645,
   3.6815030574798584,
   3.405578851699829,
   2.505099296569824,
   2.229995012283325,
   2.0488128662109375,
   1.9217544794082642,
   1.8298280239105225,
   1.7571698427200317,
   1.6609108448028564,
   1.6457918882369995,
   1.7142857142857142e-05,
   1.0654374361038208,
   0.012814285714285714,
   70.0,
   0.0,
   0.016454285714285713,
   0.11142142857142857,
   0.25638285714285713,
   0.7721085714285715,
   2.7356301028631535e-06,
   515604.27273836103,
   0.11915098130702972,
   1.1209601163864136,
   1.0587540864944458,
   1.3940288424491882,
   0.08042018943693519,
   -0.2291486995625478,
   1.6919341097154146]],
 [12])

In [13]:
feat_dataset1.get_labels()

['mfcc_1_avg',
 'mfcc_2_avg',
 'mfcc_3_avg',
 'mfcc_4_avg',
 'mfcc_5_avg',
 'mfcc_6_avg',
 'mfcc_7_avg',
 'mfcc_8_avg',
 'mfcc_9_avg',
 'mfcc_10_avg',
 'mfcc_11_avg',
 'mfcc_12_avg',
 'mfcc_13_avg',
 'mfcc_1_std',
 'mfcc_2_std',
 'mfcc_3_std',
 'mfcc_4_std',
 'mfcc_5_std',
 'mfcc_6_std',
 'mfcc_7_std',
 'mfcc_8_std',
 'mfcc_9_std',
 'mfcc_10_std',
 'mfcc_11_std',
 'mfcc_12_std',
 'mfcc_13_std',
 'zero_crossing_rate',
 'root_mean_square_energy',
 'slope_sign_changes_ratio',
 'duration_seconds',
 'flatness_ratio_10000',
 'flatness_ratio_5000',
 'flatness_ratio_1000',
 'flatness_ratio_500',
 'flatness_ratio_100',
 'hjorth_mobility',
 'hjorth_complexity',
 'mean',
 'variance',
 'standard_deviation',
 'interquartile_range',
 'skewness',
 'kurtosis',
 'dfa']