# Audio chunk
* Extract features for cough chunks
  * AICovidVN: 31,542
  * Coswara: 22,878
  * Coughvid: 84,981

## Data

### Set up

In [None]:
from IPython.display import clear_output, Audio
import IPython.display as ipd
import os
import numpy as np
import shutil
import pandas as pd
import logging

# Named logger shared by every cell of this notebook.
logger = logging.getLogger('MOBILENET')
logger.setLevel(logging.DEBUG)

# create formatter
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# create console handler, set its level to debug and give it the formatter
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

# logging.getLogger returns the same logger object on every call, so
# re-running this cell used to attach one more StreamHandler each time and
# every message was then printed once per attached handler. Attach the
# console handler only when the logger has none yet.
if not logger.handlers:
  logger.addHandler(ch)
# Do not truncate wide cell values when dataframes are displayed.
pd.options.display.max_colwidth = None
# Clear whatever this cell has printed so far.
clear_output()
# data_zip = '/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking.tar.gz'
# shutil.unpack_archive(data_zip, '/content/')

In [None]:
# Output directory: the extracted feature arrays (.npy) are saved under this path.
root = '/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet'
# Input directory: the per-dataset chunk metadata CSVs are read from here and
# chunk paths are prefixed with it (see add_root).
data_root = '/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking'

In [None]:
def add_root(s, name):
  """Prefix a dataset-relative path with the data root and the dataset folder.

  Args:
    s: Path relative to the dataset folder (used on 'chunk_path' values).
    name: Name of the dataset sub-directory under `data_root`.

  Returns:
    The result of joining `data_root`, `name` and `s` with os.path.join,
    as a str.
  """
  return str(os.path.join(data_root, name, s))

### AICovidVN

In [None]:
# Location of the AICovidVN chunk metadata CSV under data_root.
aicovidvn = os.path.join(data_root, 'AICovidVN/aicovidvn_chunk_metadata.csv')

In [None]:
# Load the AICovidVN chunk metadata into a dataframe.
aicovidvn_df = pd.read_csv(aicovidvn)

In [None]:
# Prefix every chunk path with data_root and the dataset folder.
aicovidvn_df['chunk_path'] = aicovidvn_df['chunk_path'].map(
    lambda rel_path: add_root(rel_path, name='AICovidVN'))

In [None]:
# Preview the first rows to check the rewritten chunk_path values.
aicovidvn_df.head()

Unnamed: 0,uuid,age,gender,label,sr_path,chunk_path,num_chunk
0,3284bcf1-2446-4f3a-ac66-14c76b294177-0,23.0,male,0,aicovidvn_cough_data/3284bcf1-2446-4f3a-ac66-14c76b294177.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/AICovidVN/aicovidvn_cough_chunk/3284bcf1-2446-4f3a-ac66-14c76b294177-0.wav,11
1,3284bcf1-2446-4f3a-ac66-14c76b294177-1,23.0,male,0,aicovidvn_cough_data/3284bcf1-2446-4f3a-ac66-14c76b294177.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/AICovidVN/aicovidvn_cough_chunk/3284bcf1-2446-4f3a-ac66-14c76b294177-1.wav,11
2,3284bcf1-2446-4f3a-ac66-14c76b294177-2,23.0,male,0,aicovidvn_cough_data/3284bcf1-2446-4f3a-ac66-14c76b294177.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/AICovidVN/aicovidvn_cough_chunk/3284bcf1-2446-4f3a-ac66-14c76b294177-2.wav,11
3,3284bcf1-2446-4f3a-ac66-14c76b294177-3,23.0,male,0,aicovidvn_cough_data/3284bcf1-2446-4f3a-ac66-14c76b294177.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/AICovidVN/aicovidvn_cough_chunk/3284bcf1-2446-4f3a-ac66-14c76b294177-3.wav,11
4,3284bcf1-2446-4f3a-ac66-14c76b294177-4,23.0,male,0,aicovidvn_cough_data/3284bcf1-2446-4f3a-ac66-14c76b294177.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/AICovidVN/aicovidvn_cough_chunk/3284bcf1-2446-4f3a-ac66-14c76b294177-4.wav,11


### Coswara

In [None]:
# Load the Coswara chunk metadata and prefix every chunk path with
# data_root and the dataset folder in one step.
coswara = os.path.join(data_root, 'coswara/coswara_chunk_metadata.csv')
coswara_df = pd.read_csv(coswara).assign(
    chunk_path=lambda frame: frame['chunk_path'].map(
        lambda rel_path: add_root(rel_path, name='coswara')))
coswara_df.head()

Unnamed: 0,uuid,age,record_date,is_english_proficiency,gender,country,locality,state,is_returning_user,is_smoker,is_cold,is_hypertension,is_diabetes,is_cough,date_of_ct_scan,has_ctScan,ct_score,is_diarrheoa,is_fever,is_loss_of_smell,is_muscle_pain,test_type,test_date,test_status,is_using_mask,vaccination_status,is_breathing_difficulty,is_others_resp,is_fatigue,is_sore_throat,is_ischemic_heart_disease,is_asthma,is_others_preexist_conditions,is_chronic_lung_disease,is_neumonia,label,sr_cough,chunk_path,num_chunk
0,20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-0,28,2020-04-23,y,male,india,anantapur,andhra pradesh,n,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coswara_cough_data/20200424/iV3Db6t1T8b7c5HQY2TwxIhjbzD3/cough-shallow.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coswara/coswara_cough_chunk/20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-0.wav,8
1,20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-1,28,2020-04-23,y,male,india,anantapur,andhra pradesh,n,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coswara_cough_data/20200424/iV3Db6t1T8b7c5HQY2TwxIhjbzD3/cough-shallow.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coswara/coswara_cough_chunk/20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-1.wav,8
2,20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-2,28,2020-04-23,y,male,india,anantapur,andhra pradesh,n,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coswara_cough_data/20200424/iV3Db6t1T8b7c5HQY2TwxIhjbzD3/cough-shallow.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coswara/coswara_cough_chunk/20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-2.wav,8
3,20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-3,28,2020-04-23,y,male,india,anantapur,andhra pradesh,n,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coswara_cough_data/20200424/iV3Db6t1T8b7c5HQY2TwxIhjbzD3/cough-shallow.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coswara/coswara_cough_chunk/20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-3.wav,8
4,20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-4,28,2020-04-23,y,male,india,anantapur,andhra pradesh,n,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coswara_cough_data/20200424/iV3Db6t1T8b7c5HQY2TwxIhjbzD3/cough-shallow.wav,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coswara/coswara_cough_chunk/20200424-iV3Db6t1T8b7c5HQY2TwxIhjbzD3-cough-shallow-4.wav,8


### Coughvid

In [None]:
# Load the Coughvid chunk metadata and prefix every chunk path with
# data_root and the dataset folder in one step.
coughvid = os.path.join(data_root, 'coughvid/coughvid_chunk_metadata.csv')
coughvid_df = pd.read_csv(coughvid).assign(
    chunk_path=lambda frame: frame['chunk_path'].map(
        lambda rel_path: add_root(rel_path, name='coughvid')))
coughvid_df.head()

Unnamed: 0,uuid,datetime,cough_detected,latitude,longitude,age,gender,respiratory_condition,fever_muscle_pain,quality_1,cough_type_1,dyspnea_1,wheezing_1,stridor_1,choking_1,congestion_1,nothing_1,diagnosis_1,severity_1,quality_2,cough_type_2,dyspnea_2,wheezing_2,stridor_2,choking_2,congestion_2,nothing_2,diagnosis_2,severity_2,quality_3,dyspnea_3,wheezing_3,stridor_3,choking_3,congestion_3,nothing_3,cough_type_3,diagnosis_3,severity_3,label,sr_cough,chunk_path,num_chunk
0,00039425-7f3a-42aa-ac13-834aaa2b6b92-0,2020-04-13t21:30:59.801831+00:00,0.9609,31.3,34.8,15.0,male,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coughvid_cough_data/00039425-7f3a-42aa-ac13-834aaa2b6b92.webm,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coughvid/coughvid_cough_chunk/00039425-7f3a-42aa-ac13-834aaa2b6b92-0.wav,3
1,00039425-7f3a-42aa-ac13-834aaa2b6b92-1,2020-04-13t21:30:59.801831+00:00,0.9609,31.3,34.8,15.0,male,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coughvid_cough_data/00039425-7f3a-42aa-ac13-834aaa2b6b92.webm,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coughvid/coughvid_cough_chunk/00039425-7f3a-42aa-ac13-834aaa2b6b92-1.wav,3
2,00039425-7f3a-42aa-ac13-834aaa2b6b92-2,2020-04-13t21:30:59.801831+00:00,0.9609,31.3,34.8,15.0,male,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coughvid_cough_data/00039425-7f3a-42aa-ac13-834aaa2b6b92.webm,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coughvid/coughvid_cough_chunk/00039425-7f3a-42aa-ac13-834aaa2b6b92-2.wav,3
3,0009eb28-d8be-4dc1-92bb-907e53bc5c7a-0,2020-04-12t04:02:18.159383+00:00,0.9301,40.0,-75.1,34.0,male,True,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coughvid_cough_data/0009eb28-d8be-4dc1-92bb-907e53bc5c7a.webm,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coughvid/coughvid_cough_chunk/0009eb28-d8be-4dc1-92bb-907e53bc5c7a-0.wav,3
4,0009eb28-d8be-4dc1-92bb-907e53bc5c7a-1,2020-04-12t04:02:18.159383+00:00,0.9301,40.0,-75.1,34.0,male,True,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,healthy,coughvid_cough_data/0009eb28-d8be-4dc1-92bb-907e53bc5c7a.webm,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coughvid/coughvid_cough_chunk/0009eb28-d8be-4dc1-92bb-907e53bc5c7a-1.wav,3


## Feature extraction
* Image-like log-mel spectrogram of shape (224, 236)
* MFCC features (13, 26 or 39 coefficients), with the coefficient matrix repeated along the coefficient axis to reach shape (234, 236)

### Set up

In [None]:
import librosa
import librosa.display as ldp
import matplotlib.pyplot as plt
import IPython.display as ipd
import numpy as np
# clear_output()

In [None]:
# Slice bounds, in samples, applied to every loaded chunk: only
# data[START_SAMPLE:SAMPLES_TO_CONSIDER] is used for feature extraction.
# NOTE(review): presumably chosen for librosa.load's default 22,050 Hz rate
# (i.e. roughly 0.45 s to 1.0 s of each chunk) — confirm.
START_SAMPLE = 10000
SAMPLES_TO_CONSIDER = 22050


def show_feature(feature, sample_rate):
  """Draw a feature matrix as a heat map with a colour bar.

  Args:
    feature: Matrix handed to librosa.display.specshow.
    sample_rate: Sampling rate forwarded to specshow as `sr`.
  """
  plt.figure(figsize=(15, 10))
  axis_options = dict(x_axis="time", y_axis="mel", sr=sample_rate)
  ldp.specshow(feature, **axis_options)
  plt.colorbar(format="%+2.f")
  plt.show()


def lengthen_cough(data: np.ndarray, n):
  """Repeat `data` `n` times back-to-back along its first axis.

  Used both on 1-D waveforms (to lengthen the audio) and on 2-D MFCC
  matrices (to stack the coefficient rows repeatedly).

  Args:
    data: Array to repeat.
    n: Total number of copies in the result.

  Returns:
    `data` itself when n <= 1, otherwise a new array holding `n` copies of
    `data` concatenated along axis 0.
  """
  if n <= 1:
    # Nothing to append: hand the input back untouched.
    return data
  # One concatenate call instead of n-1 pairwise ones, which re-copied the
  # ever-growing buffer on each iteration.
  return np.concatenate([data] * n)

def concat_list(data: list, n):
  """Return a new list holding the items of `data` repeated `n` times.

  Args:
    data: Items to repeat; the input is not modified.
    n: Number of passes over `data`; values below 1 give an empty list.

  Returns:
    A new list with the items of `data` in order, `n` times over.
  """
  return [item for _ in range(n) for item in data]


def mel_spectrogram(path: str, nfft=2048, hoplen=512, nmels=224):
  """Compute a log-mel spectrogram for one audio chunk.

  The file is loaded with librosa.load's default settings, cut down to
  samples [START_SAMPLE, SAMPLES_TO_CONSIDER), repeated 10 times and turned
  into a mel power spectrogram that is converted to decibels.

  Args:
    path: Audio file to load.
    nfft: FFT window size passed as `n_fft`.
    hoplen: Hop length passed as `hop_length`.
    nmels: Number of mel bands passed as `n_mels`.

  Returns:
    The output of librosa.power_to_db for the mel spectrogram. The
    notebook's notes quote shape (224, 236) for the default arguments.

  NOTE(review): a file with fewer than SAMPLES_TO_CONSIDER samples gives a
  shorter segment and therefore a different number of frames — confirm all
  chunks are long enough before stacking the results.
  """
  waveform, sr = librosa.load(path)
  segment = waveform[START_SAMPLE:SAMPLES_TO_CONSIDER]
  power_spec = librosa.feature.melspectrogram(
      y=lengthen_cough(segment, 10),
      sr=sr,
      n_fft=nfft,
      hop_length=hoplen,
      n_mels=nmels,
  )
  return librosa.power_to_db(power_spec)

def n_mfcc(path: str, nfft=2048, hoplen=512, nmels=224, nmfcc=13):
  """Compute MFCCs for one audio chunk and stack them into a tall matrix.

  The file is loaded with librosa.load's default settings, cut down to
  samples [START_SAMPLE, SAMPLES_TO_CONSIDER) and repeated 10 times before
  the MFCCs are computed. The MFCC matrix is then repeated along its first
  axis int(nmels / nmfcc) + 1 times.

  Args:
    path: Audio file to load.
    nfft: FFT window size passed as `n_fft`.
    hoplen: Hop length passed as `hop_length`.
    nmels: Only used to derive the repeat count; it is not forwarded to
      librosa.feature.mfcc.
    nmfcc: Number of coefficients passed as `n_mfcc`.

  Returns:
    The repeated MFCC matrix; with nmels=224 and nmfcc of 13, 26 or 39 the
    first axis has 234 rows.
  """
  waveform, sr = librosa.load(path)
  segment = lengthen_cough(waveform[START_SAMPLE:SAMPLES_TO_CONSIDER], 10)
  coeffs = librosa.feature.mfcc(
      y=segment,
      sr=sr,
      n_fft=nfft,
      hop_length=hoplen,
      n_mfcc=nmfcc,
  )
  repeats = int(nmels / nmfcc) + 1
  return lengthen_cough(coeffs, repeats)

In [None]:
def extract_feature(data: pd.DataFrame, f_type: str, nmfcc=13):
  """Extract one feature matrix per chunk listed in `data`.

  Args:
    data: Dataframe with a 'chunk_path' column of audio file paths.
    f_type: 'melspectrogram' or 'mfcc' (case-insensitive).
    nmfcc: Number of MFCC coefficients; only used when f_type is 'mfcc'.

  Returns:
    np.array built from the list of per-chunk feature matrices, in the
    order of the rows of `data`.

  Raises:
    ValueError: if `f_type` is not one of the supported feature types.
      Previously an unknown type fell through silently, logged "Done" and
      returned an empty array.
  """
  kind = f_type.lower()
  if kind not in ('melspectrogram', 'mfcc'):
    raise ValueError(
        f"Unsupported f_type {f_type!r}; expected 'melspectrogram' or 'mfcc'")

  features = []
  for fp in data['chunk_path'].tolist():
    if kind == 'melspectrogram':
      features.append(mel_spectrogram(path=fp))
    else:
      features.append(n_mfcc(path=fp, nmfcc=nmfcc))
    # Progress report every 1000 chunks.
    if len(features) % 1000 == 0:
      logger.info(f'{len(features)} chunks extracted !!!')

  logger.info(f'{len(features)} chunks extracted !!!')
  logger.info('Feature extraction: Done !!!')
  return np.array(features)

### AICovidVN

In [None]:
# Mel spectrograms for the first 15,000 AICovidVN chunks; the remaining rows
# are extracted in a separate cell.
features = extract_feature(aicovidvn_df[:15000], 'melspectrogram')

2022-02-12 13:30:26,569 - MOBILENET - INFO - 1000 chunk extracted !!!
2022-02-12 13:31:48,253 - MOBILENET - INFO - 2000 chunk extracted !!!
2022-02-12 13:33:05,882 - MOBILENET - INFO - 3000 chunk extracted !!!
2022-02-12 13:34:24,673 - MOBILENET - INFO - 4000 chunk extracted !!!
2022-02-12 13:35:42,353 - MOBILENET - INFO - 5000 chunk extracted !!!
2022-02-12 13:37:10,594 - MOBILENET - INFO - 6000 chunk extracted !!!
2022-02-12 13:38:39,110 - MOBILENET - INFO - 7000 chunk extracted !!!
2022-02-12 13:40:17,575 - MOBILENET - INFO - 8000 chunk extracted !!!
2022-02-12 13:41:44,841 - MOBILENET - INFO - 9000 chunk extracted !!!
2022-02-12 13:43:12,739 - MOBILENET - INFO - 10000 chunk extracted !!!
2022-02-12 13:44:39,656 - MOBILENET - INFO - 11000 chunk extracted !!!
2022-02-12 13:46:06,659 - MOBILENET - INFO - 12000 chunk extracted !!!
2022-02-12 13:47:36,711 - MOBILENET - INFO - 13000 chunk extracted !!!
2022-02-12 13:49:09,842 - MOBILENET - INFO - 14000 chunk extracted !!!
2022-02-12 13:5

In [None]:
# Persist the first AICovidVN batch and log where it was written.
out_path = os.path.join(root, 'melspectrogram_aicovidvn_features_15k.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-12 13:51:02,798 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/melspectrogram_aicovidvn_features_15k.npy


In [None]:
# Mel spectrograms for the AICovidVN chunks from row 15,000 onwards.
features_16k = extract_feature(aicovidvn_df[15000:], 'melspectrogram')

2022-02-12 13:55:39,007 - MOBILENET - INFO - 1000 chunk extracted !!!
2022-02-12 13:57:15,434 - MOBILENET - INFO - 2000 chunk extracted !!!
2022-02-12 13:58:43,228 - MOBILENET - INFO - 3000 chunk extracted !!!
2022-02-12 14:00:13,230 - MOBILENET - INFO - 4000 chunk extracted !!!
2022-02-12 14:01:48,050 - MOBILENET - INFO - 5000 chunk extracted !!!
2022-02-12 14:03:14,849 - MOBILENET - INFO - 6000 chunk extracted !!!
2022-02-12 14:04:48,588 - MOBILENET - INFO - 7000 chunk extracted !!!
2022-02-12 14:06:20,664 - MOBILENET - INFO - 8000 chunk extracted !!!
2022-02-12 14:07:51,842 - MOBILENET - INFO - 9000 chunk extracted !!!
2022-02-12 14:09:17,460 - MOBILENET - INFO - 10000 chunk extracted !!!
2022-02-12 14:10:48,992 - MOBILENET - INFO - 11000 chunk extracted !!!
2022-02-12 14:12:15,459 - MOBILENET - INFO - 12000 chunk extracted !!!
2022-02-12 14:13:44,725 - MOBILENET - INFO - 13000 chunk extracted !!!
2022-02-12 14:15:17,544 - MOBILENET - INFO - 14000 chunk extracted !!!
2022-02-12 14:1

In [None]:
# Persist the second AICovidVN batch and log where it was written.
out_path = os.path.join(root, 'melspectrogram_aicovidvn_features_16542.npy')
np.save(out_path, features_16k)
logger.info(out_path)

2022-02-12 14:19:19,503 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/melspectrogram_aicovidvn_features_16542.npy


In [None]:
# Row/column count of the second AICovidVN slice (rows 15,000 onwards).
aicovidvn_df[15000:].shape

(16542, 7)

In [None]:
# from numpy import load
# # load dict of arrays
# dict_data = load(os.path.join(root, 'melspectrogram_aicovidvn_features.npy'))


### Coswara

#### MFCC 13, 26, 39


In [None]:
# Coswara, 13 MFCCs: extract for every chunk, save, and log the file path.
out_path = os.path.join(root, 'mfcc_13_coswara_features.npy')
features = extract_feature(coswara_df, 'mfcc', nmfcc=13)
np.save(out_path, features)
logger.info(out_path)

2022-02-20 13:53:19,261 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-20 14:00:05,094 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-20 14:07:03,720 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-20 14:13:29,792 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-20 14:20:01,586 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-20 14:26:15,699 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-20 14:32:37,591 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-20 14:39:02,744 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-20 14:45:29,209 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-20 14:51:57,032 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-20 14:57:56,573 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-20 15:04:08,088 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-20 15:10:35,354 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-20 15:17:06,984 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Coswara, 26 MFCCs: extract for every chunk, save, and log the file path.
out_path = os.path.join(root, 'mfcc_26_coswara_features.npy')
features = extract_feature(coswara_df, 'mfcc', nmfcc=26)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 00:11:10,446 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 00:15:42,247 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 00:20:23,904 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 00:24:51,072 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 00:29:17,904 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 00:33:43,662 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 00:38:08,071 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 00:42:33,311 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 00:47:01,646 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 00:51:29,846 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 00:55:53,486 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 01:00:17,960 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 01:04:46,943 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 01:09:22,328 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coswara, 39 MFCCs: extract for every chunk, save, and log the file path.
out_path = os.path.join(root, 'mfcc_39_coswara_features.npy')
features = extract_feature(coswara_df, 'mfcc', nmfcc=39)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 02:31:02,246 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 02:33:07,844 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 02:35:14,220 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 02:37:19,023 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 02:39:25,435 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 02:41:31,076 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 02:43:35,668 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 02:45:40,867 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 02:47:47,073 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 02:49:52,439 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 02:51:56,371 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 02:54:00,878 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 02:56:08,288 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 02:58:18,164 - MOBILENET - INFO - 14000 chunks extracted !!!
2

#### Melspectrogram


In [None]:
# Mel spectrograms for every Coswara chunk.
features = extract_feature(coswara_df, 'melspectrogram')

In [None]:
# Persist the Coswara mel spectrograms and log where they were written.
out_path = os.path.join(root, 'melspectrogram_coswara_features.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-13 16:15:41,442 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/melspectrogram_coswara_features.npy
2022-02-13 16:15:41,442 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/melspectrogram_coswara_features.npy


### Coughvid

#### MFCC

##### 13

In [None]:
# Coughvid rows [0, 20000), 13 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_13_coughvid_features_20k.npy')
features = extract_feature(coughvid_df[:20000], 'mfcc', nmfcc=13)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 03:50:50,387 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 03:55:18,575 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 03:59:45,315 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 04:04:17,407 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 04:08:45,092 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 04:13:12,480 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 04:17:41,394 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 04:22:05,576 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 04:26:38,219 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 04:31:06,616 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 04:35:33,977 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 04:40:02,798 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 04:44:33,528 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 04:48:58,710 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coughvid rows [20000, 40000), 13 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_13_coughvid_features_20k_40k.npy')
features = extract_feature(coughvid_df[20000:40000], 'mfcc', nmfcc=13)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 05:28:25,501 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 05:32:52,386 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 05:37:23,980 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 05:41:51,444 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 05:46:22,517 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 05:50:59,654 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 05:55:31,048 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 06:00:01,371 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 06:04:37,903 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 06:09:08,516 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 06:13:43,482 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 06:18:19,848 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 06:22:51,185 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 06:27:22,916 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coughvid rows [40000, 60000), 13 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_13_coughvid_features_40k_60k.npy')
features = extract_feature(coughvid_df[40000:60000], 'mfcc', nmfcc=13)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 07:01:42,407 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 07:06:15,806 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 07:10:45,505 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 07:15:18,025 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 07:19:48,064 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 07:24:18,680 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 07:28:53,098 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 07:33:22,344 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 07:37:51,909 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 07:42:21,283 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 07:46:53,613 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 07:51:20,310 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 07:55:46,213 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 08:00:18,110 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coughvid rows [60000, 75000), 13 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_13_coughvid_features_60k_75k.npy')
features = extract_feature(coughvid_df[60000:75000], 'mfcc', nmfcc=13)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 08:36:43,971 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 08:41:14,207 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 08:45:46,965 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 08:50:27,868 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 08:54:50,699 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 08:59:21,708 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 09:03:46,684 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 09:08:19,096 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 09:12:50,672 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 09:17:21,553 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 09:21:54,874 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 09:26:27,275 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 09:31:02,407 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 09:35:33,661 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coughvid rows from 75000 onwards, 13 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_13_coughvid_features_9981.npy')
features = extract_feature(coughvid_df[75000:], 'mfcc', nmfcc=13)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 09:46:09,114 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 09:50:45,230 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 09:55:18,728 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 09:59:58,001 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 10:04:34,675 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 10:09:17,275 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 10:13:50,323 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 10:18:26,476 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 10:22:58,949 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 10:27:20,142 - MOBILENET - INFO - 9981 chunks extracted !!!
2022-02-21 10:27:20,144 - MOBILENET - INFO - Feature extraction: Done !!!
2022-02-21 10:27:34,704 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/mfcc_13_coughvid_features_9981.npy


##### 26

In [None]:
# Coughvid rows [0, 20000), 26 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_26_coughvid_features_20k.npy')
features = extract_feature(coughvid_df[:20000], 'mfcc', nmfcc=26)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 12:28:03,615 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 12:36:45,446 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 12:45:23,077 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 12:54:01,532 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 13:02:43,314 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 13:11:22,632 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 13:20:05,250 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 13:28:39,140 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 13:37:25,078 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 13:46:11,157 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 13:54:47,608 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 14:03:28,711 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 14:12:09,643 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 14:20:56,547 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coughvid rows [20000, 40000), 26 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_26_coughvid_features_20k_40k.npy')
features = extract_feature(coughvid_df[20000:40000], 'mfcc', nmfcc=26)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 15:22:40,906 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 15:31:29,576 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 15:40:12,706 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 15:49:03,490 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 15:57:41,884 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 16:06:25,882 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 16:15:04,847 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 16:23:44,278 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 16:32:29,655 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 16:41:07,115 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 16:49:45,991 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 16:58:27,617 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 17:07:07,972 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 17:15:44,404 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# del features
# Coughvid rows [40000, 60000), 26 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_26_coughvid_features_40k_60k.npy')
features = extract_feature(coughvid_df[40000:60000], 'mfcc', nmfcc=26)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 19:15:49,998 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 19:20:06,240 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 19:24:15,000 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 19:28:26,360 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 19:32:48,546 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 19:37:02,184 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 19:41:14,822 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 19:45:27,391 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 19:49:56,537 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 19:54:11,757 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 19:58:30,388 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 20:02:39,878 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 20:06:53,725 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 20:11:06,152 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coughvid rows [60000, 75000), 26 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_26_coughvid_features_60k_75k.npy')
features = extract_feature(coughvid_df[60000:75000], 'mfcc', nmfcc=26)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 20:41:28,356 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 20:45:38,964 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 20:49:51,633 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 20:53:58,067 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 20:58:10,005 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 21:02:22,225 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 21:06:32,171 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 21:10:45,109 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 21:15:02,822 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 21:19:19,581 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 21:23:23,725 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 21:27:26,315 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 21:31:38,831 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 21:35:49,241 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coughvid rows from 75000 onwards, 26 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_26_coughvid_features_9981.npy')
features = extract_feature(coughvid_df[75000:], 'mfcc', nmfcc=26)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 21:45:13,492 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 21:49:25,221 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 21:53:40,017 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 21:57:56,583 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 22:02:13,537 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 22:06:31,487 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 22:10:49,580 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 22:15:06,534 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 22:19:17,912 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 22:23:15,662 - MOBILENET - INFO - 9981 chunks extracted !!!
2022-02-21 22:23:15,670 - MOBILENET - INFO - Feature extraction: Done !!!
2022-02-21 22:23:28,907 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/mfcc_26_coughvid_features_9981.npy


##### 39

In [None]:
# Drop the previous feature array before building the next one.
del features
# Coughvid rows [0, 20000), 39 MFCCs: extract, save, and log the file path.
out_path = os.path.join(root, 'mfcc_39_coughvid_features_20k.npy')
features = extract_feature(coughvid_df[:20000], 'mfcc', nmfcc=39)
np.save(out_path, features)
logger.info(out_path)

2022-02-21 22:27:40,490 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 22:31:50,389 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-21 22:35:54,002 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-21 22:40:04,154 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-21 22:44:09,512 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-21 22:48:16,311 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-21 22:52:27,796 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-21 22:56:37,133 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-21 23:00:49,383 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-21 23:05:00,345 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-21 23:09:11,150 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-21 23:13:23,753 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-21 23:17:31,670 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-21 23:21:49,947 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Release the previous batch before extracting the next one. Rebinding (rather
# than `del features`) keeps the cell runnable when `features` is not defined.
features = None
features = extract_feature(coughvid_df[20000:40000], 'mfcc', nmfcc=39)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_39_coughvid_features_20k_40k.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-21 23:52:36,772 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-21 23:56:42,647 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-22 00:00:55,653 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-22 00:05:15,635 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-22 00:09:35,249 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-22 00:13:59,096 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-22 00:18:19,800 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-22 00:22:39,713 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-22 00:27:04,127 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-22 00:31:36,758 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-22 00:36:01,432 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-22 00:40:22,568 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-22 00:44:54,146 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-22 00:49:19,066 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Release the previous batch before extracting the next one. Rebinding (rather
# than `del features`) keeps the cell runnable when `features` is not defined.
features = None
features = extract_feature(coughvid_df[40000:60000], 'mfcc', nmfcc=39)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_39_coughvid_features_40k_60k.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-22 01:23:03,942 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-22 01:27:26,958 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-22 01:31:55,667 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-22 01:36:27,142 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-22 01:40:52,872 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-22 01:45:17,483 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-22 01:49:40,002 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-22 01:54:06,934 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-22 01:58:31,482 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-22 02:03:03,530 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-22 02:07:33,219 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-22 02:11:56,295 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-22 02:16:23,964 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-22 02:21:01,514 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Release the previous batch before extracting the next one. Rebinding (rather
# than `del features`) keeps the cell runnable when `features` is not defined.
features = None
features = extract_feature(coughvid_df[60000:75000], 'mfcc', nmfcc=39)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_39_coughvid_features_60k_75k.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-22 02:54:43,021 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-22 02:59:06,124 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-22 03:03:40,108 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-22 03:08:00,920 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-22 03:12:20,700 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-22 03:16:45,604 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-22 03:21:05,606 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-22 03:25:27,734 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-22 03:29:57,334 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-22 03:34:22,647 - MOBILENET - INFO - 10000 chunks extracted !!!
2022-02-22 03:38:39,555 - MOBILENET - INFO - 11000 chunks extracted !!!
2022-02-22 03:42:53,538 - MOBILENET - INFO - 12000 chunks extracted !!!
2022-02-22 03:47:16,253 - MOBILENET - INFO - 13000 chunks extracted !!!
2022-02-22 03:51:44,460 - MOBILENET - INFO - 14000 chunks extracted !!!
2

In [None]:
# Release the previous batch before extracting the last one. Rebinding (rather
# than `del features`) keeps the cell runnable when `features` is not defined.
features = None
features = extract_feature(coughvid_df[75000:], 'mfcc', nmfcc=39)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_39_coughvid_features_9981.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-22 04:01:37,080 - MOBILENET - INFO - 1000 chunks extracted !!!
2022-02-22 04:06:04,893 - MOBILENET - INFO - 2000 chunks extracted !!!
2022-02-22 04:10:30,548 - MOBILENET - INFO - 3000 chunks extracted !!!
2022-02-22 04:14:55,046 - MOBILENET - INFO - 4000 chunks extracted !!!
2022-02-22 04:19:22,358 - MOBILENET - INFO - 5000 chunks extracted !!!
2022-02-22 04:23:46,720 - MOBILENET - INFO - 6000 chunks extracted !!!
2022-02-22 04:28:15,898 - MOBILENET - INFO - 7000 chunks extracted !!!
2022-02-22 04:32:44,464 - MOBILENET - INFO - 8000 chunks extracted !!!
2022-02-22 04:37:11,070 - MOBILENET - INFO - 9000 chunks extracted !!!
2022-02-22 04:41:26,533 - MOBILENET - INFO - 9981 chunks extracted !!!
2022-02-22 04:41:26,535 - MOBILENET - INFO - Feature extraction: Done !!!
2022-02-22 04:41:41,107 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/mfcc_39_coughvid_features_9981.npy


#### Melspectrogram

In [None]:
# Melspectrogram features for the first 20,000 rows of `coughvid_df`; the
# following cell writes them to disk.
features = extract_feature(coughvid_df[:20000], 'melspectrogram')

In [None]:
# Persist the current batch and log where it was written.
out_path = os.path.join(root, 'melspectrogram_coughvid_features_20k.npy')
np.save(out_path, features)
logger.info(out_path)

In [None]:
# Release the previous batch before extracting the next one. Rebinding (rather
# than `del features`) keeps the cell runnable when `features` is not defined.
features = None
features = extract_feature(coughvid_df[20000:40000], 'melspectrogram')

In [None]:
# Persist the current batch and log where it was written.
out_path = os.path.join(root, 'melspectrogram_coughvid_features_20k_40k.npy')
np.save(out_path, features)
logger.info(out_path)

In [None]:
# Release the previous batch before extracting the next one. Rebinding (rather
# than `del features`) keeps the cell runnable when `features` is not defined.
features = None
features = extract_feature(coughvid_df[40000:60000], 'melspectrogram')

In [None]:
# Persist the current batch and log where it was written.
out_path = os.path.join(root, 'melspectrogram_coughvid_features_40k_60k.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-14 07:37:13,705 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/melspectrogram_coughvid_features_40k_60k.npy


In [None]:
# Release the previous batch before extracting the next one. Rebinding works
# whether or not `features` is currently defined, unlike `del features`.
features = None
features = extract_feature(coughvid_df[60000:75000], 'melspectrogram')

In [None]:
# Persist the current batch and log where it was written.
out_path = os.path.join(root, 'melspectrogram_coughvid_features_60k_75k.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-14 10:27:18,736 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/melspectrogram_coughvid_features_60k_75k.npy


In [None]:
# Release the previous batch before extracting the last one. Rebinding (rather
# than `del features`) keeps the cell runnable when `features` is not defined.
features = None
features = extract_feature(coughvid_df[75000:], 'melspectrogram')

In [None]:
# Persist the current batch and log where it was written.
out_path = os.path.join(root, 'melspectrogram_coughvid_features_9981.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-14 10:48:54,128 - MOBILENET - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/melspectrogram_coughvid_features_9981.npy


In [None]:
# Audio('/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coughvid/coughvid_cough_chunk/00039425-7f3a-42aa-ac13-834aaa2b6b92-0.wav')

In [None]:
# data, sample_rate = librosa.load('/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coughvid/coughvid_cough_chunk/00039425-7f3a-42aa-ac13-834aaa2b6b92-0.wav')
# data = data[START_SAMPLE: SAMPLES_TO_CONSIDER]
# mel_spec = librosa.feature.melspectrogram(y=data)
# log_mel_spec = librosa.power_to_db(mel_spec)
# show_feature(log_mel_spec, sample_rate)

In [None]:
# data, sample_rate = librosa.load('/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data preprocessing/cough_chunking/coughvid/coughvid_cough_chunk/00039425-7f3a-42aa-ac13-834aaa2b6b92-0.wav')
# # data = data[START_SAMPLE: SAMPLES_TO_CONSIDER]
# mel_spec = librosa.feature.melspectrogram(y=data)
# # log_mel_spec = librosa.power_to_db(mel_spec)
# show_feature(mel_spec, sample_rate)

# Full audio
* Extract features for cough audio:
  * AICovidVN: 5,247
  * Coswara: 4,465
  * Coughvid: 20,072

## Data

### Set up

In [1]:
from IPython.display import clear_output, Audio
import IPython.display as ipd
import os
import numpy as np
import shutil
import pandas as pd
import logging
import warnings
warnings.filterwarnings("ignore")


logger = logging.getLogger('FULL AUDIO EXTRACTION')
logger.setLevel(logging.DEBUG)

# create formatter
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# create console handler, set level to debug and attach the formatter
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

# `logging.getLogger` returns the same logger object for the same name, so
# re-running this cell would otherwise stack one more handler each time and
# print every message repeatedly. Attach the handler only when none is present.
if not logger.handlers:
  logger.addHandler(ch)
# Do not truncate column contents when frames are displayed (the metadata
# frames hold long file paths), then clear this cell's output.
pd.set_option('display.max_colwidth', None)
clear_output()

In [2]:
# Output directory: the extracted feature arrays (.npy) and the label-matching
# metadata CSVs are written under this path.
# NOTE(review): absolute path under /content/drive — presumably a Colab Google
# Drive mount; adjust when running elsewhere.
root = '/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/full_audio'
# Input directory: holds one sub-folder per dataset (e.g. coswara/, coughvid/)
# containing that dataset's metadata CSV.
data_root = '/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data'

In [3]:
def add_root(s, name):
  """Return the path ``s`` placed under ``data_root/<name>``.

  Args:
    s: Path of a file, relative to the dataset folder.
    name: Name of the dataset folder inside ``data_root``.
  """
  return os.path.join(data_root, name, s)

### AICovidVN

### Coswara

In [None]:
# Read the Coswara metadata, prefix every cough path with the dataset folder
# and expose `covid_status` under the `label` column name that the Coughvid
# frame also uses.
coswara = os.path.join(data_root, 'coswara/coswara_metadata.csv')
coswara_df = pd.read_csv(coswara).assign(
    cough_path=lambda frame: frame['cough_path'].apply(add_root, name='coswara'),
    label=lambda frame: frame['covid_status'],
)
coswara_df.head()

Unnamed: 0,id,age,covid_status,record_date,is_english_proficiency,gender,country,locality,state,is_returning_user,is_smoker,is_cold,is_hypertension,is_diabetes,is_cough,date_of_ct_scan,has_ctScan,ct_score,is_diarrheoa,is_fever,is_loss_of_smell,is_muscle_pain,test_type,test_date,test_status,is_using_mask,vaccination_status,is_breathing_difficulty,is_others_resp,is_fatigue,is_sore_throat,is_ischemic_heart_disease,is_asthma,is_others_preexist_conditions,is_chronic_lung_disease,is_neumonia,cough_path,label
0,iV3Db6t1T8b7c5HQY2TwxIhjbzD3,28,healthy,2020-04-23,y,male,India,Anantapur,Andhra Pradesh,n,,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200424/iV3Db6t1T8b7c5HQY2TwxIhjbzD3/cough-shallow.wav,healthy
1,AxuYWBN0jFVLINCBqIW5aZmGCdu1,25,healthy,2020-04-20,y,male,India,BENGALURU URBAN,Karnataka,n,True,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200424/AxuYWBN0jFVLINCBqIW5aZmGCdu1/cough-shallow.wav,healthy
2,C5eIsssb9GSkaAgIfsHMHeR6fSh1,28,healthy,2020-04-24,y,female,United States,Pittsburgh,Pennsylvania,n,,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200424/C5eIsssb9GSkaAgIfsHMHeR6fSh1/cough-shallow.wav,healthy
3,YjbEAECMBIaZKyfqOvWy5DDImUb2,26,healthy,2020-04-23,y,male,India,Bangalore,Karnataka,n,,True,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200424/YjbEAECMBIaZKyfqOvWy5DDImUb2/cough-shallow.wav,healthy
4,aGOvk4ji0cVqIzCs1jHnzlw2UEy2,32,healthy,2020-04-22,y,male,India,Nalanda,Bihar,n,,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200424/aGOvk4ji0cVqIzCs1jHnzlw2UEy2/cough-shallow.wav,healthy


In [None]:
# Render an audio player for the first Coswara recording.
Audio(coswara_df.loc[0, 'cough_path'])

### Coughvid

In [None]:
# Read the Coughvid metadata, prefix every cough path with the dataset folder
# and expose `status` under the `label` column name that the Coswara frame
# also uses.
coughvid = os.path.join(data_root, 'coughvid/coughvid_metadata.csv')
coughvid_df = pd.read_csv(coughvid).assign(
    cough_path=lambda frame: frame['cough_path'].apply(add_root, name='coughvid'),
    label=lambda frame: frame['status'],
)
coughvid_df.head()

Unnamed: 0,uuid,datetime,cough_detected,latitude,longitude,age,gender,respiratory_condition,fever_muscle_pain,status,quality_1,cough_type_1,dyspnea_1,wheezing_1,stridor_1,choking_1,congestion_1,nothing_1,diagnosis_1,severity_1,quality_2,cough_type_2,dyspnea_2,wheezing_2,stridor_2,choking_2,congestion_2,nothing_2,diagnosis_2,severity_2,quality_3,dyspnea_3,wheezing_3,stridor_3,choking_3,congestion_3,nothing_3,cough_type_3,diagnosis_3,severity_3,cough_path,label
0,00039425-7f3a-42aa-ac13-834aaa2b6b92,2020-04-13T21:30:59.801831+00:00,0.9609,31.3,34.8,15.0,male,False,False,healthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coughvid/coughvid_cough_data/00039425-7f3a-42aa-ac13-834aaa2b6b92.webm,healthy
1,0009eb28-d8be-4dc1-92bb-907e53bc5c7a,2020-04-12T04:02:18.159383+00:00,0.9301,40.0,-75.1,34.0,male,True,False,healthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coughvid/coughvid_cough_data/0009eb28-d8be-4dc1-92bb-907e53bc5c7a.webm,healthy
2,0012c608-33d0-4ef7-bde3-75a0b1a0024e,2020-04-15T01:03:59.029326+00:00,0.0482,-16.5,-71.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coughvid/coughvid_cough_data/0012c608-33d0-4ef7-bde3-75a0b1a0024e.webm,
3,001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f,2020-04-13T22:23:06.997578+00:00,0.9968,,,21.0,male,False,False,healthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coughvid/coughvid_cough_data/001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f.webm,healthy
4,001c85a8-cc4d-4921-9297-848be52d4715,2020-04-17T15:24:35.822355+00:00,0.0735,40.6,-3.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,/content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coughvid/coughvid_cough_data/001c85a8-cc4d-4921-9297-848be52d4715.webm,


## Feature extraction
* Image-like log-mel spectrogram of shape (224, 246)
* MFCC features, repeated along the coefficient axis to reach shape (234, 246)

* Some cough recordings cannot be processed, so we save a metadata file for each dataset with an ``error`` field (1: failed audio, 0: normal audio). This is needed to match labels with the extracted features, because failed recordings have no entry in the feature array.

### Set up

In [4]:
import librosa
import librosa.display as ldp
import matplotlib.pyplot as plt
import IPython.display as ipd
import numpy as np
# clear_output()

In [5]:
# Number of leading samples dropped from every clip after it has been padded
# or truncated to SAMPLES_TO_CONSIDER.
START_SAMPLE = 10000
# Fixed clip length in samples. Assuming librosa's default 22,050 Hz load rate
# this is about 6.1 s (about 5.7 s once START_SAMPLE samples are dropped), not
# exactly 5 seconds; the remaining 125,440 samples give 246 frames at the
# default hop length of 512.
SAMPLES_TO_CONSIDER = 135440


# def show_feature(feature, sample_rate):
#   plt.figure(figsize=(15, 10))
#   ldp.specshow(feature,
#                x_axis="time",
#                y_axis="mel", 
#                sr=sample_rate)
#   plt.colorbar(format="%+2.f")
#   plt.show()


def lengthen_cough(data: np.ndarray, n):
  """Repeat ``data`` ``n`` times along its first axis.

  Args:
    data: Array to repeat.
    n: Number of copies in the result. For ``n <= 1`` the input is returned
      unchanged (the same object, not a copy).

  Returns:
    ``data`` stacked end-to-end ``n`` times along axis 0.
  """
  if n <= 1:
    return data
  # A single concatenate call; appending one copy per loop iteration re-copied
  # the growing result every time.
  return np.concatenate([data] * n)

# def concat_list(data: list, n):
#   temp = []
#   for i in range(n):
#     temp += data
#   return temp


def mel_spectrogram(path: str, nfft=2048, hoplen=512, nmels=224):
  """Load an audio file and return its log-mel spectrogram.

  The signal is padded or truncated to ``SAMPLES_TO_CONSIDER`` samples and the
  first ``START_SAMPLE`` samples are then dropped, so every file is analysed
  over the same number of samples.

  Args:
    path: Audio file to load with ``librosa.load``.
    nfft: FFT window size passed as ``n_fft``.
    hoplen: Hop length passed as ``hop_length``.
    nmels: Number of mel bands passed as ``n_mels``.

  Returns:
    The mel spectrogram converted with ``librosa.power_to_db``.
  """
  data, sample_rate = librosa.load(path)
  # `size` must be passed by keyword: librosa >= 0.10 made it keyword-only, so
  # the positional form raises TypeError there. The keyword form also works on
  # older releases.
  data = librosa.util.fix_length(data, size=SAMPLES_TO_CONSIDER)
  data = data[START_SAMPLE:]
  mel_spec = librosa.feature.melspectrogram(y=data,
                                            sr=sample_rate,
                                            n_fft=nfft,
                                            hop_length=hoplen,
                                            n_mels=nmels)
  log_mel_spec = librosa.power_to_db(mel_spec)
  return log_mel_spec

def n_mfcc(path: str, nfft=2048, hoplen=512, nmels=224, nmfcc=13):
  """Load an audio file and return its MFCC matrix, repeated to image height.

  The signal is padded or truncated to ``SAMPLES_TO_CONSIDER`` samples and the
  first ``START_SAMPLE`` samples are then dropped. The resulting MFCC matrix is
  repeated ``int(nmels / nmfcc) + 1`` times along its first axis; with the
  defaults that is 18 copies of 13 coefficients, i.e. 234 rows.

  Args:
    path: Audio file to load with ``librosa.load``.
    nfft: FFT window size passed as ``n_fft``.
    hoplen: Hop length passed as ``hop_length``.
    nmels: Target height used only to compute the number of repetitions.
    nmfcc: Number of MFCC coefficients passed as ``n_mfcc``.

  Returns:
    The repeated MFCC matrix produced by ``lengthen_cough``.
  """
  data, sample_rate = librosa.load(path)
  # `size` must be passed by keyword: librosa >= 0.10 made it keyword-only, so
  # the positional form raises TypeError there. The keyword form also works on
  # older releases.
  data = librosa.util.fix_length(data, size=SAMPLES_TO_CONSIDER)
  data = data[START_SAMPLE:]
  mfcc = librosa.feature.mfcc(y=data,
                              sr=sample_rate,
                              n_fft=nfft,
                              hop_length=hoplen,
                              n_mfcc=nmfcc)
  return lengthen_cough(mfcc, int(nmels/nmfcc)+1)

In [6]:
def extract_feature(data: pd.DataFrame, f_type: str, nmfcc=13):
  """Extract one feature matrix per audio file listed in ``data['cough_path']``.

  Args:
    data: Frame with a ``cough_path`` column of audio file paths.
    f_type: ``'melspectrogram'`` or ``'mfcc'`` (case-insensitive).
    nmfcc: Number of MFCC coefficients; only used when ``f_type`` is ``'mfcc'``.

  Returns:
    A ``(features, error)`` tuple. ``features`` is ``np.array`` of the matrices
    that were extracted successfully, in input order. ``error`` has one entry
    per input row: ``1`` when extraction failed for that file, ``0`` otherwise.

  Raises:
    ValueError: If ``f_type`` is not a supported feature type.
  """
  kind = f_type.lower()
  if kind == 'melspectrogram':
    def extract(path):
      return mel_spectrogram(path=path)
  elif kind == 'mfcc':
    def extract(path):
      return n_mfcc(path=path, nmfcc=nmfcc)
  else:
    # Fail loudly: previously a misspelt type fell through both branches and
    # returned an empty array that would then be saved as if it were valid.
    raise ValueError(
        f"Unsupported f_type {f_type!r}; expected 'melspectrogram' or 'mfcc'")

  features = []
  error = []
  for fp in data['cough_path'].tolist():
    try:
      features.append(extract(fp))
    except Exception as exc:
      # `Exception` rather than a bare `except`, so a kernel interrupt
      # (KeyboardInterrupt) stops the run instead of being counted as a bad
      # file. The cause is logged so failures can be diagnosed.
      error.append(1)
      logger.warning(f'ERROR COUGH: {fp} ({exc!r})')
      continue
    error.append(0)
    if len(features) % 1000 == 0:
      logger.info(f'{len(features)} audio extracted !!!')

  logger.info(f'{len(features)} audio extracted !!!')
  logger.info('Feature extraction: Done !!!')
  return np.array(features), error

### AICovidVN

### Coswara

In [None]:
# Melspectrogram features for Coswara, plus a metadata copy that flags the
# recordings which could not be processed.
features, error = extract_feature(coswara_df, 'melspectrogram')

features_path = os.path.join(root, 'melspectrogram_coswara_features.npy')
np.save(features_path, features)
logger.info(features_path)

# `error` holds one 0/1 entry per metadata row, so it can be stored as a column.
coswara_df['error'] = error
metadata_path = os.path.join(root, 'coswara_metadata_for_label_matching.csv')
coswara_df.to_csv(metadata_path, index=False)
logger.info(metadata_path)

2022-02-23 01:48:37,201 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20210816/WcRpZenx0WWQmzOUPTHEEGlyJAg1/cough-shallow.wav
2022-02-23 01:49:22,470 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200419/XFX3DxpzWlTsqde0wmliVzvRXnf1/cough-shallow.wav
2022-02-23 01:49:51,033 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200824/pBBuvcoBj7hjmNVYFICT4hQYRGw1/cough-shallow.wav
2022-02-23 01:50:23,104 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200820/Tfvspm3rapd3ZLyAoMG36VxNQnr2/cough-shallow.wav
2022-02-23 01:50:25,637 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior 

In [None]:
# Release the previous feature array before building the next one. Rebinding
# (rather than `del features`) keeps the cell runnable when `features` is not
# defined.
features = None
features, _ = extract_feature(coswara_df, 'mfcc')
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_13_coswara_features.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-23 02:35:31,383 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20210816/WcRpZenx0WWQmzOUPTHEEGlyJAg1/cough-shallow.wav
2022-02-23 02:35:52,261 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200419/XFX3DxpzWlTsqde0wmliVzvRXnf1/cough-shallow.wav
2022-02-23 02:36:03,025 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200824/pBBuvcoBj7hjmNVYFICT4hQYRGw1/cough-shallow.wav
2022-02-23 02:36:16,189 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200820/Tfvspm3rapd3ZLyAoMG36VxNQnr2/cough-shallow.wav
2022-02-23 02:36:17,392 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior 

In [None]:
# Release the previous feature array before building the next one. Rebinding
# (rather than `del features`) keeps the cell runnable when `features` is not
# defined.
features = None
features, _ = extract_feature(coswara_df, 'mfcc', nmfcc=26)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_26_coswara_features.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-23 02:58:51,121 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20210816/WcRpZenx0WWQmzOUPTHEEGlyJAg1/cough-shallow.wav
2022-02-23 02:59:11,904 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200419/XFX3DxpzWlTsqde0wmliVzvRXnf1/cough-shallow.wav
2022-02-23 02:59:22,589 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200824/pBBuvcoBj7hjmNVYFICT4hQYRGw1/cough-shallow.wav
2022-02-23 02:59:35,746 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200820/Tfvspm3rapd3ZLyAoMG36VxNQnr2/cough-shallow.wav
2022-02-23 02:59:36,949 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior 

In [None]:
# Release the previous feature array before building the next one. Rebinding
# (rather than `del features`) keeps the cell runnable when `features` is not
# defined.
features = None
features, _ = extract_feature(coswara_df, 'mfcc', nmfcc=39)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_39_coswara_features.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-23 03:22:08,821 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20210816/WcRpZenx0WWQmzOUPTHEEGlyJAg1/cough-shallow.wav
2022-02-23 03:22:29,518 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200419/XFX3DxpzWlTsqde0wmliVzvRXnf1/cough-shallow.wav
2022-02-23 03:22:40,172 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200824/pBBuvcoBj7hjmNVYFICT4hQYRGw1/cough-shallow.wav
2022-02-23 03:22:53,242 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/data/coswara/coswara_cough_data/20200820/Tfvspm3rapd3ZLyAoMG36VxNQnr2/cough-shallow.wav
2022-02-23 03:22:54,456 - FULL AUDIO EXTRACTION - INFO - ERROR COUGH: /content/drive/MyDrive/Colab Notebooks/Senior 

### Coughvid

In [None]:
# Melspectrogram features for Coughvid, plus a metadata copy that flags the
# recordings which could not be processed.
features, error = extract_feature(coughvid_df, 'melspectrogram')

features_path = os.path.join(root, 'melspectrogram_coughvid_features.npy')
np.save(features_path, features)
logger.info(features_path)

# `error` holds one 0/1 entry per metadata row, so it can be stored as a column.
coughvid_df['error'] = error
metadata_path = os.path.join(root, 'coughvid_metadata_for_label_matching.csv')
coughvid_df.to_csv(metadata_path, index=False)
logger.info(metadata_path)

In [None]:
# Release any previous feature array before building the next one. Rebinding
# works whether or not `features` is currently defined, unlike `del features`.
features = None
features, _ = extract_feature(coughvid_df, 'mfcc')
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_13_coughvid_features.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-25 02:16:23,854 - FULL AUDIO EXTRACTION - INFO - 1000 audio extracted !!!
2022-02-25 02:33:32,906 - FULL AUDIO EXTRACTION - INFO - 2000 audio extracted !!!
2022-02-25 02:51:25,445 - FULL AUDIO EXTRACTION - INFO - 3000 audio extracted !!!
2022-02-25 03:10:19,519 - FULL AUDIO EXTRACTION - INFO - 4000 audio extracted !!!
2022-02-25 03:32:27,861 - FULL AUDIO EXTRACTION - INFO - 5000 audio extracted !!!
2022-02-25 03:55:31,821 - FULL AUDIO EXTRACTION - INFO - 6000 audio extracted !!!
2022-02-25 04:19:29,524 - FULL AUDIO EXTRACTION - INFO - 7000 audio extracted !!!
2022-02-25 04:44:05,796 - FULL AUDIO EXTRACTION - INFO - 8000 audio extracted !!!
2022-02-25 05:07:55,463 - FULL AUDIO EXTRACTION - INFO - 9000 audio extracted !!!
2022-02-25 05:34:37,418 - FULL AUDIO EXTRACTION - INFO - 10000 audio extracted !!!
2022-02-25 06:01:52,685 - FULL AUDIO EXTRACTION - INFO - 11000 audio extracted !!!
2022-02-25 06:28:22,980 - FULL AUDIO EXTRACTION - INFO - 12000 audio extracted !!!
2022-02-25 06

In [None]:
# First part of the MFCC-26 extraction (rows 0-9,999); the parts are merged in
# a later cell. Rebinding releases any previous array and works whether or not
# `features` is currently defined, unlike `del features`.
features = None
features, _ = extract_feature(coughvid_df[:10000], 'mfcc', nmfcc=26)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, '1mfcc_26_coughvid_features.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-27 00:17:11,375 - FULL AUDIO EXTRACTION - INFO - 1000 audio extracted !!!
2022-02-27 00:33:43,098 - FULL AUDIO EXTRACTION - INFO - 2000 audio extracted !!!
2022-02-27 00:50:51,114 - FULL AUDIO EXTRACTION - INFO - 3000 audio extracted !!!
2022-02-27 01:08:26,513 - FULL AUDIO EXTRACTION - INFO - 4000 audio extracted !!!
2022-02-27 01:27:11,998 - FULL AUDIO EXTRACTION - INFO - 5000 audio extracted !!!
2022-02-27 01:46:32,006 - FULL AUDIO EXTRACTION - INFO - 6000 audio extracted !!!
2022-02-27 02:06:44,902 - FULL AUDIO EXTRACTION - INFO - 7000 audio extracted !!!
2022-02-27 02:27:58,704 - FULL AUDIO EXTRACTION - INFO - 8000 audio extracted !!!
2022-02-27 02:50:04,152 - FULL AUDIO EXTRACTION - INFO - 9000 audio extracted !!!
2022-02-27 03:12:15,445 - FULL AUDIO EXTRACTION - INFO - 10000 audio extracted !!!
2022-02-27 03:12:15,448 - FULL AUDIO EXTRACTION - INFO - 10000 audio extracted !!!
2022-02-27 03:12:15,455 - FULL AUDIO EXTRACTION - INFO - Feature extraction: Done !!!
2022-02-27

In [None]:
# Second part of the MFCC-26 extraction (rows 10,000 onward). Rebinding (rather
# than `del features`) releases the previous array and keeps the cell runnable
# when `features` is not defined.
features = None
features, _ = extract_feature(coughvid_df[10000:], 'mfcc', nmfcc=26)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, '2mfcc_26_coughvid_features.npy')
np.save(out_path, features)
logger.info(out_path)

2022-02-27 03:28:45,084 - FULL AUDIO EXTRACTION - INFO - 1000 audio extracted !!!
2022-02-27 03:46:03,976 - FULL AUDIO EXTRACTION - INFO - 2000 audio extracted !!!
2022-02-27 04:04:23,451 - FULL AUDIO EXTRACTION - INFO - 3000 audio extracted !!!
2022-02-27 04:23:44,993 - FULL AUDIO EXTRACTION - INFO - 4000 audio extracted !!!
2022-02-27 04:43:49,921 - FULL AUDIO EXTRACTION - INFO - 5000 audio extracted !!!
2022-02-27 05:04:18,340 - FULL AUDIO EXTRACTION - INFO - 6000 audio extracted !!!
2022-02-27 05:25:54,777 - FULL AUDIO EXTRACTION - INFO - 7000 audio extracted !!!
2022-02-27 05:47:45,813 - FULL AUDIO EXTRACTION - INFO - 8000 audio extracted !!!
2022-02-27 06:10:25,286 - FULL AUDIO EXTRACTION - INFO - 9000 audio extracted !!!
2022-02-27 06:34:52,455 - FULL AUDIO EXTRACTION - INFO - 10000 audio extracted !!!
2022-02-27 06:36:42,561 - FULL AUDIO EXTRACTION - INFO - 10072 audio extracted !!!
2022-02-27 06:36:42,564 - FULL AUDIO EXTRACTION - INFO - Feature extraction: Done !!!
2022-02-27

In [11]:
# Stitch the two partial MFCC-26 arrays back together, in row order, and save
# the result under the un-prefixed file name.
merged_path = os.path.join(root, 'mfcc_26_coughvid_features.npy')
f1 = np.load(os.path.join(root, '1mfcc_26_coughvid_features.npy'))
f2 = np.load(os.path.join(root, '2mfcc_26_coughvid_features.npy'))
f = np.concatenate((f1, f2), axis=0)
np.save(merged_path, f)
logger.info(merged_path)

2022-02-27 11:02:29,600 - FULL AUDIO EXTRACTION - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/full_audio/mfcc_26_coughvid_features.npy


In [None]:
# Release any previous feature array before building the next one. Rebinding
# works whether or not `features` is currently defined, unlike `del features`.
features = None
features, _ = extract_feature(coughvid_df, 'mfcc', nmfcc=39)
# Build the output path once so the saved file and the logged path cannot drift.
out_path = os.path.join(root, 'mfcc_39_coughvid_features.npy')
np.save(out_path, features)
logger.info(out_path)

In [7]:
# Stitch the two partial MFCC-39 arrays back together, in row order, and save
# the result under the un-prefixed file name.
# NOTE(review): the cell that wrote the `1mfcc_39_...` / `2mfcc_39_...` inputs
# is not visible nearby (the preceding cell writes the merged file name
# directly) — confirm these files exist before re-running.
merged_path = os.path.join(root, 'mfcc_39_coughvid_features.npy')
f1 = np.load(os.path.join(root, '1mfcc_39_coughvid_features.npy'))
f2 = np.load(os.path.join(root, '2mfcc_39_coughvid_features.npy'))
f = np.concatenate((f1, f2), axis=0)
np.save(merged_path, f)
logger.info(merged_path)

2022-02-27 11:06:29,770 - FULL AUDIO EXTRACTION - INFO - /content/drive/MyDrive/Colab Notebooks/Senior Thesis/Workspace/recognition/cnn/mobileNet/full_audio/mfcc_39_coughvid_features.npy
