In [1]:
import sys  
sys.path.insert(0, '../')

import flammkuchen as fl
import os
import librosa
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE
from scipy.stats import mode
from tqdm.notebook import tqdm
from numba import njit
from nutil.plot import paperStyle
from skimage.transform import resize

from utils.params import Params
from utils.Helper import scale_minmax
from neural_networks.src.dataloader import DataLoader

%load_ext autoreload
%autoreload 2

In [2]:
# Root folders of the per-event recordings; get_all_spectrograms appends the
# event type (e.g. "Cough") to these to find the folder it lists.
PATH_MIC_FILES = r"../../Data/EachEventInSingleFile/MIC/"
PATH_NSA_FILES = r"../../Data/EachEventInSingleFile/NSA/"
# Location passed to DataLoader.get_cough_database_data in get_coughDB_data.
PATH_DATA = r"../../Data/coughvid_database/"

# File used to construct the Params object for the DataLoader.
PATH_PARAMS = r"../neural_networks/params.json"

In [3]:
def get_all_spectrograms(signal_type, event_type):
    """Load every recording of one event type and convert it to a spectrogram image.

    Parameters
    ----------
    signal_type : str
        "MIC" selects PATH_MIC_FILES; any other value selects PATH_NSA_FILES.
    event_type : str
        Name of the sub-folder (e.g. "Cough") inside the selected directory.

    Returns
    -------
    list
        One get_spectrogram_image result per file, in sorted file-name order.
    """
    base_dir = PATH_MIC_FILES if signal_type == "MIC" else PATH_NSA_FILES
    # Build paths with os.path.join instead of a hard-coded "\\" so that the
    # files are also found on non-Windows systems.
    event_dir = os.path.join(base_dir, event_type)

    print(f"Loading {signal_type}, {event_type}:")
    spectrograms = []
    # os.listdir returns entries in arbitrary order; sort for a stable result.
    for file_name in tqdm(sorted(os.listdir(event_dir))):
        data, sr = librosa.load(os.path.join(event_dir, file_name), sr=44100)
        spectrograms.append(get_spectrogram_image(data, sr))
    return spectrograms

def get_coughDB_data():
    """Fetch cough samples from the cough database through the project DataLoader.

    The loader is built from the parameters in PATH_PARAMS with four classes
    and is asked for n_samples=100 from PATH_DATA. Only the first of the two
    values returned by get_cough_database_data is handed back.
    """
    loader = DataLoader(params=Params(PATH_PARAMS), nb_classes=4)
    cough_samples, _ = loader.get_cough_database_data(PATH_DATA, n_samples=100)
    return cough_samples

def get_spectrogram_image(y, sr):
    """Convert an audio signal into a 64x64 log-mel spectrogram image.

    Parameters
    ----------
    y : audio samples, passed to librosa.feature.melspectrogram.
    sr : sampling rate of ``y``.

    Returns
    -------
    The spectrogram after log scaling, min-max scaling, a vertical flip and
    resizing to (64, 64).
    """
    eps = np.finfo(float).eps  # keeps the logarithm finite for zero-valued bins
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=64, n_fft=1024, hop_length=64, fmax=16384, htk=True)
    log_mel = np.log(mel_power + eps)
    # presumably rescales the values into [-1, 1] - see utils.Helper.scale_minmax
    scaled = scale_minmax(log_mel, -1.0, 1.0)
    # put low frequencies at the bottom of the image
    flipped = np.flip(scaled, axis=0)

    return resize(flipped, (64, 64))

# Load the spectrograms from all events

In [4]:
# Spectrograms of the microphone (MIC) recordings, one list per event type.
(coughs_mic,
 dryswallows_mic,
 throatclears_mic,
 silence_mic,
 speech_mic) = [
    get_all_spectrograms("MIC", event_type)
    for event_type in ("Cough", "Dry swallow", "Throat clear", "Silence", "Speech")
]

Loading MIC, Cough:


  0%|          | 0/294 [00:00<?, ?it/s]

Loading MIC, Dry swallow:


  0%|          | 0/287 [00:00<?, ?it/s]

Loading MIC, Throat clear:


  0%|          | 0/283 [00:00<?, ?it/s]

Loading MIC, Silence:


  0%|          | 0/258 [00:00<?, ?it/s]

Loading MIC, Speech:


  0%|          | 0/628 [00:00<?, ?it/s]

In [5]:
# Spectrograms of the NSA recordings, one list per event type.
(coughs_nsa,
 dryswallows_nsa,
 throatclears_nsa,
 silence_nsa,
 speech_nsa) = [
    get_all_spectrograms("NSA", event_type)
    for event_type in ("Cough", "Dry swallow", "Throat clear", "Silence", "Speech")
]

Loading NSA, Cough:


  0%|          | 0/294 [00:00<?, ?it/s]

Loading NSA, Dry swallow:


  0%|          | 0/287 [00:00<?, ?it/s]

Loading NSA, Throat clear:


  0%|          | 0/283 [00:00<?, ?it/s]

Loading NSA, Silence:


  0%|          | 0/258 [00:00<?, ?it/s]

Loading NSA, Speech:


  0%|          | 0/628 [00:00<?, ?it/s]

In [6]:
# Load cough samples from the cough database via the project DataLoader (see get_coughDB_data).
coughs_coughDB = get_coughDB_data()

Converting files to .wav


  0%|          | 0/20072 [00:00<?, ?it/s]

Load data as spectrograms


  0%|          | 0/100 [00:00<?, ?it/s]

# Extract per-row features from each spectrogram (mean, min, max, median, mode and std)

In [7]:
def get_features(data):
    """Compute six summary statistics for every row of every spectrogram.

    Parameters
    ----------
    data : iterable of array-like
        Spectrograms. The first axis of each one is the row axis; any further
        axes are flattened per row before the statistics are taken.

    Returns
    -------
    numpy.ndarray
        ``np.asarray`` of the per-spectrogram feature tables. Each table has
        shape (rows, 6) with the columns mean, min, max, median, mode and
        standard deviation.
    """
    features = []
    for d in tqdm(data):
        d = np.asarray(d)
        n_rows = d.shape[0]
        features_array = np.zeros((n_rows, 6))
        if n_rows:
            rows = d.reshape(n_rows, -1)
            features_array[:, 0] = np.mean(rows, axis=1)
            features_array[:, 1] = np.min(rows, axis=1)
            features_array[:, 2] = np.max(rows, axis=1)
            features_array[:, 3] = np.median(rows, axis=1)
            # Depending on the SciPy version the mode comes back with shape
            # (rows,) or (rows, 1); flatten it so the assignment works for both.
            features_array[:, 4] = np.asarray(mode(rows, axis=1)[0]).reshape(-1)
            features_array[:, 5] = np.std(rows, axis=1)
        features.append(features_array)
    return np.asarray(features)

In [8]:
# Per-row summary features for every group of spectrograms.
(c_features_mic,
 ds_features_mic,
 tc_features_mic,
 sc_features_mic,
 sp_features_mic) = [
    get_features(spectrograms)
    for spectrograms in (coughs_mic, dryswallows_mic, throatclears_mic, silence_mic, speech_mic)
]

coughDB_features = get_features(coughs_coughDB)

(c_features_nsa,
 ds_features_nsa,
 tc_features_nsa,
 sc_features_nsa,
 sp_features_nsa) = [
    get_features(spectrograms)
    for spectrograms in (coughs_nsa, dryswallows_nsa, throatclears_nsa, silence_nsa, speech_nsa)
]

  0%|          | 0/294 [00:00<?, ?it/s]

  0%|          | 0/287 [00:00<?, ?it/s]

  0%|          | 0/283 [00:00<?, ?it/s]

  0%|          | 0/258 [00:00<?, ?it/s]

  0%|          | 0/628 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/294 [00:00<?, ?it/s]

  0%|          | 0/287 [00:00<?, ?it/s]

  0%|          | 0/283 [00:00<?, ?it/s]

  0%|          | 0/258 [00:00<?, ?it/s]

  0%|          | 0/628 [00:00<?, ?it/s]

# Perform t-SNE and plot it

In [9]:
def perform_tsne(features, perplexity, n_iter, learning_rate, random_state=None):
    """Embed several groups of samples jointly with t-SNE and split the result again.

    Parameters
    ----------
    features : sequence of array-like
        One array per group; the first axis indexes the samples and all
        remaining axes are flattened into one feature vector per sample.
    perplexity, learning_rate
        Forwarded unchanged to ``TSNE``.
    n_iter : int
        Number of optimisation iterations.
    random_state : optional
        Forwarded to ``TSNE``; pass a fixed value for a reproducible
        embedding. The default ``None`` keeps the unseeded behaviour.

    Returns
    -------
    list
        One (n_samples_in_group, 2) slice of the joint embedding per input
        group, in input order.
    """
    X = np.concatenate([np.asarray(group) for group in features], axis=0)
    X = X.reshape(X.shape[0], -1)

    tsne_kwargs = dict(n_components=2, perplexity=perplexity,
                       learning_rate=learning_rate, random_state=random_state)
    try:
        # scikit-learn 1.5 renamed `n_iter` to `max_iter`.
        tsne = TSNE(max_iter=n_iter, **tsne_kwargs)
    except TypeError:
        # Older scikit-learn releases only know the `n_iter` keyword.
        tsne = TSNE(n_iter=n_iter, **tsne_kwargs)
    embedding = tsne.fit_transform(X)

    return_list = []
    pos = 0
    for group in features:
        return_list.append(embedding[pos:pos + len(group)])
        pos += len(group)
    return return_list

def plot_tsne(data, labels, colors, title, alpha):
    """Draw one scatter plot containing every embedded group.

    Parameters
    ----------
    data : sequence of arrays
        One array per group; the last axis holds the two t-SNE coordinates.
    labels : sequence of str
        Legend entry for each group, indexed in parallel with ``data``.
    colors : sequence
        Marker colour for each group; must have as many entries as ``data``.
    title : str
        Figure title.
    alpha : float
        Marker transparency passed to ``plt.scatter``.

    Raises
    ------
    ValueError
        If ``colors`` and ``data`` differ in length.
    """
    if len(colors) != len(data):
        raise ValueError("Number of colors doesn't match the number of events to be plotted")

    with paperStyle():
        plt.figure(figsize=(4, 4))
        for idx, (points, color) in enumerate(zip(data, colors)):
            xs = points[..., 0]
            ys = points[..., 1]
            plt.scatter(xs, ys, color=color, label=labels[idx], alpha=alpha)
        plt.legend()
        plt.title(title)
        plt.xlabel("t-SNE 1")
        plt.ylabel("t-SNE 2")
        plt.show()

## t-SNE: 3 classes

In [None]:
PERPLEXITY = 40
N_ITER = 1500
LEARNING_RATE = 30

save = False
# Output directory for the saved embeddings. VFP_PATH was referenced in the
# save branch without being defined anywhere in this notebook, so switching
# `save` on raised a NameError.
VFP_PATH = "."

## Audio ##
c_embedded_mic, ds_embedded_mic, tc_embedded_mic = perform_tsne(
    [c_features_mic, ds_features_mic, tc_features_mic],
    perplexity=PERPLEXITY,
    n_iter=N_ITER,
    learning_rate=LEARNING_RATE)

plot_tsne(data=[c_embedded_mic, ds_embedded_mic, tc_embedded_mic],
          labels=['Cough', 'Dry swallow', 'Throat clear'],
          colors=['cornflowerblue', 'darkorange', 'mediumspringgreen'],
          title='t-SNE - Audio',
          alpha=0.5)


## NSA ##
c_embedded_nsa, ds_embedded_nsa, tc_embedded_nsa = perform_tsne(
    [c_features_nsa, ds_features_nsa, tc_features_nsa],
    perplexity=PERPLEXITY,
    n_iter=N_ITER,
    learning_rate=LEARNING_RATE)

plot_tsne(data=[c_embedded_nsa, ds_embedded_nsa, tc_embedded_nsa],
          labels=['Cough', 'Dry swallow', 'Throat clear'],
          colors=['cornflowerblue', 'darkorange', 'mediumspringgreen'],
          title='t-SNE - NSA',
          alpha=0.5)

if save:
    # save the data for plotting it later again
    # Create the target directory instead of silently skipping the save, and
    # write the files into it rather than into the working directory.
    os.makedirs(VFP_PATH, exist_ok=True)

    d = dict(cough_mic=c_embedded_mic, dryswallow_mic=ds_embedded_mic, throatclear_mic=tc_embedded_mic,
             perplexity=PERPLEXITY, n_iter=N_ITER, learning_rate=LEARNING_RATE)
    fl.save(os.path.join(VFP_PATH, "c_ds_tc_mic.vfp"), d)

    d = dict(cough_nsa=c_embedded_nsa, dryswallow_nsa=ds_embedded_nsa, throatclear_nsa=tc_embedded_nsa,
             perplexity=PERPLEXITY, n_iter=N_ITER, learning_rate=LEARNING_RATE)
    fl.save(os.path.join(VFP_PATH, "c_ds_tc_nsa.vfp"), d)

## t-SNE: 5 classes

In [None]:
PERPLEXITY = 40
N_ITER = 1500
LEARNING_RATE = 30

save = False
# Output directory for the saved embeddings. VFP_PATH was referenced in the
# save branch without being defined anywhere in this notebook, so switching
# `save` on raised a NameError.
VFP_PATH = "."

## Audio ##
(c_embedded_mic,
 ds_embedded_mic,
 tc_embedded_mic,
 sc_embedded_mic,
 sp_embedded_mic) = perform_tsne(
    [c_features_mic, ds_features_mic, tc_features_mic, sc_features_mic, sp_features_mic],
    perplexity=PERPLEXITY,
    n_iter=N_ITER,
    learning_rate=LEARNING_RATE)

plot_tsne(data=[c_embedded_mic, ds_embedded_mic, tc_embedded_mic, sc_embedded_mic, sp_embedded_mic],
          labels=['Cough', 'Dry swallow', 'Throat clear', 'Silence', 'Speech'],
          colors=['cornflowerblue', 'darkorange', 'mediumspringgreen', 'magenta', 'green'],
          title='t-SNE - Audio',
          alpha=0.5)

## NSA ##
(c_embedded_nsa,
 ds_embedded_nsa,
 tc_embedded_nsa,
 sc_embedded_nsa,
 sp_embedded_nsa) = perform_tsne(
    [c_features_nsa, ds_features_nsa, tc_features_nsa, sc_features_nsa, sp_features_nsa],
    perplexity=PERPLEXITY,
    n_iter=N_ITER,
    learning_rate=LEARNING_RATE)

plot_tsne(data=[c_embedded_nsa, ds_embedded_nsa, tc_embedded_nsa, sc_embedded_nsa, sp_embedded_nsa],
          labels=['Cough', 'Dry swallow', 'Throat clear', 'Silence', 'Speech'],
          colors=['cornflowerblue', 'darkorange', 'mediumspringgreen', 'magenta', 'green'],
          title='t-SNE - NSA',
          alpha=0.5)

if save:
    # save the data for plotting it later again
    # Create the target directory instead of silently skipping the save, and
    # treat both files the same way (the NSA save used to be unguarded).
    os.makedirs(VFP_PATH, exist_ok=True)

    d = dict(cough_mic=c_embedded_mic, dryswallow_mic=ds_embedded_mic, throatclear_mic=tc_embedded_mic,
             silence_mic=sc_embedded_mic, speech_mic=sp_embedded_mic, perplexity=PERPLEXITY,
             n_iter=N_ITER, learning_rate=LEARNING_RATE)
    fl.save(os.path.join(VFP_PATH, "c_ds_tc_sc_sp_mic.vfp"), d)

    d = dict(cough_nsa=c_embedded_nsa, dryswallow_nsa=ds_embedded_nsa, throatclear_nsa=tc_embedded_nsa,
             silence_nsa=sc_embedded_nsa, speech_nsa=sp_embedded_nsa, perplexity=PERPLEXITY,
             n_iter=N_ITER, learning_rate=LEARNING_RATE)
    fl.save(os.path.join(VFP_PATH, "c_ds_tc_sc_sp_nsa.vfp"), d)