In [None]:
# Human-readable class labels. The prediction cell further down indexes this
# list with the argmax of the model output, so position i must be class id i.
# NOTE(review): presumably this order matches the label encoding used at
# training time — verify against the training notebook.
c_names = [
    'Bronchiectasis',
    'COPD',
    'Healthy',
    'Pneumonia',
    'URTI',
]

In [None]:
# Install TensorFlow into the active environment using uv's pip interface.
# NOTE(review): the version is not pinned, and the other packages this notebook
# imports (librosa, visualkeras, PIL) are not installed by this cell — confirm
# they are provided by the environment.
!uv pip install tensorflow

In [None]:
import librosa
import numpy as np
def preprocessing(audio_file, mode, sr_new=16000, duration=5):
    """Load an audio file and convert it to a fixed-size 2-D feature matrix.

    The recording is resampled to ``sr_new`` Hz while loading, then zero-padded
    at the end or truncated so that it holds exactly ``duration * sr_new``
    samples. The requested feature is computed on that fixed-length signal, so
    every file yields a matrix with the same number of frames.

    Parameters
    ----------
    audio_file : str or path-like
        Audio source passed straight to ``librosa.load``.
    mode : {'mfcc', 'log_mel'}
        ``'mfcc'``    -> ``librosa.feature.mfcc`` with the library defaults.
        ``'log_mel'`` -> 128-band mel spectrogram (``fmax=8000``) converted to
        decibels relative to its own maximum.
    sr_new : int, default 16000
        Target sample rate in Hz.
    duration : float, default 5
        Length in seconds that every clip is padded or truncated to.

    Returns
    -------
    numpy.ndarray
        The feature matrix produced by librosa for the chosen ``mode``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values. The check runs before
        any file I/O, so a typo fails immediately instead of surfacing as an
        ``UnboundLocalError`` after the audio has been loaded.
    """
    supported_modes = ('mfcc', 'log_mel')
    if mode not in supported_modes:
        raise ValueError(
            f"Unsupported mode {mode!r}; expected one of {supported_modes}"
        )

    # Resample on load; the returned rate equals sr_new, so it is not kept.
    x, _ = librosa.load(audio_file, sr=sr_new)

    # Number of samples in the fixed-length window = seconds x sample rate.
    max_len = int(round(duration * sr_new))
    if x.shape[0] < max_len:
        # Too short: append zeros at the end.
        x = np.pad(x, (0, max_len - x.shape[0]))
    else:
        # Too long (or already exact): keep only the first max_len samples.
        x = x[:max_len]

    if mode == 'mfcc':
        feature = librosa.feature.mfcc(y=x, sr=sr_new)
    else:  # mode == 'log_mel', guaranteed by the validation above
        feature = librosa.feature.melspectrogram(y=x, sr=sr_new, n_mels=128, fmax=8000)
        feature = librosa.power_to_db(feature, ref=np.max)

    return feature

In [None]:
import tensorflow as tf

# Restore the trained Keras classifier from disk (path is relative to the
# notebook's working directory).
loaded_model = tf.keras.models.load_model(
    "models/lung_disease_predictor_cnn_logmel_without_data_augmentation.keras"
)


In [None]:
# Sample recording used for the single-file prediction below. Adjacent string
# literals are joined by Python, so this is one relative path.
audio_file = (
    'respiratory-sound-database/'
    'Respiratory_Sound_Database/'
    'Respiratory_Sound_Database/'
    'audio_and_txt_files/'
    '101_1b1_Al_sc_Meditron.wav'
)

In [None]:
# Compute MFCC features for the sample and reshape them into a 4-D batch of
# shape (N, 20, 157, 1) before handing them to the model.
# NOTE(review): the loaded model file is named "...cnn_logmel...", yet MFCC
# features are fed here — confirm the model really expects 20x157 MFCC input
# rather than the 'log_mel' features.
processed_audio = np.reshape(preprocessing(audio_file, 'mfcc'), (-1, 20, 157, 1))
new_preds = loaded_model.predict(processed_audio)

In [None]:

# Take the highest-scoring class index for each row of the prediction matrix,
# then display the label of the first sample as the cell output.
new_classpreds = new_preds.argmax(axis=1)
c_names[new_classpreds[0]]

In [None]:
import visualkeras

In [None]:

# Draw the layered architecture diagram. The image is left as the cell's last
# expression so Jupyter renders it inline and stores it in the notebook;
# calling `.show()` on it would instead launch an external image viewer and
# leave the cell without output (and fail on headless kernels).
visualkeras.layered_view(loaded_model)

In [None]:
from PIL import ImageFont

# Render the diagram with a legend and write it to images/model2.png.
# The preferred font lives at a macOS-only system path; since the font is
# optional, fall back to Pillow's built-in default when that file cannot be
# read so the cell also runs on other systems.
try:
    font = ImageFont.truetype("/System/Library/Fonts/Supplemental/Arial Rounded Bold.ttf", 20)
except OSError:
    font = ImageFont.load_default()
visualkeras.layered_view(loaded_model, legend=True, font=font, to_file='images/model2.png', spacing=24)