In [7]:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import librosa
import pandas as pd

# Load the pretrained YAMNet audio-event classifier from TensorFlow Hub.
yamnet_model = hub.load("https://tfhub.dev/google/yamnet/1")

# The model reports the location of its class-map CSV as a tf.Tensor holding
# a byte string, so convert it to a regular Python str before reading it.
# The codec name must be the plain ASCII "utf-8": a typographic (non-ASCII)
# hyphen in the name only resolves when the interpreter's encoding-name
# normalisation happens to be lenient, and is not a registered codec alias.
class_map_path_tensor = yamnet_model.class_map_path()
class_map_path = class_map_path_tensor.numpy().decode("utf-8")

# Read the human-readable class names; list position is used later as the
# index into the model's score vector.
class_map = pd.read_csv(class_map_path)
class_names = class_map['display_name'].tolist()

print("Loaded class names (first 20):", class_names[:20])

def predict_cough_from_file(file_path, threshold=0.3):
    """Estimate how strongly an audio file matches YAMNet's "Cough" class.

    The file is decoded to a mono float32 waveform at 16 kHz, passed through
    the module-level ``yamnet_model``, and the per-frame class scores are
    averaged over all frames. The averaged "Cough" score is printed together
    with a detected / not-detected verdict, then returned.

    Because the score is a mean over the whole clip, a short cough inside a
    long recording is diluted by the surrounding frames.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to analyse; handed directly to ``librosa.load``.
    threshold : float, default 0.3
        A cough is reported only when the mean score is strictly greater
        than this value.

    Returns
    -------
    numpy.float32 or float
        Mean "Cough" score across frames, or ``0.0`` when the module-level
        ``class_names`` list has no "Cough" entry.
    """
    target_sr = 16000

    # Let librosa perform both the mono down-mix and the resampling while
    # loading. With mono=True the result is always 1-D, so no manual channel
    # averaging is needed (librosa's multi-channel layout is
    # (channels, samples), which makes a hand-rolled mean easy to get wrong).
    waveform, _ = librosa.load(file_path, sr=target_sr, mono=True)
    waveform = waveform.astype(np.float32)

    # The model returns (scores, embeddings, spectrogram); only the
    # frame-by-class score matrix is used here.
    scores, _embeddings, _spectrogram = yamnet_model(waveform)
    mean_scores = np.mean(scores.numpy(), axis=0)

    # Fall back to 0.0 when the loaded class map has no "Cough" label.
    if "Cough" in class_names:
        cough_prob = mean_scores[class_names.index("Cough")]
    else:
        cough_prob = 0.0

    print(f"Predicted cough probability: {cough_prob:.3f}")
    if cough_prob > threshold:
        print("=> Cough detected!")
    else:
        print("=> No cough detected.")
    return cough_prob


Loaded class names (first 20): ['Speech', 'Child speech, kid speaking', 'Conversation', 'Narration, monologue', 'Babbling', 'Speech synthesizer', 'Shout', 'Bellow', 'Whoop', 'Yell', 'Children shouting', 'Screaming', 'Whispering', 'Laughter', 'Baby laughter', 'Giggle', 'Snicker', 'Belly laugh', 'Chuckle, chortle', 'Crying, sobbing']


In [8]:
# Run the detector on a sample recording with the default threshold (0.3);
# the returned score is also shown as this cell's output value.
predict_cough_from_file("caugh_sound.mp3")

Predicted cough probability: 0.680
=> Cough detected!


np.float32(0.67957014)

In [10]:
# Show every class display name read from the model's class-map CSV
# (the full list, so the output is long).
class_names

['Speech',
 'Child speech, kid speaking',
 'Conversation',
 'Narration, monologue',
 'Babbling',
 'Speech synthesizer',
 'Shout',
 'Bellow',
 'Whoop',
 'Yell',
 'Children shouting',
 'Screaming',
 'Whispering',
 'Laughter',
 'Baby laughter',
 'Giggle',
 'Snicker',
 'Belly laugh',
 'Chuckle, chortle',
 'Crying, sobbing',
 'Baby cry, infant cry',
 'Whimper',
 'Wail, moan',
 'Sigh',
 'Singing',
 'Choir',
 'Yodeling',
 'Chant',
 'Mantra',
 'Child singing',
 'Synthetic singing',
 'Rapping',
 'Humming',
 'Groan',
 'Grunt',
 'Whistling',
 'Breathing',
 'Wheeze',
 'Snoring',
 'Gasp',
 'Pant',
 'Snort',
 'Cough',
 'Throat clearing',
 'Sneeze',
 'Sniff',
 'Run',
 'Shuffle',
 'Walk, footsteps',
 'Chewing, mastication',
 'Biting',
 'Gargling',
 'Stomach rumble',
 'Burping, eructation',
 'Hiccup',
 'Fart',
 'Hands',
 'Finger snapping',
 'Clapping',
 'Heart sounds, heartbeat',
 'Heart murmur',
 'Cheering',
 'Applause',
 'Chatter',
 'Crowd',
 'Hubbub, speech noise, speech babble',
 'Children playing'