# EchoCare Demo - Cry Detection & Classification

This notebook demonstrates the two-stage inference pipeline:
1. **Stage 1**: Cry Detection (cry vs non-cry)
2. **Stage 2**: Cry Classification (hungry vs pain)

## 1. Import Libraries

In [None]:
import numpy as np
import librosa
import cv2
from pathlib import Path
from tensorflow import keras

## 2. Configuration

Set parameters to match the training configuration.

In [None]:
# Trained model checkpoints, both stored under the project's model/ directory
project_root = Path("C:/Users/danel/FYP/echocare-infant-cry-classification")
model_dir = project_root / "model"
cry_detection_model_path = model_dir / "cry_detection" / "best_model_detection.keras"
cry_classification_model_path = model_dir / "cry_classification" / "best_model_classification.keras"

# Audio front-end settings (intended to mirror the training configuration)
sample_rate = 16000          # Hz
duration = 1.0               # seconds of audio per clip
n_mels = 128                 # mel bands for spectrogram
target_size = (224, 224)     # MobileNetV2 input size

# Per-model statistics for Z-score standardisation (values taken from training)
cry_detection_mean, cry_detection_std = -37.628456115722656, 22.107717514038086
cry_classification_mean, cry_classification_std = -40.55323028564453, 19.64647102355957

# Minimum confidence required to accept each stage's prediction
cry_detection_threshold = 0.85       # 85% confidence for cry detection
cry_classification_threshold = 0.70  # 70% confidence for cry classification

# Class index -> human-readable label for the classification model.
# NOTE(review): this order must match the label encoding used in training - confirm.
cry_types = {0: "Pain", 1: "Hungry"}

# Echo the key audio settings so the run log shows what was used
print("Configuration loaded")
for summary_line in (
    f"  Sample Rate: {sample_rate}Hz",
    f"  Duration: {duration}s",
    f"  Mel Bands: {n_mels}",
    f"  Target Size: {target_size}",
):
    print(summary_line)

## 3. Audio Preprocessing Functions

### Preprocessing Pipeline:
1. Load and resample (to 16 kHz, mono)
2. Ensure 1-second duration (pad/trim)
3. Create mel-spectrogram (128, 32)
4. Convert to dB scale
5. Standardise the mel-spectrogram (Z-score, using the training mean and standard deviation)
6. Resize to MobileNetV2's input size (224 x 224)
7. Convert grayscale to RGB (1 channel → 3 channels)

In [None]:
def load_and_preprocess_audio(audio_path, mean, std):
    """
    Load an audio clip and convert it into a model-ready mel-spectrogram batch.

    Pipeline: load as mono at ``sample_rate``, pad/trim to exactly
    ``duration`` seconds, compute an ``n_mels``-band mel-spectrogram, convert
    it to dB, standardise it with the supplied training statistics, then hand
    it to ``prepare_for_mobilenet`` for resizing and channel replication.

    Args:
        audio_path: Path to audio file
        mean: Training mean for Z-score normalization
        std: Training std for Z-score normalization (must be non-zero)

    Returns:
        The float32 numpy array produced by ``prepare_for_mobilenet``
        (batch dimension first, channels last).

    Raises:
        ValueError: If ``std`` is zero, since the standardisation would
            otherwise fill the model input with inf/NaN values.
    """
    if std == 0:
        raise ValueError("std must be non-zero for Z-score standardisation")

    print(f"\nLoading audio: {audio_path}")

    # Step 1: Load audio file at target sample rate. The sample rate that
    # librosa returns is the one requested, so it is not kept.
    audio, _ = librosa.load(audio_path, sr=sample_rate, duration=duration, mono=True)

    # Pad (with trailing zeros) or trim so every clip has exactly the same length
    target_length = int(sample_rate * duration)
    if len(audio) < target_length:
        audio = np.pad(audio, (0, target_length - len(audio)))
    else:
        audio = audio[:target_length]

    print(f"Audio loaded: {len(audio)} samples at {sample_rate}Hz")

    # Step 2: Convert to mel-spectrogram
    mel_spec = librosa.feature.melspectrogram(
        y=audio,
        sr=sample_rate,
        n_mels=n_mels
    )
    print(f"Mel-spectrogram created: {mel_spec.shape}")

    # Step 3: Convert to log scale (dB), relative to the clip's own peak power
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    print("Converted to dB scale")

    # Step 4: Z-score standardization using training statistics
    mel_spec_normalized = (mel_spec_db - mean) / std
    print(f"Standardized using mean={mean:.2f}, std={std:.2f}")

    # Step 5: Prepare for MobileNetV2 (resize + RGB). The configured
    # target_size is forwarded so the configuration cell is actually honoured.
    prepared_spec = prepare_for_mobilenet(mel_spec_normalized, target_size)

    print(f"Final shape for model input: {prepared_spec.shape}")

    return prepared_spec


def prepare_for_mobilenet(spectrogram, target_size=(224, 224)):
    """
    Prepare mel-spectrogram for MobileNetV2 input.

    Args:
        spectrogram: numpy array mel-spectrogram (already standardized),
            either 2-D or with a trailing channel dimension
        target_size: tuple (height, width) for resizing

    Returns:
        float32 numpy array with a leading batch dimension of 1 and channels
        last. Single-channel input is replicated to 3 channels.
    """
    height, width = target_size

    # Step 1: Resize to MobileNetV2's expected input size.
    # cv2.resize takes dsize as (width, height), the reverse of the
    # (height, width) order this function documents, so swap explicitly.
    resized = cv2.resize(spectrogram, (width, height))

    # Step 2: cv2.resize returns a 2-D array for single-channel input, so
    # restore the channel dimension when it is missing
    if resized.ndim == 2:
        resized = np.expand_dims(resized, axis=-1)

    # Step 3: Convert grayscale (1 channel) to RGB (3 channels)
    if resized.shape[-1] == 1:
        resized = np.repeat(resized, 3, axis=-1)

    # Step 4: Add batch dimension
    resized = np.expand_dims(resized, axis=0)

    return resized.astype(np.float32)

print("Preprocessing functions defined")

## 4. Model Inference Functions

In [None]:
def stage_1_cry_detection(model, audio_data, *, threshold=None):
    """
    Stage 1: Detect if audio contains a cry.

    Args:
        model: Loaded cry detection model; only its
            ``predict(audio_data, verbose=0)`` method is used
        audio_data: Preprocessed mel-spectrogram
        threshold: Minimum score at which a cry is reported. When omitted,
            the notebook-level ``cry_detection_threshold`` is used.

    Returns:
        tuple: (is_cry, confidence_score)
    """
    if threshold is None:
        threshold = cry_detection_threshold

    print("\nStage 1: Cry Detection")

    # Run inference
    prediction = model.predict(audio_data, verbose=0)

    # The first output of the first batch item is read as the cry probability
    confidence = float(prediction[0][0])

    # Determine if cry detected based on threshold
    is_cry = confidence >= threshold

    print(f"Confidence Score: {confidence:.1%}")
    print(f"Detection Threshold: {threshold:.1%}")
    print(f"Result: {'CRY DETECTED' if is_cry else 'NO CRY DETECTED'}")

    return is_cry, confidence


def stage_2_cry_classification(model, audio_data, *, threshold=None, labels=None):
    """
    Stage 2: Classify cry type (hungry vs pain).

    Args:
        model: Loaded cry classification model; only its
            ``predict(audio_data, verbose=0)`` method is used
        audio_data: Preprocessed mel-spectrogram
        threshold: Minimum confidence needed to report a label. When omitted,
            the notebook-level ``cry_classification_threshold`` is used.
        labels: Mapping from class index (0 or 1) to label. When omitted,
            the notebook-level ``cry_types`` is used.

    Returns:
        tuple: (cry_type, confidence_score). ``cry_type`` is ``None`` when the
        confidence is below the threshold.
    """
    if threshold is None:
        threshold = cry_classification_threshold
    if labels is None:
        labels = cry_types

    print("\nStage 2: Cry Classification")

    # Run inference
    prediction = model.predict(audio_data, verbose=0)

    # Get raw prediction value (sigmoid output between 0 and 1)
    raw_value = float(prediction[0][0])

    print(f"Raw prediction value: {raw_value:.4f}")

    # Interpret sigmoid output with a fixed 0.5 decision boundary:
    # values above 0.5 -> class 1, everything else -> class 0
    predicted_class = 1 if raw_value > 0.5 else 0

    # Confidence is the distance from the opposite end of the [0, 1] range:
    # how close to 1 for class 1, how close to 0 for class 0
    confidence = raw_value if predicted_class == 1 else 1 - raw_value

    cry_type = labels[predicted_class]

    print("Threshold: 0.50")
    print(f"Predicted Class: {cry_type}")
    print(f"Confidence Score: {confidence:.1%}")
    print(f"Classification Threshold: {threshold:.1%}")

    # Withhold the label when the confidence does not meet the threshold
    if confidence < threshold:
        print("Result: CLASSIFICATION UNCERTAIN (below threshold)")
        return None, confidence

    print("Result: CLASSIFICATION CONFIDENT")
    return cry_type, confidence


print("Inference functions defined")

## 5. Load Models

Load the trained cry detection and classification models.

In [None]:
print("Loading models:\n")

# First, report whether each model file exists, so a wrong path is obvious
# before Keras raises a less direct error
print("Checking cry detection model...")
print(f"  Path: {cry_detection_model_path}")
print(f"  Exists: {cry_detection_model_path.exists()}")
print(f"  Is file: {cry_detection_model_path.is_file()}")

print("\nChecking cry classification model...")
print(f"  Path: {cry_classification_model_path}")
print(f"  Exists: {cry_classification_model_path.exists()}")
print(f"  Is file: {cry_classification_model_path.is_file()}")

try:
    cry_detection_model = keras.models.load_model(cry_detection_model_path)
    print("\nCry detection model loaded")

    cry_classification_model = keras.models.load_model(cry_classification_model_path)
    print("Cry classification model loaded")
except Exception as e:
    print(f"\nERROR loading models: {e}")
    # Re-raise so execution stops here: continuing would leave the model
    # variables undefined (or stale from an earlier run) for the cells below
    raise
else:
    print("\nAll models loaded successfully!")

## 6. Run Inference on Test Audio

In [None]:
# Test audio file path
hungry_sample_1 = Path("C:/Users/danel/FYP/echocare-infant-cry-classification/dataset/processed/cry_detection/test/cry/hungry_0046017001.wav")
hungry_sample_2 = Path("C:/Users/danel/FYP/echocare-infant-cry-classification/dataset/processed/cry_detection/test/cry/hungry_0083007001.wav")
normal_sample = Path("C:/Users/danel/FYP/echocare-infant-cry-classification/dataset/processed/cry_detection/test/cry/normal_0052039000.wav")

pain_sample_1 = Path("C:/Users/danel/FYP/echocare-infant-cry-classification/dataset/processed/cry_detection/test/cry/pain_0013011002.wav")
pain_sample_2 = Path("C:/Users/danel/FYP/echocare-infant-cry-classification/dataset/processed/cry_detection/test/cry/pain_0022003002.wav")

non_cry_sample_1 = Path("C:/Users/danel/FYP/echocare-infant-cry-classification/dataset/processed/cry_detection/test/non-cry/vacuum_cleaner_5-263902-A-36_seg01.wav")
non_cry_sample_2 = Path("C:/Users/danel/FYP/echocare-infant-cry-classification/dataset/processed/cry_detection/test/non-cry/coughing_5-204604-A-24_seg00.wav")

In [None]:
# Select which sample clip the preprocessing and classification cells below use;
# swap in any of the other sample paths to try a different clip
test_audio_path = hungry_sample_1


# Banner printed ahead of the pipeline output
print("ECHOCARE DEMO - CRY DETECTION & CLASSIFICATION")

### 6.1 Preprocess Audio

In [None]:
try:
    # Preprocess for detection using detection normalisation parameters
    audio_data_detection = load_and_preprocess_audio(
        test_audio_path,
        cry_detection_mean,
        cry_detection_std
    )
except Exception as e:
    print(f"\nERROR preprocessing audio: {e}")
    # Re-raise so execution stops here: otherwise audio_data_detection would be
    # undefined, or still hold a previously processed clip, and the detection
    # step would score the wrong audio
    raise

### 6.2 Stage 1: Cry Detection

In [None]:
try:
    # Stage 1 decides whether the clip contains a cry at all
    is_cry, detection_confidence = stage_1_cry_detection(
        cry_detection_model,
        audio_data_detection
    )
except Exception as e:
    print(f"\nERROR in cry detection: {e}")
    # Re-raise so execution stops here: the following cells read is_cry and
    # detection_confidence, which would be undefined or stale after a failure
    raise

### 6.3 Stage 2: Cry Classification (only if cry detected)

In [None]:
# Stage 2 only runs when Stage 1 reported a cry; in every other case
# (skipped or failed) the result stays "no label, zero confidence"
run_stage_2 = bool(is_cry)
cry_type, classification_confidence = None, 0.0

if not run_stage_2:
    print("\nSkipping Stage 2 (no cry detected)")
else:
    try:
        # The classifier has its own normalisation statistics, so the clip is
        # preprocessed again instead of reusing the detection input
        audio_data_classification = load_and_preprocess_audio(
            test_audio_path, cry_classification_mean, cry_classification_std
        )
        cry_type, classification_confidence = stage_2_cry_classification(
            cry_classification_model, audio_data_classification
        )
    except Exception as e:
        print(f"\nERROR in cry classification: {e}")
        cry_type, classification_confidence = None, 0.0

### 6.4 Final Result

In [None]:
# Summarise the outcome of both stages between two separator lines
separator = "=" * 50

print("\n" + separator)
print("FINAL RESULT")
print(separator)

if is_cry and cry_type is not None:
    # Cry detected and the classifier was confident about its type
    print(f"Baby is crying: {cry_type}")
    print(f"  Classification confidence: {classification_confidence:.1%}")
elif is_cry:
    # Cry detected, but no label was produced; report the detection confidence only
    print(f"Baby is crying ({detection_confidence:.1%})")
else:
    print(f"No cry detected (confidence: {detection_confidence:.1%})")

print(separator)