<a href="https://colab.research.google.com/github/abhishek1284/Assignment1/blob/main/cryprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import kagglehub

# Download latest versiona
path = kagglehub.dataset_download("warcoder/infant-cry-audio-corpus")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/infant-cry-audio-corpus


In [17]:
dataset_path = "/kaggle/input/infant-cry-audio-corpus/donateacry_corpus"

In [18]:
import os

dataset_path = "/kaggle/input/infant-cry-audio-corpus/donateacry_corpus"
2
total_files = 0
for label in os.listdir(dataset_path):
    label_path = os.path.join(dataset_path, label)
    if not os.path.isdir(label_path):
        continue
    wav_files = [f for f in os.listdir(label_path) if f.endswith('.wav')]
    print(f"Label '{label}' has {len(wav_files)} files")
    total_files += len(wav_files)

print(f"Total audio files found: {total_files}")

Label 'hungry' has 382 files
Label 'burping' has 8 files
Label 'discomfort' has 27 files
Label 'belly_pain' has 16 files
Label 'tired' has 24 files
Total audio files found: 457


In [19]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, TimeDistributed, Flatten, LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

dataset_path = "/kaggle/input/infant-cry-audio-corpus/donateacry_corpus"

# Step 1: Verify audio files and labels
total_files = 0
for label in os.listdir(dataset_path):
    label_path = os.path.join(dataset_path, label)
    if not os.path.isdir(label_path):
        continue
    wav_files = [f for f in os.listdir(label_path) if f.endswith('.wav')]
    print(f"Label '{label}' has {len(wav_files)} files")
    total_files += len(wav_files)

print(f"Total audio files found: {total_files}")
if total_files == 0:
    raise RuntimeError("No audio files found! Check your dataset path or dataset download.")

# Step 2: Feature extraction function
def extract_mel(file_path, max_len=216):
    y, sr = librosa.load(file_path, sr=22050)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    if mel_db.shape[1] < max_len:
        pad_width = max_len - mel_db.shape[1]
        mel_db = np.pad(mel_db, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mel_db = mel_db[:, :max_len]
    return mel_db

# Step 3: Load data and labels
X = []
y = []

for label in os.listdir(dataset_path):
    label_path = os.path.join(dataset_path, label)
    if not os.path.isdir(label_path):
        continue
    wav_files = [f for f in os.listdir(label_path) if f.endswith('.wav')]
    for wav_file in wav_files:
        file_path = os.path.join(label_path, wav_file)
        features = extract_mel(file_path)
        X.append(features)
        y.append(label)

X = np.array(X)
y = np.array(y)
X = np.expand_dims(X, -1)  # shape (samples, 128, 216, 1)

print(f"Data shape: {X.shape}, Labels shape: {y.shape}")

# Step 4: Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
print("Classes:", le.classes_)

# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
print(f"Train samples: {len(X_train)}, Test samples: {len(X_test)}")

# Step 6: Define CNN+LSTM model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 216, 1)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    TimeDistributed(Flatten()),
    LSTM(128),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(len(le.classes_), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Step 7: Callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ModelCheckpoint("best_model.h5", save_best_only=True)
]

# Step 8: Train model
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=40,
    batch_size=32,
    callbacks=callbacks
)

# Step 9: Evaluate on test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")

Label 'hungry' has 382 files
Label 'burping' has 8 files
Label 'discomfort' has 27 files
Label 'belly_pain' has 16 files
Label 'tired' has 24 files
Total audio files found: 457
Data shape: (457, 128, 216, 1), Labels shape: (457,)
Classes: ['belly_pain' 'burping' 'discomfort' 'hungry' 'tired']
Train samples: 365, Test samples: 92


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.6119 - loss: 1.0973



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3s/step - accuracy: 0.6239 - loss: 1.0799 - val_accuracy: 0.0270 - val_loss: 2.1245
Epoch 2/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 3s/step - accuracy: 0.8330 - loss: 0.7430 - val_accuracy: 0.0270 - val_loss: 2.2924
Epoch 3/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.8295 - loss: 0.7584



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 3s/step - accuracy: 0.8303 - loss: 0.7571 - val_accuracy: 0.0270 - val_loss: 1.7523
Epoch 4/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.8557 - loss: 0.6560



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3s/step - accuracy: 0.8545 - loss: 0.6591 - val_accuracy: 0.0270 - val_loss: 1.4200
Epoch 5/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.8483 - loss: 0.7269



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3s/step - accuracy: 0.8475 - loss: 0.7284 - val_accuracy: 0.8919 - val_loss: 0.6932
Epoch 6/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.8302 - loss: 0.7603



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 3s/step - accuracy: 0.8309 - loss: 0.7575 - val_accuracy: 0.8919 - val_loss: 0.5935
Epoch 7/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.8488 - loss: 0.6790



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3s/step - accuracy: 0.8477 - loss: 0.6818 - val_accuracy: 0.8919 - val_loss: 0.5521
Epoch 8/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 3s/step - accuracy: 0.8566 - loss: 0.6688 - val_accuracy: 0.8919 - val_loss: 0.5680
Epoch 9/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 3s/step - accuracy: 0.8265 - loss: 0.7378 - val_accuracy: 0.8919 - val_loss: 0.5523
Epoch 10/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.8375 - loss: 0.7098



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 4s/step - accuracy: 0.8376 - loss: 0.7119 - val_accuracy: 0.8919 - val_loss: 0.5470
Epoch 11/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8621 - loss: 0.6371



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 3s/step - accuracy: 0.8601 - loss: 0.6437 - val_accuracy: 0.8919 - val_loss: 0.5422
Epoch 12/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3s/step - accuracy: 0.8340 - loss: 0.7163 - val_accuracy: 0.8919 - val_loss: 0.5508
Epoch 13/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 3s/step - accuracy: 0.8276 - loss: 0.7232 - val_accuracy: 0.8919 - val_loss: 0.5607
Epoch 14/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 3s/step - accuracy: 0.8433 - loss: 0.6818 - val_accuracy: 0.8919 - val_loss: 0.5522
Epoch 15/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3s/step - accuracy: 0.8511 - loss: 0.6679 - val_accuracy: 0.8919 - val_loss: 0.5825
Epoch 16/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3s/step - accuracy: 0.8374 - loss: 0.7105 - val_accuracy: 

In [20]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
import joblib

# Parameters
dataset_path = "/kaggle/input/infant-cry-audio-corpus/donateacry_corpus"
max_len = 216
n_mels = 128

# Feature extraction
def extract_mel(file_path, max_len=216):
    y, sr = librosa.load(file_path, sr=22050)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    if mel_db.shape[1] < max_len:
        pad_width = max_len - mel_db.shape[1]
        mel_db = np.pad(mel_db, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mel_db = mel_db[:, :max_len]
    return mel_db

# Load dataset
X, y = [], []
for label in os.listdir(dataset_path):
    class_dir = os.path.join(dataset_path, label)
    for file in os.listdir(class_dir):
        if file.endswith(".wav"):
            path = os.path.join(class_dir, file)
            try:
                features = extract_mel(path, max_len)
                X.append(features)
                y.append(label)
            except Exception as e:
                print(f"Failed to process {file}: {e}")

X = np.array(X)
X = np.expand_dims(X, axis=-1)
y = np.array(y)

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
joblib.dump(le, "label_encoder.pkl")

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Compute class weights
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))

# Model architecture: CNN + LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, TimeDistributed, LSTM, Dense, Dropout, Flatten, BatchNormalization, Reshape

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, max_len, 1)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(len(le.classes_), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=32,
    class_weight=class_weights,
    callbacks=[early_stop]
)

model.save("infant_cry_model.h5")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 2s/step - accuracy: 0.2207 - loss: 61.0759 - val_accuracy: 0.0761 - val_loss: 38.5899
Epoch 2/30
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2s/step - accuracy: 0.3552 - loss: 9.0637 - val_accuracy: 0.5978 - val_loss: 15.3509
Epoch 3/30
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - accuracy: 0.3326 - loss: 2.9078 - val_accuracy: 0.8043 - val_loss: 18.1390
Epoch 4/30
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2s/step - accuracy: 0.3049 - loss: 0.9996 - val_accuracy: 0.7717 - val_loss: 7.7332
Epoch 5/30
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 2s/step - accuracy: 0.3871 - loss: 0.6927 - val_accuracy: 0.8043 - val_loss: 6.0452
Epoch 6/30
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 2s/step - accuracy: 0.7448 - loss: 0.6471 - val_accuracy: 0.8043 - val_loss: 9.9931
Epoch 7/30
[1m12/12[0m [32m━━━━━━



In [30]:

import joblib

# Save model and label encoder
model.save("infant_cry_model.h5")
joblib.dump(le, "label_encoder.pkl")






['label_encoder.pkl']

In [31]:
pip install streamlit




In [32]:
!pip install streamlit pyngrok librosa scikit-learn tensorflow




In [33]:
!pip install streamlit pyngrok librosa scikit-learn tensorflow




In [34]:
!pip install streamlit pyngrok librosa joblib




In [35]:
!pip install streamlit pyngrok librosa joblib




In [36]:
%%writefile app.py
import streamlit as st
import numpy as np
import librosa
import tensorflow as tf
import joblib

model = tf.keras.models.load_model("infant_cry_model.h5")
label_encoder = joblib.load("label_encoder.pkl")
max_len = 216
n_mels = 128

def extract_mel(file_path, max_len=max_len):
    y, sr = librosa.load(file_path, sr=22050)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    if mel_db.shape[1] < max_len:
        pad_width = max_len - mel_db.shape[1]
        mel_db = np.pad(mel_db, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mel_db = mel_db[:, :max_len]
    return mel_db

def predict(file_path):
    features = extract_mel(file_path)
    features = np.expand_dims(features, axis=-1)
    features = np.expand_dims(features, axis=0)
    pred = model.predict(features)
    class_index = np.argmax(pred)
    class_label = label_encoder.inverse_transform([class_index])[0]
    confidence = pred[0][class_index]
    return class_label, confidence

st.title("👶 Infant Cry Audio Classification")

uploaded_file = st.file_uploader("Upload a WAV file", type=["wav"])
if uploaded_file is not None:
    st.audio(uploaded_file, format="audio/wav")
    with open("temp.wav", "wb") as f:
        f.write(uploaded_file.read())
    label, confidence = predict("temp.wav")
    st.success(f"Predicted Cry Type: **{label}**")
    st.write(f"Confidence: {confidence:.2f}")


Overwriting app.py


In [37]:
!streamlit run app.py &>/content/log.txt &


In [38]:
from pyngrok import ngrok

# Make sure to use your actual valid token here
ngrok.set_auth_token("2zAbizTB3SzPsi3hhl2GeFWX1CU_3Y7e6RYCV2uycryGNPQ7e")

# ✅ Correct usage:
public_url = ngrok.connect(8501)  # Not `port=8501`, just pass 8501 directly
print("🔗 Streamlit URL:", public_url)

🔗 Streamlit URL: NgrokTunnel: "https://b6f7-34-75-58-165.ngrok-free.app" -> "http://localhost:8501"
