# Baby-cry classification using a CNN-RNN model trained on mel-spectrogram images

In [None]:
pip install librosa matplotlib numpy torch torchvision torchaudio tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
from google.colab import files
import zipfile
import os

# Upload ZIP file
# files.upload() opens the Colab file picker and returns a dict keyed by the
# name each uploaded file was saved under (in the current working directory).
uploaded = files.upload()  # Manually select your ZIP file
if not uploaded:
    raise RuntimeError("No file was uploaded; select the dataset ZIP archive and re-run this cell.")

# Define paths
# Use the name the upload was actually saved under instead of assuming
# "archive.zip": a differently named archive would not be found, and a stale
# file with the assumed name could be extracted by mistake.
# If several files were uploaded, the first one is used.
zip_path = os.path.abspath(next(iter(uploaded)))
extract_to = "/content/extracted_folder"  # Destination folder

# Fail early with a clear message if the upload is not a ZIP archive
if not zipfile.is_zipfile(zip_path):
    raise ValueError(f"Uploaded file is not a ZIP archive: {zip_path}")

# Create directory if it doesn't exist
os.makedirs(extract_to, exist_ok=True)

# Extract ZIP file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print(f"Extracted to: {extract_to}")


Saving archive.zip to archive.zip
Extracted to: /content/extracted_folder


In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# Define dataset paths
AUDIO_DIR = "/content/extracted_folder/donateacry_corpus"  # Update with your dataset path
OUTPUT_DIR = "/content/spectrograms"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Convert each audio file to a mel-spectrogram image, mirroring the
# AUDIO_DIR/<category>/<file>.wav layout as OUTPUT_DIR/<category>/<file>.png.
for category in sorted(os.listdir(AUDIO_DIR)):
    category_path = os.path.join(AUDIO_DIR, category)

    # Only sub-directories are categories; a stray file at the top level would
    # otherwise make os.listdir(category_path) raise NotADirectoryError.
    if not os.path.isdir(category_path):
        continue

    # Match the extension case-insensitively so ".WAV" files are not silently dropped.
    wav_files = [
        name for name in sorted(os.listdir(category_path))
        if os.path.splitext(name)[1].lower() == ".wav"
    ]

    # Do not create an output folder for a category without audio: an empty
    # class folder under OUTPUT_DIR would be a class with no samples.
    if not wav_files:
        continue

    output_category_path = os.path.join(OUTPUT_DIR, category)
    os.makedirs(output_category_path, exist_ok=True)

    for audio_file in wav_files:
        audio_path = os.path.join(category_path, audio_file)

        # Swap only the real extension; str.replace would also rewrite any
        # ".wav" that happens to appear in the middle of the file name.
        stem = os.path.splitext(audio_file)[0]
        output_path = os.path.join(output_category_path, stem + ".png")

        # Load (resampled to 22050 Hz) and convert to a dB-scaled mel spectrogram
        y, sr = librosa.load(audio_path, sr=22050)
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Save as image; the figure is always closed, even if plotting or
        # saving fails, so figures do not pile up in memory across the loop.
        fig, ax = plt.subplots(figsize=(5, 5))
        try:
            librosa.display.specshow(mel_spec_db, sr=sr, x_axis="time", y_axis="mel", ax=ax)
            ax.axis("off")
            fig.savefig(output_path, bbox_inches="tight", pad_inches=0)
        finally:
            plt.close(fig)

print("✅ Audio dataset converted to spectrograms!")


✅ Audio dataset converted to spectrograms!


In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from PIL import Image
import os

# Define paths
OUTPUT_DIR = "/content/spectrograms"  # Directory containing spectrogram images

# Fixed seed so the train/validation split (and therefore the reported
# validation accuracy) is the same on every run.
SPLIT_SEED = 42

# Define transforms
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize images to 128x128
    transforms.ToTensor(),
    # The model's first Conv2d takes 3 input channels, so give one mean/std per
    # channel explicitly instead of relying on a 1-element list being broadcast.
    # Maps pixel values from [0, 1] to [-1, 1].
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load dataset (one class per sub-directory of OUTPUT_DIR)
dataset = ImageFolder(root=OUTPUT_DIR, transform=transform)
if len(dataset) < 2:
    # With fewer than two images one of the splits would be empty and the
    # training/evaluation loops would later fail with a division by zero.
    raise ValueError(f"Need at least 2 spectrogram images to split, found {len(dataset)} in {OUTPUT_DIR}")

# Split dataset (80% Train, 20% Validation)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Define the CNN-RNN model
class CNNRNNModel(nn.Module):
    """CNN feature extractor followed by a 2-layer LSTM and a linear classifier.

    Args:
        num_classes: Number of output classes (size of the logit vector).
        image_size: ``(height, width)`` of the input images. Defaults to
            ``(128, 128)``, which reproduces the original fixed architecture
            (LSTM input size ``128 * 16 * 16``), so existing checkpoints still load.

    Input:  tensor of shape ``(batch, 3, height, width)``.
    Output: tensor of shape ``(batch, num_classes)`` with unnormalized logits.

    Raises:
        ValueError: If ``image_size`` is too small to survive the three pooling
            stages, or if ``forward`` receives images whose size does not match
            ``image_size``.

    NOTE(review): the whole CNN feature map is flattened into a single vector,
    so the LSTM only ever sees a sequence of length 1 and cannot model
    temporal structure. Splitting the feature map along the time axis would
    change the parameter shapes (and break saved checkpoints), so it is left
    as a follow-up.
    """

    def __init__(self, num_classes, image_size=(128, 128)):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # The padded 3x3 convolutions keep H and W; each of the three 2x2/stride-2
        # max-pools halves them (rounding down), i.e. an overall division by 8.
        height, width = image_size
        self.feature_size = 128 * (height // 8) * (width // 8)
        if self.feature_size <= 0:
            raise ValueError(f"image_size {image_size} is too small; both sides must be at least 8")

        self.rnn = nn.LSTM(input_size=self.feature_size, hidden_size=256, num_layers=2, batch_first=True)
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        features = self.cnn(x).flatten(start_dim=1)  # (batch, channels * H' * W')

        # Fail loudly on a size mismatch instead of letting a reshape silently
        # cut one sample's features into several fake "time steps".
        if features.size(1) != self.feature_size:
            raise ValueError(
                f"Expected {self.feature_size} CNN features per sample, got {features.size(1)}; "
                "input images do not match the image_size the model was built with"
            )

        sequence = features.unsqueeze(1)  # (batch, seq_len=1, feature_size)
        rnn_out, _ = self.rnn(sequence)
        last_step = rnn_out[:, -1, :]  # Take the last output of the RNN
        return self.fc(last_step)

# Run on the GPU when one is available, otherwise on the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Build the model with one output per class folder, plus the loss and optimizer
num_classes = len(dataset.classes)
model = CNNRNNModel(num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Train for a fixed number of epochs, reporting the mean per-batch loss of each
num_epochs = 15
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()

    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss:.4f}")

# Persist the trained weights (state_dict only, not the full module object)
torch.save(model.state_dict(), "cnn_rnn_baby_cry_model.pth")
print("✅ CNN-RNN Model Trained and Saved!")

# Measure accuracy on the validation split: eval mode, no gradient tracking
model.eval()
correct_predictions, total_samples = 0, 0
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # The predicted class is the index of the largest logit in each row
        predicted = torch.max(outputs, 1)[1]

        total_samples += labels.size(0)
        matches = (predicted == labels).sum().item()
        correct_predictions += matches

# Share of correctly classified validation samples, as a percentage
accuracy = 100 * correct_predictions / total_samples
print(f"Accuracy of the CNN-RNN model: {accuracy:.2f}%")


Epoch 1/15, Loss: 1.1501
Epoch 2/15, Loss: 0.7283
Epoch 3/15, Loss: 0.6875
Epoch 4/15, Loss: 0.6790
Epoch 5/15, Loss: 0.6822
Epoch 6/15, Loss: 0.6834
Epoch 7/15, Loss: 0.6834
Epoch 8/15, Loss: 0.6833
Epoch 9/15, Loss: 0.6817
Epoch 10/15, Loss: 0.6815
Epoch 11/15, Loss: 0.6827
Epoch 12/15, Loss: 0.6853
Epoch 13/15, Loss: 0.6812
Epoch 14/15, Loss: 0.6840
Epoch 15/15, Loss: 0.6818
✅ CNN-RNN Model Trained and Saved!
Accuracy of the CNN-RNN model: 85.87%


In [None]:
import pickle
# Save the model's state_dict
torch.save(model.state_dict(), "cnn_rnn_baby_cry_model_state_dict.pth")
print("✅ Model's state_dict saved as 'cnn_rnn_baby_cry_model_state_dict.pth'")

# Save the model's state_dict using pickle
# Pickle CPU copies of the tensors: tensors pickled while on the GPU can only
# be unpickled on a machine that has CUDA available.
cpu_state_dict = {name: tensor.cpu() for name, tensor in model.state_dict().items()}
with open("cnn_rnn_baby_cry_model_state_dict.pkl", "wb") as f:
    pickle.dump(cpu_state_dict, f)
print("✅ Model's state_dict also saved as 'cnn_rnn_baby_cry_model_state_dict.pkl'")

# To load the model's state_dict
# Initialize the model architecture
model = CNNRNNModel(num_classes=num_classes).to(device)

# Load the state_dict from the .pth file with torch.load rather than
# pickle.load: map_location places the tensors on whatever device is in use,
# and weights_only=True restricts unpickling to plain tensor data.
# SECURITY: never pickle.load a .pkl checkpoint from an untrusted source;
# unpickling can execute arbitrary code.
state_dict = torch.load(
    "cnn_rnn_baby_cry_model_state_dict.pth",
    map_location=device,
    weights_only=True,
)

# Load parameters into the model
model.load_state_dict(state_dict)
print("✅ Model's state_dict loaded successfully from 'cnn_rnn_baby_cry_model_state_dict.pth'")

# Set the model to evaluation mode (also displays the architecture as cell output)
model.eval()

✅ Model's state_dict saved as 'cnn_rnn_baby_cry_model_state_dict.pth'
✅ Model's state_dict also saved as 'cnn_rnn_baby_cry_model_state_dict.pkl'
✅ Model's state_dict loaded successfully from 'cnn_rnn_baby_cry_model_state_dict.pkl'


CNNRNNModel(
  (cnn): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (rnn): LSTM(32768, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=256, out_features=5, bias=True)
)

In [None]:
# # prompt: code to save the pkl of the code

# import pickle

# # Assuming your model is named 'model'
# # Save the model to a pickle file
# with open('model.pkl', 'wb') as f:
#     pickle.dump(model, f)

# # Download the pickle file
# from google.colab import files
# files.download('model.pkl')
