In [None]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import time
import random
from model import EnhancedAudioCNN, train_model, eval_model

In [None]:
# Compute device used for the model and its inputs; change the string to
# "cuda" to run on a GPU.
device = torch.device("cpu")
print(f"Using device: {device}")

# Instantiate the network on the chosen device first, then restore the saved
# weights from the checkpoint file, remapping stored tensors onto `device`.
checkpoint_path = "audio_classification_model_augmented.pth"
model = EnhancedAudioCNN().to(device)
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)

# Switch to evaluation mode. Left as the final bare expression so the
# notebook displays the module structure as the cell output.
model.eval()

EnhancedAudioCNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padd

In [None]:
# Preprocessing applied to every spectrogram image: resize to 224x224, then
# convert the PIL image to a tensor.
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

# Run inferences on test spectrogram images
print("\nRunning inferences on test spectrogram images:")
model.eval()

# Candidate input folders. Both are named so that choosing one is an explicit
# decision rather than a silent overwrite of the same variable.
# NOTE(review): these are absolute, machine-specific paths — consider deriving
# them from a configurable base directory.
unlabeled_inference_dir = r"C:\Users\User\Documents\audio-recognition-master\audio-recognition-master\img_dataset\inferences"
cat_test_dir = r"C:\Users\User\Documents\audio-recognition-master\audio-recognition-master\img_dataset\test\cat"
inference_dir = cat_test_dir

# Prediction needs no gradients; disabling autograd avoids building a
# computation graph for each image.
with torch.no_grad():
    # Sort the listing so the printed log has a stable order on every platform.
    for spectrogram in sorted(os.listdir(inference_dir)):
        # Only ".jpg" files are treated as spectrograms; everything else is skipped.
        if not spectrogram.endswith(".jpg"):
            continue
        try:
            # Load the spectrogram image; the context manager releases the file
            # handle as soon as the RGB copy has been made.
            img_path = os.path.join(inference_dir, spectrogram)
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
            # Apply transforms
            img_tensor = transform(image)
            # The CNN consumes the image tensor directly; add a leading batch
            # dimension and move it to the model's device.
            features = img_tensor.unsqueeze(0).to(device)
            # Make prediction
            pred = model(features)
            # A first output below 0.5 is reported as "cat", otherwise "dog".
            # NOTE(review): this presumes the model emits a probability-like
            # score in pred[0, 0] — confirm against EnhancedAudioCNN.
            label = "cat" if pred[0, 0] < 0.5 else "dog"
            print(f"for {spectrogram}, the prediction is {label}.")
        except Exception as e:
            # Best-effort batch run: report the failing file and carry on with
            # the remaining images instead of aborting the whole cell.
            print(f"Error processing {spectrogram}: {e}")
            continue


Running inferences on test spectrogram images:
for cat_126.jpg, the prediction is cat.
for cat_137.jpg, the prediction is cat.
for cat_143.jpg, the prediction is cat.
for cat_144.jpg, the prediction is dog.
for cat_152.jpg, the prediction is cat.
for cat_158.jpg, the prediction is cat.
for cat_17.jpg, the prediction is cat.
for cat_20.jpg, the prediction is cat.
for cat_66.jpg, the prediction is cat.
for cat_67.jpg, the prediction is cat.
for cat_75.jpg, the prediction is dog.
for cat_86.jpg, the prediction is cat.
for cat_90.jpg, the prediction is cat.
