In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.data import DataLoader
from tqdm import tqdm


In [11]:
# Build a ResNet-50 architecture with randomly initialised weights; no
# pre-trained weights are requested here (`weights=None` replaces the
# deprecated `pretrained=False` argument).
NUM_CLASSES = 4
model = models.resnet50(weights=None)

# Replace the final fully connected layer so it has one output per class
# in the dataset (4 classes).
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, NUM_CLASSES)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)



In [12]:
model_path = 'C:\\Users\\PC\\Desktop\\lisnen_research_files\\audio_research_crnn_files\\model_weights\\resnet_50.pth'
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU still loads on a CPU-only machine.
# weights_only=True restricts unpickling to tensors and plain containers,
# which avoids executing arbitrary pickled code and silences the FutureWarning
# that torch.load emits when the argument is omitted.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
# Last expression of the cell: its return value reports missing/unexpected keys.
model.load_state_dict(state_dict)

  state_dict=torch.load(model_path)


<All keys matched successfully>

In [13]:
# Switch the model to evaluation mode. The call is the last expression of the
# cell, so the module it returns is what gets displayed as the cell output.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [14]:
# Preprocessing applied to every test image: resize to 224x224, convert to a
# tensor, then normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics — confirm they match what was used during training.
test_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Build the test dataset from the image folder and wrap it in a loader.
# shuffle=False keeps the iteration order fixed between runs.
test_dataset_path = 'C:\\Users\\PC\\Desktop\\lisnen_data\\validation_data\\test'
test_dataset = datasets.ImageFolder(
    root=test_dataset_path,
    transform=test_transforms,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)


In [15]:
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import numpy as np

def evaluate_model(model, dataloader, class_names):
    """Evaluate a classifier on a dataloader and print summary metrics.

    Runs ``model`` over every batch of ``dataloader`` with gradient tracking
    disabled, takes the arg-max over dimension 1 of the outputs as the
    predicted class, and prints accuracy plus weighted precision, recall and
    F1 score.

    Batches are moved to the module-level ``device`` before the forward pass,
    so the model is expected to already be on that device.

    Args:
        model: Module invoked as ``model(inputs)``; put into eval mode here.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        class_names: Currently unused; kept so existing callers keep working.

    Returns:
        A tuple ``(wrong_predictions, all_labels, all_preds)`` where
        ``wrong_predictions`` is a list of
        ``(input tensor on CPU, predicted label, true label)`` for every
        misclassified sample, and ``all_labels`` / ``all_preds`` are flat
        lists of the true and predicted labels in iteration order.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    model.eval()  # Set model to evaluation mode
    correct = 0
    all_preds = []
    all_labels = []
    wrong_predictions = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            # Move predictions and labels to NumPy once per batch instead of
            # repeating the device-to-host copy for every misclassified sample.
            preds_np = preds.cpu().numpy()
            labels_np = labels.cpu().numpy()

            correct += int((preds_np == labels_np).sum())
            all_preds.extend(preds_np)
            all_labels.extend(labels_np)

            # Collect wrong predictions
            wrong_indices = np.where(preds_np != labels_np)[0]
            if wrong_indices.size:
                # Copy the batch to the CPU only when it has mistakes to record.
                inputs_cpu = inputs.cpu()
                for idx in wrong_indices:
                    wrong_predictions.append(
                        (inputs_cpu[idx], preds_np[idx], labels_np[idx])
                    )

    # Divide by the number of samples actually evaluated rather than
    # len(dataloader.dataset): the two differ when the loader uses a sampler
    # or drops the last incomplete batch.
    total = len(all_labels)
    if total == 0:
        raise ValueError("evaluate_model received a dataloader that yielded no samples")

    accuracy = correct / total
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print(f'Test Accuracy: {accuracy:.4f}')
    print(f'Test Precision: {precision:.4f}')
    print(f'Test Recall: {recall:.4f}')
    print(f'Test F1 Score: {f1:.4f}')

    return wrong_predictions, all_labels, all_preds

# Evaluate the model on the test data.
# NOTE(review): evaluate_model returns a 3-tuple
# (wrong_predictions, all_labels, all_preds), so the name `wrong_predictions`
# below is bound to that whole tuple rather than only the list of
# misclassified samples — unpack it if only the list is wanted.
class_names = [
    'Baby Cry',
    'Door Bell',
    'Door Knock',
    'Fire Alarm',
]
wrong_predictions = evaluate_model(model, test_loader, class_names)




Test Accuracy: 0.9430
Test Precision: 0.9420
Test Recall: 0.9430
Test F1 Score: 0.9417
