In [None]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim


In [74]:
# Load dataset
df = pd.read_csv('dataset.csv')
spectrograms_dir = 'Spectrograms'
X = []
y = []
for index, file in enumerate(os.listdir(spectrograms_dir)):
    if file.endswith('.npy'):
        spectrogram = np.load(os.path.join(spectrograms_dir, file))
        y.append(float(df.iloc[index]['label']))  # Convert to float explicitly
        X.append(spectrogram)

# Convert to numpy arrays
X = np.array(X, dtype=np.float32)  # Ensure float32 for spectrograms
y = np.array(y, dtype=np.float32)  # Ensure float32 for labels
print(f"Loaded {len(X)} spectrograms and {len(y)} labels")

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Training set: {len(X_train)} samples, Test set: {len(X_test)} samples")

# Convert to tensors
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device)  # Ensure float32
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)  # Ensure float32

# Define dataset
class SpectrogramDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = torch.tensor(y, dtype=torch.float32)  # Ensure float32

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create datasets and dataloaders
train_dataset = SpectrogramDataset(X_train_tensor, y_train_tensor)
test_dataset = SpectrogramDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the CNN model
class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: (64, 125)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: (32, 62)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: (16, 31)
        self.fc1 = nn.Linear(64 * 16 * 31, 128)
        self.relu4 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 1)  # Output: 1 for binary classification

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension: (batch, 1, 128, 251)
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.relu4(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)  # No sigmoid here, applied in loss function
        return x

# Training function
def train_model(model, train_loader, criterion, optimizer, device, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels)  # No sigmoid here
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            preds = torch.sigmoid(outputs) > 0.5
            correct += (preds == labels).sum().item()
            total += labels.size(0)
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

# Evaluation function
def evaluate_model(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            preds = torch.sigmoid(outputs) > 0.5
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    test_loss = running_loss / len(test_loader)
    test_acc = correct / total
    print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}')

# Main execution
if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = CNNClassifier().to(device)
    criterion = nn.BCEWithLogitsLoss()  # For binary classification
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train and evaluate
    train_model(model, train_loader, criterion, optimizer, device, num_epochs=10)
    evaluate_model(model, test_loader, criterion, device)

Loaded 1883 spectrograms and 1883 labels
Training set: 1506 samples, Test set: 377 samples


  self.y = torch.tensor(y, dtype=torch.float32)  # Ensure float32


Epoch 1/10, Loss: 0.6885, Accuracy: 0.5744
Epoch 2/10, Loss: 0.6795, Accuracy: 0.5770
Epoch 3/10, Loss: 0.6622, Accuracy: 0.5910
Epoch 4/10, Loss: 0.6485, Accuracy: 0.6175
Epoch 5/10, Loss: 0.6286, Accuracy: 0.6521
Epoch 6/10, Loss: 0.6351, Accuracy: 0.6554
Epoch 7/10, Loss: 0.6476, Accuracy: 0.6182
Epoch 8/10, Loss: 0.6066, Accuracy: 0.6766
Epoch 9/10, Loss: 0.5947, Accuracy: 0.7005
Epoch 10/10, Loss: 0.5973, Accuracy: 0.6853
Test Loss: 0.6348, Test Accuracy: 0.6764


In [76]:
from sklearn.metrics import precision_score, recall_score, f1_score
# Evaluation function for precision, recall, and F1 score
def evaluate_metrics(model, test_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).squeeze()
            preds = torch.sigmoid(outputs) > 0.5  # Convert logits to binary predictions
            all_preds.extend(preds.cpu().numpy().astype(int))
            all_labels.extend(labels.cpu().numpy().astype(int))
    
    # Calculate metrics
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
# Evaluate metrics
evaluate_metrics(model, test_loader, device)

Precision: 0.8219
Recall: 0.3550
F1 Score: 0.4959
