In [2]:
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

In [3]:
df = pd.read_csv('dataset.csv')
spectrograms_dir = 'Spectrograms'
X = []
y = []
for index, file in enumerate(os.listdir(spectrograms_dir)):
    if file.endswith('.npy'):
        spectrogram = np.load(os.path.join(spectrograms_dir, file))
        y.append(df.iloc[index]['label'])
        X.append(spectrogram)
    

In [4]:
class SpectrogramDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return X[idx] , y[idx]

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(len(X_train), len(X_test))

371 93


In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long, device=device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long, device=device)


  X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)


In [7]:
train_dataset = SpectrogramDataset(X_train_tensor, y_train_tensor)
test_dataset = SpectrogramDataset(X_test_tensor, y_test_tensor)

In [8]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [9]:
# Define the LSTM model
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # LSTM layer
        self.lstm = nn.LSTM(input_size=input_size, 
                          hidden_size=hidden_size, 
                          num_layers=num_layers, 
                          batch_first=True)
        
        # Fully connected layer for classification
        self.fc = nn.Linear(hidden_size, num_classes)
        
        # Dropout for regularization
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        # x shape: (batch_size, 128, 251) -> (batch_size, seq_len=128, input_size=251)
        
        # Initialize hidden state and cell state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        # LSTM forward pass
        out, _ = self.lstm(x, (h0, c0))  # out: (batch_size, seq_len, hidden_size)
        
        # Take the output from the last time step
        out = out[:, -1, :]  # (batch_size, hidden_size)
        
        # Apply dropout
        out = self.dropout(out)
        
        # Fully connected layer
        out = self.fc(out)  # (batch_size, num_classes)
        return out

In [None]:
# Model hyperparameters
input_size = 251  # Number of frequency bins in spectrogram
hidden_size = 128  # LSTM hidden units
num_layers = 2     # Number of LSTM layers
num_classes = 2    # Adjust based on your number of classes (e.g., 2 for binary)

# Initialize model, loss function, and optimizer
model = LSTMClassifier(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()  # For multi-class; use nn.BCEWithLogitsLoss for binary
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [11]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    # Print epoch loss
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')

Epoch [1/10], Loss: 0.6541
Epoch [2/10], Loss: 0.6335
Epoch [3/10], Loss: 0.5925
Epoch [4/10], Loss: 0.5297
Epoch [5/10], Loss: 0.4993
Epoch [6/10], Loss: 0.4407
Epoch [7/10], Loss: 0.3617
Epoch [8/10], Loss: 0.3222
Epoch [9/10], Loss: 0.3148
Epoch [10/10], Loss: 0.3447
Test Accuracy: 89.25%


In [14]:
# Example: Load a new spectrogram (replace with your actual data)
new_spectrogram = np.load('Spectrograms/490_clip_0.npy')  # Shape: (128, 251)
new_spectrogram_tensor = torch.tensor(new_spectrogram, dtype=torch.float32).unsqueeze(0).to(device)  # Shape: (1, 128, 251)

with torch.no_grad():
    output = model(new_spectrogram_tensor)
    _, predicted_class = torch.max(output, 1)
    predicted_class = predicted_class.item()

print(f"Predicted Class for New Spectrogram: {predicted_class}")


Predicted Class for New Spectrogram: 0
