In [1]:
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from fastprogress.fastprogress import master_bar, progress_bar

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SequenceClassifier(nn.Module):
    def __init__(self):
        super(SequenceClassifier, self).__init__()
        # Embedding layer: 1024 input dimensions, 6 output dimensions
        self.embedding = nn.Embedding(num_embeddings=1024, embedding_dim=6)
        
        # 1D Convolutional layers
        self.conv1 = nn.Conv1d(in_channels=6, out_channels=12, kernel_size=3, stride=3)
        self.conv2 = nn.Conv1d(in_channels=12, out_channels=24, kernel_size=5, stride=4)
        self.conv3 = nn.Conv1d(in_channels=24, out_channels=48, kernel_size=5, stride=4)
        
        # 2D Convolutional layers
        self.conv2d_1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5, stride=4)
        self.conv2d_2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2)
        
        # Fully connected layers
        self.fc1 = nn.Linear(16*6*6, 576)  # Adjust the input size based on the output size of the last conv layer
        self.fc2 = nn.Linear(576, 144)
        self.fc3 = nn.Linear(144, 24)  # 23 classes + 1 catch-all class
        
    def forward(self, x):
        # Embedding layer
        x = self.embedding(x)  # Expected shape: [batch_size, seq_len, embedding_dim]
        
        # Permute to match Conv1D input expectations: [batch_size, channels, seq_len]
        x = x.permute(0, 2, 1)
        
        # 1D Convolutional layers
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        
        # Calculate the new size after convolutions for reshaping
        # The new length can be calculated based on the convolutions' output size
        # Here we'll need to manually calculate or adjust based on your specific sequence lengths and strides
        conv_output_length = self._calculate_conv_output_length(x.shape[-1])
        x = x.view(-1, 1, conv_output_length, conv_output_length)  # Adjust the shape according to your actual output
        
        # 2D Convolutional layers
        x = F.relu(self.conv2d_1(x))
        x = F.relu(self.conv2d_2(x))
        
        # Flatten before passing to fully connected layers
        x = torch.flatten(x, 1)
        
        # Fully connected layers with ReLU activations
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        
        # Output layer
        x = self.fc3(x)
        
        return x

    def _calculate_conv_output_length(self, L):
        """
        Calculate the output length of the convolutional layers.
        You might need to adjust this calculation based on the actual strides, padding, and dilation of your conv layers.
        This is a placeholder function. Replace it with your actual calculation.
        """
        # Placeholder calculation; replace with your actual calculation
        return int((L - 4) / 4)  # Example calculation; adjust accordingly

In [3]:
model = SequenceClassifier().cuda()  # Move model to GPU

In [4]:
class CustomDataset(Dataset):
    def __init__(self, hf_dataset, target_length=2304):
        self.hf_dataset = hf_dataset
        self.target_length = target_length  # Ensure this is correctly defined

    def __len__(self):
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        item = self.hf_dataset[idx]
        codes = item['codes']
        label = item['label']

        # Perform adaptive average pooling on the sequence
        # `codes` is expected to be a 3D tensor with shape [1, channels, seq_len]
        # We pool over the seq_len dimension to resize it to `target_length`
        codes_pooled = F.adaptive_avg_pool1d(codes.squeeze(0), self.target_length).squeeze(0)

        return codes_pooled, label

In [5]:
dataset = load_dataset("danjacobellis/audio_har_descript_24kHz",split="train")
dataset = dataset.with_format("torch")
torch_dataset = CustomDataset(dataset)
dataloader = DataLoader(torch_dataset, batch_size=10, shuffle=True)

In [9]:
def plot_loss_update(epoch, epochs, mb, train_loss):
    """Update the loss plot dynamically during training."""
    x = range(1, epoch + 1)
    y = train_loss
    graphs = [[x, y]]
    x_margin = 0.2
    y_margin = 0.05
    x_bounds = [1 - x_margin, epochs + x_margin]
    y_bounds = [np.min(y) - y_margin, np.max(y) + y_margin]

    # Here we update the graph
    mb.update_graph(graphs, x_bounds, y_bounds)

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 5
mb = master_bar(range(1, epochs + 1))
train_losses = []

for epoch in mb:
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(progress_bar(dataloader, parent=mb)):
        inputs, labels = inputs.cuda(), labels.cuda()
        inputs = inputs.squeeze(1)  # Adjust dimensions as necessary
        
        optimizer.zero_grad()
        
        outputs = model(inputs.long())
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    epoch_loss = running_loss / len(dataloader)
    train_losses.append(epoch_loss)
    mb.write(f'Epoch {epoch}/{epochs}, Training Loss: {epoch_loss:.4f}')
    
    # Update the loss plot
    plot_loss_update(epoch, epochs, mb, train_losses)
    
print("Finished Training")

RuntimeError: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 4 is not equal to len(dims) = 3

In [24]:
dataset[0]['codes'][0]

tensor([[ 777,  528,  245,  ...,  189,  189,  665],
        [ 366,  837,   23,  ...,  324,  332,  658],
        [ 493,  266,  585,  ...,  964,  155,  707],
        ...,
        [ 334, 1011,  183,  ...,  572,  912,  825],
        [ 344,  534,  410,  ...,  277,  749,  544],
        [  59,  845,  288,  ...,  270,  857, 1021]])

In [28]:
dataset[0]['codes'][0].squeeze()

tensor([[ 777,  528,  245,  ...,  189,  189,  665],
        [ 366,  837,   23,  ...,  324,  332,  658],
        [ 493,  266,  585,  ...,  964,  155,  707],
        ...,
        [ 334, 1011,  183,  ...,  572,  912,  825],
        [ 344,  534,  410,  ...,  277,  749,  544],
        [  59,  845,  288,  ...,  270,  857, 1021]])

In [None]:
torch.squeeze