In [1]:
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from fastprogress.fastprogress import master_bar, progress_bar

In [6]:
class SequenceModel(nn.Module):
    """Small embedding + strided-convolution network over integer codes.

    Each of the ``num_positions`` (9) slices along the last axis of the input
    is looked up in its own 1024-entry, 6-dimensional embedding table. The
    stacked embeddings are viewed as a 54-channel sequence, passed through two
    ``kernel_size=3, stride=3`` 1-D convolutions (54 -> 72 -> 120 channels),
    flattened, and mapped by two linear layers to 24 outputs per batch item.
    """

    def __init__(self):
        super().__init__()
        self.num_positions = 9
        self.num_embeddings = 1024
        self.embedding_dim = 6

        # One independent lookup table per index of the input's last axis.
        tables = []
        for _ in range(self.num_positions):
            tables.append(nn.Embedding(self.num_embeddings, self.embedding_dim))
        self.embeddings = nn.ModuleList(tables)

        # Kernel size equals stride, so the convolution windows never overlap.
        self.conv1 = nn.Conv1d(54, 72, kernel_size=3, stride=3)
        self.conv2 = nn.Conv1d(72, 120, kernel_size=3, stride=3)

        # Fully connected head
        self.fc1 = nn.Linear(120, 120)
        self.fc2 = nn.Linear(120, 24)

    def forward(self, x):
        """Compute the 24 outputs for a batch of integer codes.

        Args:
            x: Integer tensor read as ``x[:, :, i]`` for ``i`` in
                ``range(self.num_positions)``; its entries are used as
                embedding indices.
                NOTE(review): ``fc1`` takes 120 features, so the conv2 output
                must have length 1 along its last axis; an input of shape
                (batch, 9, 9) satisfies this -- TODO confirm expected shape.

        Returns:
            Tensor of shape (batch, 24); no activation is applied to it.
        """
        n = x.shape[0]

        per_position = []
        for i in range(self.num_positions):
            per_position.append(self.embeddings[i](x[:, :, i]))
        # Result axes: (batch, x.shape[1], embedding_dim, num_positions).
        stacked = torch.stack(per_position, dim=3)

        # NOTE(review): this is a memory reinterpretation, not a transpose.
        # The last axis of the result lines up with the stacked position axis
        # only when x.shape[1] == 9 -- confirm this is the intended layout.
        h = stacked.view(n, 54, -1)
        for conv in (self.conv1, self.conv2):
            h = F.relu(conv(h))

        h = h.view(n, -1)
        h = F.relu(self.fc1(h))
        return self.fc2(h)

In [7]:
# Load the "train" split of the dataset and have indexing return torch tensors.
dataset = load_dataset(
    "danjacobellis/audio_har_descript_44kHz_frames",
    split="train",
).with_format("torch")

# Freshly initialised (untrained) network.
model = SequenceModel()

In [15]:
# Take the "codes" field of the first five rows and cast it to 32-bit integers.
example_input = dataset[0:5]["codes"].int()

In [16]:
# Run one forward pass on the example batch; torch.no_grad() disables
# gradient tracking for the duration of the call.
with torch.no_grad():
    y = model(example_input)