# Data import 

This notebook develops the functions that import the data: loading .wav files and converting them into tensors.

In [11]:
import os
from dotenv import load_dotenv
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

import torch
from torch.utils.data import DataLoader, TensorDataset
import torchaudio

load_dotenv()

True

In [12]:
# Root directory of the audio data, read from the DATA_DIR environment variable (None when unset)
data_dir = os.environ.get("DATA_DIR")
# Sample recording used by the exploratory cells
temp_file = '1.wav'

In [13]:
# Shared mel-spectrogram transform, configured for a 16 kHz sample rate
transform = torchaudio.transforms.MelSpectrogram(sample_rate=16000)



In [14]:
def pad_tensor(tensor, target_length):
    '''
    Given a tensor and a target length, zero-pads the end of the tensor's last dimension so that its
    length is a multiple of the target length - so that it can be split into equal parts.

    Only the last dimension is inspected and padded, so the tensor may have any rank >= 1.
    If the length is already a multiple of target_length, the input tensor is returned unchanged.

    Parameters
    ----------
    tensor : torch.tensor
        The tensor to pad
    target_length : int
        The chunk length the last dimension must be a multiple of. Must be positive.

    Returns
    -------
    torch.tensor
        The padded tensor

    Raises
    ------
    ValueError
        If target_length is not positive
    '''

    # A zero length would divide by zero and a negative one would yield a negative pad (i.e. a trim)
    if target_length <= 0:
        raise ValueError(f"target_length must be a positive integer, got {target_length}")

    # Only the last dimension is padded, so there is no need to assume a 2-D input
    length = tensor.shape[-1]

    # With a positive divisor, -length % target_length is the distance to the next multiple (0 if aligned)
    padding_needed = -length % target_length
    if padding_needed:
        # (0, padding_needed) = nothing before, padding_needed zeros after, on the last dimension
        tensor = torch.nn.functional.pad(tensor, (0, padding_needed))
    return tensor

In [15]:
def load_file(file, target_length=80):
    '''
    Given a file path, loads the file, creates a mel spectrogram, pads it to be cleanly divisible by target_length,
    and splits it into chunks of target_length.

    Multi-channel recordings are averaged down to a single channel first, so the spectrogram is always
    2-D regardless of how many channels the file has.

    Parameters
    ----------
    file : str
        The file path to load
    target_length : int
        The target length to split the tensor into

    Returns
    -------
    tuple of torch.tensor
        A tuple of tensors, each of length target_length along dim 1
    '''

    # load wav file (torchaudio returns the waveform channels-first)
    # NOTE(review): the file's sample rate is discarded, while the module-level `transform` is built
    # for 16000 Hz - presumably all files are recorded at that rate; confirm.
    waveform, _ = torchaudio.load(file, normalize=True)

    if waveform.shape[0] > 1:
        # squeeze(0) would leave a multi-channel waveform untouched and the resulting 3-D spectrogram
        # would break the padding/splitting below, so downmix to mono instead
        waveform = waveform.mean(dim=0)
    else:
        waveform = waveform.squeeze(0)

    # create mel spectrogram
    mel_specgram = transform(waveform)

    # pad tensor so it's cleanly divisible by target_length
    padded_tensor = pad_tensor(mel_specgram, target_length)

    # return the tensor, split into target_length chunks
    return padded_tensor.split(target_length, dim=1)

In [16]:
# Read the sample recording, keeping its sample rate alongside the waveform
waveform, sample_rate = torchaudio.load(
    os.path.join(data_dir, temp_file),
    normalize=True,
)
# Drop all size-1 dimensions
waveform = torch.squeeze(waveform)

In [17]:
# Compute the mel spectrogram of the sample waveform with the shared transform
mel_specgram = transform(waveform)

In [18]:
# Inspect the spectrogram's dimensions
mel_specgram.shape

torch.Size([128, 888])

In [19]:
# Pad the spectrogram's last dimension up to the next multiple of 80
new_tens = pad_tensor(mel_specgram, 80)

In [20]:
# Count the 80-wide chunks obtained by splitting the padded tensor along dim 1
len(new_tens.split(80, dim=1))

12

In [21]:
# Run the whole load -> mel spectrogram -> pad -> split pipeline on the sample file
tensors_loaded = load_file(os.path.join(data_dir, temp_file))

In [None]:
def load_dataset(meta_file,
                 target_length=80,
                 scale=False,
                 scaler=None,
                 files_to_load='all'):
    '''
    Given a meta file, loads in the dataset.

    Each listed file is loaded with load_file and split into chunks; every chunk becomes one sample.
    Chunks from rows whose numeric_label is 1 are labelled [1.0, 0.0], all others [0.0, 1.0].
    In the returned dataset the positive samples come first, followed by the negative ones.

    Parameters
    ----------
    meta_file : str
        The meta file (CSV) to load. It is read for its `file` and `numeric_label` columns.
    target_length : int | 80
        The target length to split the tensor into
    scale : bool | False
        Whether to scale the data. NOTE: scaling is not implemented yet - the data is returned
        unscaled and a warning is issued when this is set.
    scaler : sklearn.preprocessing.StandardScaler | None
        Reserved for the scaler to use once scaling is implemented. Currently unused.
    files_to_load : int or 'all' | 'all'
        The number of files to load. If 'all', all files are loaded.

    Returns
    -------
    TensorDataset
        A dataset of (data, label) pairs - the positives followed by the negatives

    Raises
    ------
    ValueError
        If no samples were loaded (empty meta file or files_to_load of 0)
    '''
    import warnings

    if scale:
        # The parameter used to be ignored silently; make that visible to the caller
        warnings.warn("load_dataset: scaling is not implemented; returning unscaled data",
                      stacklevel=2)

    positives = []
    negatives = []

    # Load the meta file
    meta = pd.read_csv(meta_file)

    # iterate through the files
    for i, row in enumerate(meta.itertuples()):

        # If we're only loading a subset of the files, check if we've loaded enough
        if files_to_load != 'all' and i >= files_to_load:
            break

        # Load the file
        file_tensors = load_file(os.path.join(data_dir, row.file), target_length=target_length)

        # Add the tensors to the appropriate list
        bucket = positives if row.numeric_label == 1 else negatives
        bucket.extend(file_tensors)

    samples = positives + negatives
    if not samples:
        raise ValueError(f"No samples were loaded from {meta_file!r}")

    # Stack everything in one go so a class without any samples does not break torch.stack
    data = torch.stack(samples)

    # Create labels in the same order as the data: positives first, then negatives
    labels = torch.tensor([[1.0, 0.0]] * len(positives) + [[0.0, 1.0]] * len(negatives))

    # Create a dataset
    return TensorDataset(data, labels)

In [23]:
def get_dataloader(dataset, batch_size=32):
    '''
    Wraps a dataset in a DataLoader that shuffles the samples.

    Parameters
    ----------
    dataset : TensorDataset
        The dataset the loader draws its batches from
    batch_size : int
        The number of samples per batch

    Returns
    -------
    DataLoader
        A shuffling DataLoader over the given dataset
    '''

    # Shuffling is always enabled; only the batch size is configurable
    loader_options = {'batch_size': batch_size, 'shuffle': True}
    return DataLoader(dataset, **loader_options)

In [28]:
# Build a dataset from the first 100 rows of the training meta file
dataset = load_dataset('meta/train.csv', files_to_load=100)

In [29]:
# Wrap the dataset in a shuffling DataLoader with batches of 32
dataloader = get_dataloader(dataset, batch_size=32)

In [30]:
# Sanity check: walk through the loader once, printing each batch's data shape and labels
for batch_data, batch_labels in dataloader:
    print(f"Batch data shape: {batch_data.shape}")
    print(f"Batch labels: {batch_labels}")

Batch data shape: torch.Size([32, 128, 80])
Batch labels: tensor([[0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.]])
Batch data shape: torch.Size([32, 128, 80])
Batch labels: tensor([[1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
 