In [1]:
import os
import torchaudio
import librosa
import numpy as np
from concurrent.futures import ThreadPoolExecutor
import torch

In [2]:
SAMPLE_RATE = 16000
N_MFCC = 13
N_FFT = 400
HOP_LENGTH = 160
N_MELS = 128
DATA_PATH = '/scratch/as20482/ML_Final_Proj/AudioSet-classification/Data'

In [3]:
def extract_mfcc_features(waveforms):
    mfcc_transform = torchaudio.transforms.MFCC(
        sample_rate=SAMPLE_RATE,
        n_mfcc=N_MFCC,
        melkwargs={"n_fft": N_FFT, "hop_length": HOP_LENGTH, "n_mels": N_MELS}
    )
    return mfcc_transform(waveforms).numpy()

In [5]:
def extract_mel_spectrogram_features(waveforms):
    mel_spectrogram_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        n_mels=N_MELS
    )
    return mel_spectrogram_transform(waveforms).numpy()

In [6]:
def extract_chroma_features(waveforms):
    def process_waveform(waveform):
        return librosa.feature.chroma_stft(y=waveform.numpy(), sr=SAMPLE_RATE, n_fft=N_FFT, hop_length=HOP_LENGTH)
    with ThreadPoolExecutor() as executor:
        chroma_features = list(executor.map(process_waveform, waveforms))
    return np.array(chroma_features)

In [7]:
def extract_zero_crossing_rate(waveform, sample_rate):
    with ThreadPoolExecutor() as executor:
        zero_crossing_rate = list(executor.map(librosa.feature.zero_crossing_rate, waveforms))
    return np.array(zero_crossing_rate)

In [4]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression

model = MultiOutputClassifier(LogisticRegression(max_iter=1000))

In [5]:
data = torch.load(os.path.join(DATA_PATH, 'train', 'resampled_waveforms.pt'))

  data = torch.load(os.path.join(DATA_PATH, 'train', 'resampled_waveforms.pt'))


In [6]:
labels = torch.load(os.path.join(DATA_PATH, 'train', 'labels.pt'))

  labels = torch.load(os.path.join(DATA_PATH, 'train', 'labels.pt'))


In [7]:
data.shape

torch.Size([20550, 1, 160172])

In [16]:
labels.shape

torch.Size([20550, 527])

In [10]:
X_tr = extract_mfcc_features(data[:100,:,:]).mean(-1).squeeze()

In [11]:
X_tr.shape

(100, 13)

In [13]:
y_tr = labels[:100,:].numpy()

In [20]:
X_tr = X_tr.mean(-1)

In [21]:
X_tr.shape

(20550,)

In [14]:
y_tr.shape

(100, 527)

In [21]:
a=np.random.randn(2,3)

array([[-1.42617657,  0.02501505,  0.25278759],
       [ 1.41956775,  0.16828626,  2.37647624]])

In [24]:
np.ones((2,3))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [27]:
model.fit(np.random.randn(2,3), np.ones((2,5)))

ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: np.float64(1.0)

In [28]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier

In [29]:
forest = RandomForestClassifier(random_state=1)
multi_target_forest = MultiOutputClassifier(forest, n_jobs=2)

In [32]:
multi_target_forest.fit(X_tr, y_tr).predict(X_tr) == y_tr

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])