In [4]:
from pathlib import Path 
import torchaudio
from speechbrain.pretrained import EncoderClassifier
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from tqdm import tqdm
import IPython.display as display
from IPython.display import Audio
import glob 

# Inspect training data 

In [5]:
cats = ["dehumidifier", "gas_boiler", "lesker_system"]
files = []
for cls in cats:
    files += glob.glob(f"data/appliances/{cls}/*.wav")

In [6]:
df = pd.DataFrame(dict(filepath=files))
df["label"] = df.filepath.apply(lambda x: Path(x).parent.name)
df.sample(5)

Unnamed: 0,filepath,label
7,data/appliances/dehumidifier/9000018-11-0-0.wav,dehumidifier
56,data/appliances/lesker_system/2000060-10-0-0.wav,lesker_system
0,data/appliances/dehumidifier/9000009-11-0-0.wav,dehumidifier
35,data/appliances/gas_boiler/3000000-12-0-0.wav,gas_boiler
20,data/appliances/dehumidifier/9000000-11-0-0.wav,dehumidifier


In [7]:
sample = df.sample().iloc[0]; sample

filepath    data/appliances/dehumidifier/1000264-11-0-0.wav
label                                          dehumidifier
Name: 17, dtype: object

In [8]:
display.Audio(sample.filepath, autoplay=True)

In [9]:
df.label.value_counts()

label
dehumidifier     29
lesker_system    17
gas_boiler       12
Name: count, dtype: int64

In [10]:
def get_sample_of(cls: str, random_state=None):
    return df[df.label == cls].sample(random_state=random_state).iloc[0]
    
def play_sample(sample):
    print(sample.filepath, sample.label)
    return display.Audio(sample.filepath, autoplay=True)

def play_sample_of(cls: str, random_state=None):
    sample = get_sample_of(cls, random_state)
    return play_sample(sample)

In [11]:
play_sample_of("dehumidifier")

data/appliances/dehumidifier/9000003-11-0-0.wav dehumidifier


In [12]:
play_sample_of("lesker_system")

data/appliances/lesker_system/2000060-10-0-0.wav lesker_system


In [21]:
play_sample_of("gas_boiler")

data/appliances/gas_boiler/3000220-12-0-0.wav gas_boiler


## Train/val split

In [22]:
from sklearn.model_selection import train_test_split

In [23]:
train, val = train_test_split(df, stratify=df.label, random_state=0, test_size=0.17)
train.shape, val.shape

((48, 2), (10, 2))

In [150]:
df['isval'] = [i in val.index for i in df.index]

In [151]:
df[df.isval]

Unnamed: 0,filepath,label,isval
0,data/appliances/dehumidifier/9000009-11-0-0.wav,dehumidifier,True
3,data/appliances/dehumidifier/1000060-11-0-0.wav,dehumidifier,True
12,data/appliances/dehumidifier/1000220-11-0-0.wav,dehumidifier,True
15,data/appliances/dehumidifier/1000144-11-0-0.wav,dehumidifier,True
21,data/appliances/dehumidifier/1000481-11-0-0.wav,dehumidifier,True
29,data/appliances/gas_boiler/3000040-12-0-0.wav,gas_boiler,True
34,data/appliances/gas_boiler/3000012-12-0-0.wav,gas_boiler,True
41,data/appliances/lesker_system/2000180-10-0-0.wav,lesker_system,True
42,data/appliances/lesker_system/2000040-10-0-0.wav,lesker_system,True
50,data/appliances/lesker_system/2000312-10-0-0.wav,lesker_system,True


In [24]:
train.label.value_counts()

label
dehumidifier     24
lesker_system    14
gas_boiler       10
Name: count, dtype: int64

In [25]:
val.label.value_counts()

label
dehumidifier     5
lesker_system    3
gas_boiler       2
Name: count, dtype: int64

## Speechbrain urbansound model 

In [26]:
from speechbrain.pretrained import EncoderClassifier

In [27]:
classifier = EncoderClassifier.from_hparams(
    source="speechbrain/urbansound8k_ecapa", 
    savedir="models/gurbansound8k_ecapa"
)

out_prob, score, index, text_lab = classifier.classify_file('speechbrain/urbansound8k_ecapa/dog_bark.wav')
print(text_lab)

['dog_bark']




For fun, I'm just going to run the classifier which is predicting urban sounds, and see what it says on random samples from our new dataset - it's interesting what it predicts! 

In [28]:
sample = get_sample_of("lesker_system")
classifier.classify_file(sample.filepath)

(tensor([[-0.4594, -0.0352,  0.1178, -0.0107,  0.0513,  0.0549,  0.5444,  0.2243,
           0.1966, -0.2633]]),
 tensor([0.5444]),
 tensor([6]),
 ['engine_idling'])

In [29]:
play_sample(sample)

data/appliances/lesker_system/2000480-10-0-0.wav lesker_system


In [30]:
sample = get_sample_of("dehumidifier")
classifier.classify_file(sample.filepath)

(tensor([[-0.4567, -0.4336,  0.4180, -0.1501,  0.0327,  0.0225,  0.2796,  0.3624,
           0.3220, -0.0888]]),
 tensor([0.4180]),
 tensor([2]),
 ['air_conditioner'])

In [31]:
play_sample(sample)

data/appliances/dehumidifier/9000028-11-0-0.wav dehumidifier


## From audio file to tensor to model 

In [41]:
import torchaudio
import torch

different samples have different length tensor (longer or shorter time series) 

In [36]:
sample = get_sample_of("dehumidifier")
tens, size = torchaudio.load(sample.filepath)
tens.shape, size

(torch.Size([1, 48000]), 16000)

the classifier's `load_audio` function does some extra stuff like normalization, so let's use that 


In [38]:
tens = classifier.load_audio(sample.filepath)
tens.shape

torch.Size([48000])

In [39]:
tens.unsqueeze(0).shape

torch.Size([1, 48000])

In [42]:
rel_length =  torch.tensor([1.])
enc = classifier.encode_batch(tens.unsqueeze(0), rel_length)
enc.shape

torch.Size([1, 1, 192])

In [43]:
# probabilities of each class
preds = classifier.mods.classifier(enc).squeeze(1); preds

tensor([[-0.2665, -0.2459,  0.4907, -0.3841,  0.0562, -0.1903,  0.2246,  0.4099,
          0.3853, -0.1777]])

In [44]:
# batch of preds, but just 1 in the batch 
preds.shape

torch.Size([1, 10])

In [45]:
# check that our work agrees with the full implementation 
classifier.classify_file(sample.filepath)

(tensor([[-0.2665, -0.2459,  0.4907, -0.3841,  0.0562, -0.1903,  0.2246,  0.4099,
           0.3853, -0.1777]]),
 tensor([0.4907]),
 tensor([2]),
 ['air_conditioner'])

## Create dataset 

In [46]:
from torch.utils.data import Dataset

In [47]:
cats

['dehumidifier', 'gas_boiler', 'lesker_system']

In [48]:
train.head()

Unnamed: 0,filepath,label
23,data/appliances/dehumidifier/1000112-11-0-0.wav,dehumidifier
30,data/appliances/gas_boiler/3000180-12-0-0.wav,gas_boiler
24,data/appliances/dehumidifier/1000012-11-0-0.wav,dehumidifier
28,data/appliances/dehumidifier/1000004-11-0-0.wav,dehumidifier
17,data/appliances/dehumidifier/1000264-11-0-0.wav,dehumidifier


In [129]:
from functools import cache

class ApplianceDS(Dataset):
    def __init__(self, df):
        self.df = df
    def __len__(self): return len(df)
    
    @cache
    def __getitem__(self, i):
        item = df.iloc[i]
        X = classifier.load_audio(item.filepath)
        # X = classifier.encode_batch(X).squeeze(1)
        y = torch.zeros(3)
        y[cats.index(item.label)] = 1.
        return X, y

In [153]:
tensors = [classifier.load_audio(fp) for fp in df.filepath]

In [168]:
shapes = [t.shape[0] for t in tensors]

In [170]:
sorted(shapes)

[16000,
 17600,
 18944,
 22400,
 25600,
 48000,
 48000,
 48000,
 48000,
 48000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 64000,
 208000,
 662400,
 978944,
 985600]

In [130]:
val.shape

(10, 2)

In [131]:
train_ds = ApplianceDS(train)
val_ds = ApplianceDS(val)

In [132]:
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=8, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=8, shuffle=True)

In [133]:
for X, y in val_dl:
    print(X.shape, y.shape)

RuntimeError: stack expects each tensor to be equal size, but got [64000] at entry 0 and [16000] at entry 1

In [128]:
len(val_dl)

8

In [66]:
def reset_weights():
    classifier.mods.classifier.weight = torch.nn.Parameter(torch.FloatTensor(3, 192))
    torch.nn.init.xavier_uniform_(classifier.mods.classifier.weight)
reset_weights()

In [69]:
classifier.mods.classifier(X).squeeze(1)

tensor([[-0.0078, -0.0159,  0.0437],
        [ 0.0215, -0.0664,  0.0653]], grad_fn=<SqueezeBackward1>)

In [70]:
for param in classifier.mods.classifier.parameters():
    print(param.requires_grad)

True


In [105]:
# loss_fn = torch.nn.CrossEntropyLoss()
loss_fn = torch.nn.functional.binary_cross_entropy_with_logits

In [106]:
# loss_fn(0.,1.)

In [107]:
loss_fn(y,y)

tensor(0.5665)

In [108]:
loss_fn(torch.tensor([-0.000,  1., 0.00]), y[0])

tensor(0.5665)

In [109]:
torch.nn.functional.binary_cross_entropy(y,y)

tensor(0.)

In [110]:
reset_weights()

In [115]:
for X, y in val_dl:
    print(X.shape, y.shape)

torch.Size([8, 1, 192]) torch.Size([8, 3])
torch.Size([8, 1, 192]) torch.Size([8, 3])
torch.Size([8, 1, 192]) torch.Size([8, 3])
torch.Size([8, 1, 192]) torch.Size([8, 3])
torch.Size([8, 1, 192]) torch.Size([8, 3])
torch.Size([8, 1, 192]) torch.Size([8, 3])
torch.Size([8, 1, 192]) torch.Size([8, 3])
torch.Size([2, 1, 192]) torch.Size([2, 3])


In [113]:
# Define model, optimizer, loss function, and dataloader
optimizer = torch.optim.Adam(classifier.mods.classifier.parameters(), lr=0.001)

for epoch in range(1000):
    losses = []
    for batch_idx, (X, y) in enumerate(train_dl):
        ypred = classifier.mods.classifier(X).squeeze(1)
        loss = loss_fn(ypred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    if epoch % 100 == 0:
        print(f"{np.mean(losses):.4f}")
        
        # print(f"Epoch {epoch}[{batch_idx}/{len(train_dl)}], Loss: {loss.item():.4f}")

0.5611
0.5774
0.5688
0.5656
0.5811
0.5772
0.5748
0.5661
0.5704
0.5700


In [550]:
Xval, yval = list(torch.utils.data.DataLoader(val_ds, batch_size=10))[0]

In [551]:
Xval.shape, yval.shape

(torch.Size([10, 1, 192]), torch.Size([10, 3]))

In [552]:
yval

tensor([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])

In [553]:
val

Unnamed: 0,filepath,label
15,data/appliances/dehumidifier/1000144-11-0-0.wav,dehumidifier
34,data/appliances/gas_boiler/3000012-12-0-0.wav,gas_boiler
29,data/appliances/gas_boiler/3000040-12-0-0.wav,gas_boiler
42,data/appliances/lesker_system/2000040-10-0-0.wav,lesker_system
41,data/appliances/lesker_system/2000180-10-0-0.wav,lesker_system
50,data/appliances/lesker_system/2000312-10-0-0.wav,lesker_system
12,data/appliances/dehumidifier/1000220-11-0-0.wav,dehumidifier
21,data/appliances/dehumidifier/1000481-11-0-0.wav,dehumidifier
0,data/appliances/dehumidifier/9000009-11-0-0.wav,dehumidifier
3,data/appliances/dehumidifier/1000060-11-0-0.wav,dehumidifier
