In [None]:
import dataset
import classificationModel
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import OrdinalEncoder
from torch.utils.data import random_split

In [None]:
# Sound classes kept for this experiment; rows of any other category are dropped.
categories = ["chirping_birds", "crackling_fire", "hand_saw", "chainsaw", "helicopter"]
audio_path = './ESC-50/audio/'
meta_path = './ESC-50/meta/'
nothing_path = './room_sounds/'

data = pd.read_csv(meta_path + 'esc50.csv')

# Keep only the rows whose category is listed in `categories`. The explicit
# .copy() makes `data` an independent frame, so the column assignment below
# does not write into a slice of the frame that was read from disk
# (avoids pandas' SettingWithCopyWarning).
data = data[data.category.isin(categories)].copy()

# Re-encode the category names as integer class ids. np.int64 is used because
# the np.long alias is deprecated or missing on several NumPy releases and is
# platform-dependent where it does exist.
re_encoder = OrdinalEncoder(dtype=np.int64)
re_encoder.fit(data[["category"]])
data[["category"]] = re_encoder.transform(data[["category"]])

# Print the category name behind every class id. The ids are derived from the
# fitted encoder, so this keeps working if a category is absent from the CSV.
class_ids = np.arange(len(re_encoder.categories_[0])).reshape(-1, 1)
print(re_encoder.inverse_transform(class_ids))
data

In [None]:
# Preview a single clip: take the first file listed in `data`, open it and
# show it, then resample it to 11025 and show the resulting mel spectrogram.
# NOTE(review): the exact output of the display helpers is defined in the
# project's `dataset.AudioUtil`, which is not visible from this notebook.
file_path = audio_path + data.filename.iat[0]
aud = dataset.AudioUtil.open(file_path)
dataset.AudioUtil.displayTime(aud)
# dataset.AudioUtil.playSound(aud)
aud = dataset.AudioUtil.resample(aud, 11025)
melspec = dataset.AudioUtil.toMelSpec(aud)
dataset.AudioUtil.displayMelspec(melspec)

In [None]:
# Fixed seed so the train/validation partition is the same on every run;
# without it each kernel restart evaluates on a different validation set.
SPLIT_SEED = 0

# NOTE(review): the meaning of the positional arguments `True` and `4` is
# defined by dataset.SoundDS, which is not visible from this notebook.
ds = dataset.SoundDS(data, audio_path, True, 4)

# Random split of 80:20 between training and validation
num_items = len(ds)
print(num_items)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
train_ds, val_ds = random_split(
    ds,
    [num_train, num_val],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create training and validation data loaders
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=16, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=16, shuffle=False)
# test_dl serves the same validation items one at a time (batch_size=1);
# it is not a separate held-out test set.
test_dl = torch.utils.data.DataLoader(val_ds, batch_size=1, shuffle=False)


In [None]:
# Build the classifier and place it on the CPU.
device = torch.device("cpu")
myModel = classificationModel.AudioClassifier().to(device)
# Display the device that holds the model's parameters (the CPU here, not CUDA).
next(myModel.parameters()).device

In [None]:
# Run the project's training routine on the training loader.
num_epochs=10   # Kept low for a quick demo; raise it for a real training run.
classificationModel.training(myModel, train_dl, num_epochs)

In [None]:
# Persist the model's weights to 'myModel.pt', then run the project's
# inference routine on the validation loader.
torch.save(myModel.state_dict(), 'myModel.pt')
classificationModel.inference(myModel, val_dl)

In [None]:
def manual_testing(model, val_dl, filler_ds=None, batch_size=16):
  """Score a model one item at a time, padding each item into a full batch.

  For every batch yielded by ``val_dl`` the first item is placed in slot 0 of
  a ``batch_size``-item input batch; the remaining slots are filled with items
  drawn at random from ``filler_ds``. The model is run on that batch and only
  the prediction for slot 0 is compared with the item's label. The resulting
  accuracy is printed.

  The model's train/eval mode is left untouched.
  NOTE(review): if the model contains batch-dependent layers, confirm whether
  ``model.eval()`` should be called before running this check.

  Args:
    model: Callable mapping an input batch to per-class scores laid out as
      (batch, classes).
    val_dl: Iterable of ``(inputs, labels)`` batches; only the first item of
      each batch is scored.
    filler_ds: Indexable dataset of ``(input, label)`` pairs used to pad the
      batch. Defaults to the notebook-level ``ds``.
    batch_size: Number of items in each padded batch.

  Raises:
    ValueError: If ``batch_size`` is smaller than 1.
  """
  if batch_size < 1:
    raise ValueError("batch_size must be at least 1")
  if filler_ds is None:
    filler_ds = ds  # fall back to the dataset defined at notebook level

  # Build the batch on the device that holds the model's weights, so inputs and
  # model always agree; parameter-free models fall back to the CPU.
  first_param = next(iter(model.parameters()), None) if hasattr(model, "parameters") else None
  target_device = first_param.device if first_param is not None else torch.device("cpu")

  correct_prediction = 0
  total_prediction = 0

  # Disable gradient updates
  with torch.no_grad():
    for batch in val_dl:
      # The item under test and its label (first element of the loader batch)
      sample = batch[0].to(target_device)[0]
      target = batch[1].to(target_device)[0]

      # Size the padded batch from the sample itself instead of a fixed shape.
      # Every slot is written below, so uninitialised memory is never read.
      inputs = torch.empty((batch_size, *sample.shape), device=target_device)
      inputs[0] = sample
      for i in range(1, batch_size):
        inputs[i], _ = filler_ds[np.random.randint(0, len(filler_ds))]

      # Get predictions
      outputs = model(inputs)
      # Get the predicted class with the highest score
      _, prediction = torch.max(outputs, 1)
      # Only slot 0 (the item under test) counts towards the accuracy
      correct_prediction += int((prediction[0] == target).item())
      total_prediction += 1

  # An empty loader has no accuracy to report; avoid dividing by zero.
  if total_prediction == 0:
    print('Accuracy: n/a, Total items: 0')
    return

  acc = correct_prediction/total_prediction
  print(f'Accuracy: {acc:.2f}, Total items: {total_prediction}')

# Score the trained model item by item using the batch-size-1 loader.
manual_testing(myModel,test_dl)