In [18]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
cd /content/drive/MyDrive/mygithub/pytorchForAudioPractice

/content/drive/.shortcut-targets-by-id/1j0tKfNJ74iEAtyLmY4PY6L2fktF9jL1I/mygithub/pytorchForAudioPractice


In [20]:
!pip install torch torchaudio torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [21]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.autograd.profiler as profiler

In [None]:
def download_mnist_datasets():
  train_data = datasets.MNIST(train=True, download=True, transform=ToTensor(), root="data")
  validate_data = datasets.MNIST(train=False, download=True, transform=ToTensor(), root="data")
  return train_data, validate_data


In [None]:
from torch.nn.modules.linear import Linear
class FeedForwardNN(nn.Module):
  def __init__(self):
    super().__init__()
    self.flatten = nn.Flatten()
    self.denseLayers = nn.Sequential(
        nn.Linear(28*28,256), # image size 28x28
        nn.ReLU(),
        nn.Linear(256,10) # 10 classes 0 to 9 
    )
    self.activation = nn.Softmax(dim=1)

  def forward(self, inputData):

    flattenedData = self.flatten(inputData)
    logits = self.denseLayers(flattenedData)
    predictions = self.activation(logits)

    return predictions

In [None]:
def trainOneEpoch(model, dataLoader, lossFn, optimiser, device):


  for input, target in dataLoader:

    input, target = input.to(device), target.to(device)

    #calculate loss
    predication = model(input)

    loss = lossFn(predication, target)

    # backpropagation and weight update

    optimiser.zero_grad()
    loss.backward()
    optimiser.step()

  print(f"loss: {loss.item()}")

In [None]:
def train(model, dataLoader, lossFn, optimiser, device, epochs):
  for i in range(epochs):

    print(f"Epoch {i+1}")
    trainOneEpoch(model, dataLoader, lossFn, optimiser, device)
    print("---------------------------")
  
  print("Finished training")

# **Model Train**

In [None]:
BATCH_SIZE = 128
LEARNING_RATE = 0.001
EPOCHS = 10

# Download data
train_data, _ = download_mnist_datasets()

# data load
dataLoader = DataLoader(train_data, batch_size=BATCH_SIZE)

# Build Model

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device}")

model = FeedForwardNN().to(device)

# Train

lossFn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


train(model, dataLoader, lossFn, optimiser, device, EPOCHS)

# Save Model

torch.save(model.state_dict(),"modelNN.pth")
print("Trained feed forward net saved at modelNN.pth")



Using cpu
Epoch 1
loss: 1.5117287635803223
---------------------------
Epoch 2
loss: 1.49992835521698
---------------------------
Epoch 3
loss: 1.4886845350265503
---------------------------
Epoch 4
loss: 1.4811148643493652
---------------------------
Epoch 5
loss: 1.475595474243164
---------------------------
Epoch 6
loss: 1.473410964012146
---------------------------
Epoch 7
loss: 1.4738837480545044
---------------------------
Epoch 8
loss: 1.4731812477111816
---------------------------
Epoch 9
loss: 1.4724453687667847
---------------------------
Epoch 10
loss: 1.4728208780288696
---------------------------
Finished training
Trained feed forward net saved at modelNN.pth


# Model Infere

In [None]:
CLASS_MAPPING = [
    "0",
    "1",
    "2",
    "3",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
]

In [None]:
def predict(model, input, target, classMapping):
  # switch eval 
  model.eval()

  # run infere without gradient evaluation
  with torch.no_grad():
    predictions = model(input)

    # Tensor (1, 10) --> [[0.1, 0.001, .... , 0.8]]
    predictedIndex = predictions[0].argmax(0)

    predicted = classMapping[predictedIndex]
    expected = classMapping[target]

  return  predicted, expected

In [None]:
# Load Validate Data
_, validatingData = download_mnist_datasets()

# Load Model

feedForwardNetObj = FeedForwardNN()

dictStatLoaded = torch.load("modelNN.pth")

feedForwardNetObj.load_state_dict(dictStatLoaded)

# get sample from the validating data
input, target = validatingData[0][0], validatingData[0][1]

# Make inference

predicted, expected = predict(feedForwardNetObj, input, target, CLASS_MAPPING)

print(f"Predicted: '{predicted}', expected: '{expected}'")

Predicted: '7', expected: '7'


# Customized Urban Dataset

https://urbansounddataset.weebly.com/urbansound8k.html

# To download dataset

```
cd ---> path you need
!pip install opendatasets --upgrade --quiet

import opendatasets as od

dataset_url = 'https://goo.gl/8hY5ER'
od.download(dataset_url)
```

ref: https://jovian.ai/charmzshab/urban-sound-dataset

and also: https://www.kaggle.com/datasets/chrisfilo/urbansound8k

# Use Mel Spectrogram as transformer

In [25]:
from pandas.core.array_algos import transforms
import os
from torch.utils.data import Dataset
import pandas as pd
import torchaudio


class UrbanSoundDataset(Dataset):

  def __init__(self, annotationsFile, audioDir, transformer, sampleRate, sampleLength):
    self.annotations = pd.read_csv(annotationsFile)
    self.audioDir = audioDir
    self.transformer = transformer
    self.targetSampleRate = sampleRate
    self.sampleLength = sampleLength

  def __len__(self):
    return len(self.annotations)
    
  def __getitem__(self,index):
    audioSamplePath = self._getAudioSamplePath(index)
    label = self._getAudioSampleLabel(index)
    signal, sr = torchaudio.load(audioSamplePath)

    # need to resample if necessary
    signal = self._resampleIfNecessary(signal, sr)
    # need to mix the channels in case of stero or multi channel
    signal = self._mixChannelIfNecessary(signal)
    # need to check the size of the sample so that right padding added or trunked
    signal = self._adjustSampleLength(signal)
    # need to transform signal to the transforemer
    signal = self.transformer(signal)
    return signal, label

  def _getAudioSamplePath(self, index):
    fold = f"fold{self.annotations.iloc[index,5]}" # 5 where fold is located he cloumb # 5
    path = os.path.join(self.audioDir, fold, self.annotations.iloc[index, 0]) # where 0 is the raw of the .wav files names
    return path

  def _getAudioSampleLabel(self, index):
    return self.annotations.iloc[index, 6] # where the raw #6 is the classId

  def _resampleIfNecessary(self, signal, sr):
    if sr != self.targetSampleRate:
      resampler = torchaudio.transforms.Resample(sr, self.targetSampleRate)
      signal = resampler(signal)
    return signal

  def _mixChannelIfNecessary(self, signal):
    # example, if signal is stero so its shape will be (2,16000)
    if signal.shape[0] > 1:
      signal = torch.mean(signal, dim=0, keepdim=True)
    return signal

  def _adjustSampleLength(self, signal):
    signalLen = signal.shape[1]
    # check in case of small sample so needed padding to be added
    if signalLen < self.sampleLength:
      numMissingSample = self.sampleLength - signalLen
      lastDimPadding = (0, numMissingSample)
      signal = torch.nn.functional.pad(signal, lastDimPadding)
    
    # in case of signal len greater than required len
    elif signalLen > self.sampleLength:
      signal = signal[:, :self.sampleLength]

    return signal


In [26]:
ANNOTATION_FILE = "/content/drive/MyDrive/mygithub/pytorchForAudioPractice/data/UrbanSound8K/metadata/UrbanSound8K.csv"
AUDIO_DIR = "/content/drive/MyDrive/mygithub/pytorchForAudioPractice/data/UrbanSound8K/audio"
SAMPLE_RATE = 22050
NUM_SAMPLE = 22050

melSpectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=1024,
    hop_length=512,
    n_mels=64)

usd = UrbanSoundDataset(ANNOTATION_FILE, AUDIO_DIR, melSpectrogram, SAMPLE_RATE, NUM_SAMPLE)

print(f"There are {len(usd)} samples in the dataset.")
signal, label = usd[0]



There are 8732 samples in the dataset.


In [27]:
label

3

In [28]:
signal.shape

torch.Size([1, 64, 44])