In [3]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
from torchinfo import summary
import pandas as pd
import os

### Load spectrogram images into a data loader for training

In [4]:
data_path = './data/spectrograms' #looking in subfolder train

yes_no_dataset = datasets.ImageFolder(
    root=data_path,
    transform=transforms.Compose([transforms.Resize((201,81)),
                                  transforms.ToTensor()
                                  ])
)
print(yes_no_dataset)

Dataset ImageFolder
    Number of datapoints: 7985
    Root location: ./data/spectrograms
    StandardTransform
Transform: Compose(
               Resize(size=(201, 81), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )


In [5]:
class_map=yes_no_dataset.class_to_idx

print("\nClass category and index of the images: {}\n".format(class_map))


Class category and index of the images: {'no': 0, 'yes': 1}



### Split the data for training and testing

In [6]:
#split data to test and train
#use 80% to train
train_size = int(0.8 * len(yes_no_dataset))
test_size = len(yes_no_dataset) - train_size
yes_no_train_dataset, yes_no_test_dataset = torch.utils.data.random_split(yes_no_dataset, [train_size, test_size])

print("Training size:", len(yes_no_train_dataset))
print("Testing size:",len(yes_no_test_dataset))

Training size: 6388
Testing size: 1597


In [8]:
from collections import Counter

# labels in training set
train_classes = [label for _, label in yes_no_train_dataset]
Counter(train_classes)

Counter({0: 3155, 1: 3233})

In [9]:
train_dataloader = torch.utils.data.DataLoader(
    yes_no_train_dataset,
    batch_size=15,
    num_workers=2,
    shuffle=True
)

test_dataloader = torch.utils.data.DataLoader(
    yes_no_test_dataset,
    batch_size=15,
    num_workers=2,
    shuffle=True
)

In [10]:
td = train_dataloader.dataset[0][0][0][0]
print(td)

tensor([0.1373, 0.1412, 0.1333, 0.1647, 0.1294, 0.1294, 0.1686, 0.1333, 0.1686,
        0.1529, 0.1294, 0.1176, 0.2353, 0.1333, 0.1490, 0.1412, 0.1529, 0.1333,
        0.1882, 0.2196, 0.2392, 0.1647, 0.2392, 0.3176, 0.2667, 0.1686, 0.1882,
        0.1255, 0.2784, 0.3333, 0.2510, 0.2510, 0.2392, 0.1529, 0.3843, 0.3686,
        0.4039, 0.2431, 0.1647, 0.3922, 0.3686, 0.1255, 0.1804, 0.2078, 0.1294,
        0.2235, 0.2392, 0.1647, 0.1451, 0.1451, 0.1529, 0.1373, 0.2431, 0.3098,
        0.2784, 0.1725, 0.3020, 0.1843, 0.2235, 0.2314, 0.1725, 0.2784, 0.2667,
        0.1255, 0.1725, 0.1569, 0.1216, 0.1294, 0.1216, 0.1176, 0.1255, 0.1333,
        0.1216, 0.1255, 0.1922, 0.1176, 0.1490, 0.1216, 0.1216, 0.1255, 0.1176])


In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cpu device


### Create the convolutional neural network

In [12]:
class CNNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(51136, 50)
        self.fc2 = nn.Linear(50, 2)


    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        #x = x.view(x.size(0), -1)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        return F.log_softmax(x,dim=1)

model = CNNet().to(device)

#### Create train and test functions

In [13]:
# cost function used to determine best parameters
cost = torch.nn.CrossEntropyLoss()

# used to create optimal parameters
learning_rate = 0.0001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Create the training function

def train(dataloader, model, loss, optimizer):
    model.train()
    size = len(dataloader.dataset)
    for batch, (X, Y) in enumerate(dataloader):

        X, Y = X.to(device), Y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = cost(pred, Y)
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f'loss: {loss:>7f}  [{current:>5d}/{size:>5d}]')


# Create the validation/test function

def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for batch, (X, Y) in enumerate(dataloader):
            X, Y = X.to(device), Y.to(device)
            pred = model(X)

            test_loss += cost(pred, Y).item()
            correct += (pred.argmax(1)==Y).type(torch.float).sum().item()

    test_loss /= size
    correct /= size

    print(f'\nTest Error:\nacc: {(100*correct):>0.1f}%, avg loss: {test_loss:>8f}\n')

#### Train the model

In [14]:
epochs = 15

for t in range(epochs):
    print(f'Epoch {t+1}\n-------------------------------')
    train(train_dataloader, model, cost, optimizer)
    test(test_dataloader, model)
print('Done!')

Epoch 1
-------------------------------
loss: 0.693513  [    0/ 6388]
loss: 0.505987  [ 1500/ 6388]
loss: 0.560299  [ 3000/ 6388]
loss: 0.158439  [ 4500/ 6388]
loss: 0.166143  [ 6000/ 6388]

Test Error:
acc: 91.7%, avg loss: 0.013633

Epoch 2
-------------------------------
loss: 0.487377  [    0/ 6388]
loss: 0.247089  [ 1500/ 6388]
loss: 0.087810  [ 3000/ 6388]
loss: 0.257693  [ 4500/ 6388]
loss: 0.193578  [ 6000/ 6388]

Test Error:
acc: 93.0%, avg loss: 0.010607

Epoch 3
-------------------------------
loss: 0.188884  [    0/ 6388]
loss: 0.144731  [ 1500/ 6388]
loss: 0.289118  [ 3000/ 6388]
loss: 0.151750  [ 4500/ 6388]
loss: 0.122622  [ 6000/ 6388]

Test Error:
acc: 93.4%, avg loss: 0.009964

Epoch 4
-------------------------------
loss: 0.098662  [    0/ 6388]
loss: 0.053314  [ 1500/ 6388]
loss: 0.139524  [ 3000/ 6388]
loss: 0.260953  [ 4500/ 6388]
loss: 0.042041  [ 6000/ 6388]

Test Error:
acc: 94.1%, avg loss: 0.009297

Epoch 5
-------------------------------
loss: 0.173291  [   

In [15]:
summary(model, input_size=(15, 3, 201, 81))

  action_fn=lambda data: sys.getsizeof(data.storage()),
  return super().__sizeof__() + self.nbytes()


Layer (type:depth-idx)                   Output Shape              Param #
CNNet                                    [15, 2]                   --
├─Conv2d: 1-1                            [15, 32, 197, 77]         2,432
├─Conv2d: 1-2                            [15, 64, 94, 34]          51,264
├─Dropout2d: 1-3                         [15, 64, 94, 34]          --
├─Flatten: 1-4                           [15, 51136]               --
├─Linear: 1-5                            [15, 50]                  2,556,850
├─Linear: 1-6                            [15, 2]                   102
Total params: 2,610,648
Trainable params: 2,610,648
Non-trainable params: 0
Total mult-adds (G): 3.05
Input size (MB): 2.93
Forward/backward pass size (MB): 82.80
Params size (MB): 10.44
Estimated Total Size (MB): 96.17

In [16]:
#### Test the model

In [17]:
model.eval()
test_loss, correct = 0, 0
class_map = ['no', 'yes']

with torch.no_grad():
    for batch, (X, Y) in enumerate(test_dataloader):
        X, Y = X.to(device), Y.to(device)
        pred = model(X)
        print("Predicted:\nvalue={}, class_name= {}\n".format(pred[0].argmax(0),class_map[pred[0].argmax(0)]))
        print("Actual:\nvalue={}, class_name= {}\n".format(Y[0],class_map[Y[0]]))
        break

Predicted:
value=0, class_name= no

Actual:
value=0, class_name= no

