In [1]:
import pandas as pd

import torch
import torchaudio
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional

import torchvision

import torchaudio.functional as F
import torchaudio.transforms as T

from torch.utils.data.dataloader import Dataset, T_co

import noisereduce as nr

from torch.utils.data import random_split
from torch.utils.data.dataloader import DataLoader

from sklearn import preprocessing

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        outputs: tensor of per-class scores; the predicted class is the index
            of the maximum along ``dim=1``.
        labels: tensor of target class indices, compared element-wise with
            the predictions.

    Returns:
        A new scalar tensor built from a Python float (number of matches
        divided by the number of predictions), so it carries no gradient.
    """
    predicted = torch.max(outputs, dim=1).indices
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))

In [4]:
class EmotionCNN(nn.Module):
    """Convolutional classifier for single-channel 2-D inputs.

    The network is two blocks of (Conv 5x5 -> ReLU -> Conv 5x5 -> ReLU ->
    AvgPool 2x2), followed by dropout and a three-layer fully connected head.

    Every 5x5 convolution uses ``padding=1`` and ``stride=1``, so it shrinks
    each spatial side by 2; every pooling layer halves it. The first linear
    layer expects ``64 * 53 * 53`` features, which is what e.g. a
    ``1 x 224 x 224`` input produces
    (224 -> 222 -> 220 -> 110 -> 108 -> 106 -> 53).
    NOTE(review): the actual input size used in training is not visible
    here -- confirm against the data pipeline.

    The layer order inside ``self.network`` determines the ``state_dict`` keys
    (``network.<index>.weight`` ...), so do not reorder or insert layers if
    existing checkpoints must remain loadable.
    """

    def __init__(self, num_classes: int = 10) -> None:
        """Build the layer stack.

        Args:
            num_classes: number of output logits. Defaults to 10, the value
                that was previously hard-coded.
        """
        super().__init__()

        self.network = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=5, padding=1, stride=1),
            nn.ReLU(),
            nn.AvgPool2d(2, 2),  # out: 32 x 110 x 110 (for a 224 x 224 input)

            nn.Conv2d(32, 64, kernel_size=5, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=5, padding=1, stride=1),
            nn.ReLU(),
            nn.AvgPool2d(2, 2),  # out: 64 x 53 x 53 (required by the Linear below)

            nn.Dropout(p=.4),

            nn.Flatten(),

            nn.Linear(in_features=64 * 53 * 53, out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=num_classes),
        )

    def forward(self, xb: torch.Tensor):
        """Run a batch through the network and return the raw class logits."""
        return self.network(xb)

    def train_step(self, batch):
        """Compute the cross-entropy loss for one ``(images, labels)`` batch.

        Returns:
            The loss tensor, still attached to the graph so the caller can
            call ``backward()`` on it.
        """
        images, labels = batch
        out = self(images)  # generate predictions
        loss = functional.cross_entropy(out, labels)  # calculate loss
        return loss

    def validation_step(self, batch):
        """Compute loss and accuracy for one ``(images, labels)`` batch.

        Returns:
            ``{"val_loss": <detached loss tensor>, "val_acc": <accuracy tensor>}``.
            Accuracy comes from the module-level ``accuracy`` helper.
        """
        images, labels = batch
        out = self(images)  # generate predictions
        loss = functional.cross_entropy(out, labels)  # calculate loss
        acc = accuracy(out, labels)  # calculate accuracy
        return {"val_loss": loss.detach(), "val_acc": acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch results produced by ``validation_step``.

        Args:
            outputs: non-empty list of dicts with tensor values under
                ``"val_loss"`` and ``"val_acc"``.

        Returns:
            ``{"val_loss": float, "val_acc": float}`` holding the unweighted
            mean over batches (every batch counts equally, regardless of size).
        """
        batch_losses = [x["val_loss"] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()

        batch_accs = [x["val_acc"] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()
        return {"val_loss": epoch_loss.item(), "val_acc": epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of an epoch.

        Args:
            epoch: epoch number to show.
            result: dict with ``"train_loss"``, ``"val_loss"`` and
                ``"val_acc"`` entries.
        """
        # Adjacent f-strings instead of a backslash continuation inside the
        # literal: the continuation embedded the next line's indentation in
        # the printed message.
        print(
            f"Epoch {epoch}, train_loss: {result['train_loss']}, "
            f"val_loss: {result['val_loss']}, val_acc: {result['val_acc']}"
        )


In [7]:
# Recreate the architecture, then restore the saved weights. map_location
# remaps the stored tensors to the CPU so the checkpoint loads without a GPU.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
model = EmotionCNN()
model.load_state_dict(
    torch.load(
        "./models/specgram_model.zip",
        map_location=torch.device('cpu'),
    )
)

<All keys matched successfully>

In [8]:
# Switch to evaluation mode so the Dropout layer is inactive during inference.
# eval() returns the module itself, which is why this cell displays the layer summary.
model.eval()

EmotionCNN(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (5): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (10): Dropout(p=0.4, inplace=False)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=179776, out_features=512, bias=True)
    (13): ReLU()
    (14): Linear(in_features=512, out_features=128, bias=True)
    (15): ReLU()
    (16): Linear(in_features=128, out_features=10, bias=True)
  )
)

In [9]:
# Print the layer summary (same text as the repr displayed by the model.eval() cell).
print(model)

EmotionCNN(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (5): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (10): Dropout(p=0.4, inplace=False)
    (11): Flatten(start_dim=1, end_dim=-1)
    (12): Linear(in_features=179776, out_features=512, bias=True)
    (13): ReLU()
    (14): Linear(in_features=512, out_features=128, bias=True)
    (15): ReLU()
    (16): Linear(in_features=128, out_features=10, bias=True)
  )
)
