In [1]:
# Code for generating stuff to be used in the report
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision

batch_size = 4

train_dataset = torchvision.datasets.ImageFolder(root='spectrograms/train', transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
train_mean = torch.stack([img.mean(1).mean(1) for img, _ in train_dataset]).mean(0)
train_std = torch.stack([img.std(1).std(1) for img, _ in train_dataset]).mean(0)

data_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=train_mean, std=train_std)
])

train_dataset = torchvision.datasets.ImageFolder("spectrograms/train", transform=data_transforms)
train_dataset, valid_dataset = torch.utils.data.random_split(train_dataset, [0.8,0.2])

test_dataset = torchvision.datasets.ImageFolder("spectrograms/test", transform=transforms.ToTensor())
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

test_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=train_mean, std=train_std)
])

test_dataset = torchvision.datasets.ImageFolder("spectrograms/test", transform=test_transforms)

trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.fc1 = nn.Linear(32 * 29 * 29, 128)
        self.fc2 = nn.Linear(128, 16)
        self.fc3 = nn.Linear(16, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x






cpu


Net(
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=26912, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=2, bias=True)
)

In [3]:
import wandb

epochs = 8
params = [
    {"lr": 0.001, "momentum": 0.9, "title": "lr=0.001, momentum=0.9"},
    {"lr": 0.001, "momentum": 0.5, "title": "lr=0.001, momentum=0.5"},
    {"lr": 0.001, "momentum": 0.1, "title": "lr=0.001, momentum=0.1"},
    {"lr": 0.003, "momentum": 0.9, "title": "lr=0.003, momentum=0.9"},
    {"lr": 0.003, "momentum": 0.5, "title": "lr=0.003, momentum=0.5"},
    {"lr": 0.003, "momentum": 0.1, "title": "lr=0.003, momentum=0.1"},
    {"lr": 0.01, "momentum": 0.9, "title": "lr=0.01, momentum=0.9"},
    {"lr": 0.01, "momentum": 0.5, "title": "lr=0.01, momentum=0.5"},
    {"lr": 0.01, "momentum": 0.1, "title": "lr=0.01, momentum=0.1"},
]


In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
import torch.optim as optim
for params_set in params:
    net = Net()

    net.to(device)
    wandb.init(project="Voice-Recognition", config={
        "learning_rate": params_set["lr"],
        "momentum": params_set["momentum"],
        "epochs": epochs,
        "architecture": "CNN"
    }, group="model comps", reinit=True)
    wandb.run.name = params_set["title"]
    wandb.run.save()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=params_set["lr"], momentum=params_set["momentum"])
    correct_train = 0
    total_train = 0
    training_loss = []

    for epoch in range(epochs):

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            training_loss.append(loss.item())
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

            # print statistics
            running_loss += loss.item()

            if i % 200 == 199:    # print every 200 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
                running_loss = 0.0
        wandb.log({f"train loss": np.mean(training_loss), "train acc": correct_train / total_train})
        correct = 0
        total = 0
        loss = []
        # since we're not training, we don't need to calculate the gradients for our outputs
        with torch.no_grad():
            for data in validloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                current_loss = criterion(outputs, labels)
                loss.append(current_loss.item())
                # the class with the highest energy is what we choose as prediction
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
            wandb.log({"validation loss": np.mean(loss), "validation acc": correct / total})
    print('Finished Training')
    correct = 0
    total = 0
    all_labels = []
    all_predictions = []
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for i, data in enumerate(testloader, 0):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    print(f'Accuracy of the network: {100 * correct // total} %')
    wandb.run.summary["test acc"] = 100 * correct // total
    wandb.log({"confusion matrix": wandb.plot.confusion_matrix(probs=None,
                                                                y_true=all_labels,
                                                                preds=all_predictions,
                                                                class_names=["class 0", "class 1"])})
    wandb.run.finish()





[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mwaluzenicz-ignacy[0m ([33mIntro-ML[0m). Use [1m`wandb login --relogin`[0m to force relogin




[1,   200] loss: 0.696
[1,   400] loss: 0.691
[1,   600] loss: 0.692
[1,   800] loss: 0.695
[1,  1000] loss: 0.693
[1,  1200] loss: 0.691
[1,  1400] loss: 0.690
[1,  1600] loss: 0.693
[1,  1800] loss: 0.687
[1,  2000] loss: 0.691
[1,  2200] loss: 0.693
[1,  2400] loss: 0.695
[1,  2600] loss: 0.694
[1,  2800] loss: 0.694
[1,  3000] loss: 0.692
[1,  3200] loss: 0.693
[1,  3400] loss: 0.689
[1,  3600] loss: 0.691
[1,  3800] loss: 0.692
[1,  4000] loss: 0.692
[1,  4200] loss: 0.690
[1,  4400] loss: 0.689
[1,  4600] loss: 0.694
[1,  4800] loss: 0.692
[1,  5000] loss: 0.693
[1,  5200] loss: 0.691
[1,  5400] loss: 0.694
[1,  5600] loss: 0.693
[1,  5800] loss: 0.693
[1,  6000] loss: 0.688
[1,  6200] loss: 0.689
[1,  6400] loss: 0.688
[1,  6600] loss: 0.689
[1,  6800] loss: 0.665
[1,  7000] loss: 0.659
[1,  7200] loss: 0.604
[1,  7400] loss: 0.550
[1,  7600] loss: 0.563
[1,  7800] loss: 0.449
[1,  8000] loss: 0.426
[1,  8200] loss: 0.367
[2,   200] loss: 0.247
[2,   400] loss: 0.341
[2,   600] 

VBox(children=(Label(value='0.015 MB of 0.015 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train acc,▁▅▆▇▇███
train loss,█▅▃▂▂▁▁▁
validation acc,▁▇▇█▇███
validation loss,█▂▂▁▃▁▁▁

0,1
test acc,96.0
train acc,0.93136
train loss,0.12048
validation acc,0.98503
validation loss,0.03972


[1,   200] loss: 0.019
[1,   400] loss: 0.007
[1,   600] loss: 0.016
[1,   800] loss: 0.001
[1,  1000] loss: 0.005
[1,  1200] loss: 0.007
[1,  1400] loss: 0.005
[1,  1600] loss: 0.006
[1,  1800] loss: 0.007
[1,  2000] loss: 0.004
[1,  2200] loss: 0.003
[1,  2400] loss: 0.002
[1,  2600] loss: 0.003
[1,  2800] loss: 0.003
[1,  3000] loss: 0.003
[1,  3200] loss: 0.001
[1,  3400] loss: 0.002
[1,  3600] loss: 0.007
[1,  3800] loss: 0.002
[1,  4000] loss: 0.002
[1,  4200] loss: 0.005
[1,  4400] loss: 0.004
[1,  4600] loss: 0.005
[1,  4800] loss: 0.008
[1,  5000] loss: 0.001
[1,  5200] loss: 0.003
[1,  5400] loss: 0.005
[1,  5600] loss: 0.005
[1,  5800] loss: 0.006
[1,  6000] loss: 0.003
[1,  6200] loss: 0.004
[1,  6400] loss: 0.006
[1,  6600] loss: 0.008
[1,  6800] loss: 0.003
[1,  7000] loss: 0.003
[1,  7200] loss: 0.009
[1,  7400] loss: 0.003
[1,  7600] loss: 0.005
[1,  7800] loss: 0.009
[1,  8000] loss: 0.003
[1,  8200] loss: 0.005
[2,   200] loss: 0.006
[2,   400] loss: 0.002
[2,   600] 

KeyboardInterrupt: 



In [7]:
wandb.run.finish()

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


KeyboardInterrupt: 