In [438]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import defaultdict, Counter

# ---------------------------------------------------------------------------
# Data: read the archive once, then wrap each split in a DataLoader.
# ---------------------------------------------------------------------------
DATASET_PATH = 'lab2_dataset.npz'
BATCH_SIZE = 128

data = np.load(DATASET_PATH)

# Lookup array indexed by class id elsewhere in this notebook.
phone_labels = data['phone_labels']

# Training split -- batches are reshuffled on every pass.
train_feats = torch.tensor(data['train_feats'])
train_labels = torch.tensor(data['train_labels'])
train_dataset = torch.utils.data.TensorDataset(train_feats, train_labels)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test split -- iterated in a fixed order.
test_feats = torch.tensor(data['test_feats'])
test_labels = torch.tensor(data['test_labels'])
test_dataset = torch.utils.data.TensorDataset(test_feats, test_labels)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)


# Define the model architecture
class MyModel(nn.Module):
    """Feed-forward classifier with a single sigmoid hidden layer.

    Every sample is flattened to a vector, passed through a linear layer and
    a sigmoid, and then through a second linear layer that emits one
    unnormalised score per class.

    Args:
        input_dim: Number of values per sample once flattened.
            Defaults to ``11 * 40``.
        hidden_dim: Width of the hidden layer. Defaults to 4096.
        num_classes: Number of scores produced per sample. Defaults to 48.
    """

    def __init__(self, input_dim=11 * 40, hidden_dim=4096, num_classes=48):
        super().__init__()
        self.fnn = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.Sigmoid(),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, x):
        """Return the per-class scores for a batch.

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are flattened into one feature vector per sample.

        Returns:
            Tensor with one row per sample and one column per class.
        """
        # reshape() behaves like view() for contiguous tensors but also
        # accepts non-contiguous ones (e.g. transposed or sliced batches),
        # where view() would raise a RuntimeError.
        x = x.reshape(x.size(0), -1)
        return self.fnn(x)


# Build the network, the loss, and the optimizer that updates the network.
LEARNING_RATE = 0.001
MOMENTUM = 0.9

model = MyModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
)

    
def train_network(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` with one optimizer step per batch.

    Args:
        model: Module to optimise. It is switched to training mode before
            the first batch is processed.
        train_loader: Iterable yielding ``(inputs, labels)`` batches; it is
            iterated once per epoch.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss
            that supports ``backward()``.
        optimizer: Optimizer that holds the parameters of ``model``.
        num_epochs: Number of full passes over ``train_loader``.
            Defaults to 10.
    """
    print("Begin training network")
    # Make sure mode-dependent layers (if any) behave as in training, even if
    # the model was left in evaluation mode by an earlier cell.
    model.train()
    for _ in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

    print("Training finished.\n")

label_list = []
correct_count = defaultdict(int)
incorrect_count = defaultdict(int)

def test_network(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            
            # For computing accuracy for each class
            label_list.append(labels)
            for i, label in enumerate(labels):
                if predicted[i] == label:
                    correct_count[phone_labels[label]] += 1
                elif phone_labels[label] == "sh" and label in predicted:
                    incorrect_count[phone_labels[predicted[i]]] += 1
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
                
    print('Test accuracy: %d %%' % (100 * correct / total))
    
# Fit on the training split, then report accuracy on the test split.
train_network(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
)
test_network(model=model, test_loader=test_loader)

Begin training network
Training finished.

Test accuracy: 58 %


In [404]:
# Per-phoneme accuracy on the test set, derived from the tallies that
# test_network() leaves in `label_list` and `correct_count`.
labels_counter = Counter(torch.cat(label_list).tolist())
total_acc = 0.0  # not read in this cell; NOTE(review): kept in case another cell uses it

# phone label -> percentage of its test samples that were classified correctly
individual_acc = {
    phone_labels[class_id]: 100 * correct_count[phone_labels[class_id]] / num_samples
    for class_id, num_samples in labels_counter.items()
}

# Rank once, best first. sorted() is stable, so ties keep their first-seen
# order -- the same ordering Counter.most_common() produces.
ranked_acc = sorted(individual_acc.items(), key=lambda item: item[1], reverse=True)
three_highest = ranked_acc[:3]
three_lowest = ranked_acc[-3:]

print(three_highest)
print(three_lowest)
print()

phonemes_to_check = ['sh', 'p', 'm', 'r', 'ae']
for phoneme in phonemes_to_check:
    print("Accuracy for {}: {}".format(phoneme, individual_acc[phoneme]))

[('sh', 90.0), ('sil', 89.0), ('s', 88.0)]
[('uh', 27.0), ('ih', 26.0), ('zh', 20.54794520547945)]

Accruacy for sh: 90.0
Accruacy for p: 41.0
Accruacy for m: 66.0
Accruacy for r: 59.0
Accruacy for ae: 57.0


In [439]:
# Show which phones misclassified "sh" samples were predicted as
# (tallies collected in `incorrect_count` by test_network).
print(incorrect_count)

defaultdict(<class 'int'>, {'s': 15, 'f': 2, 'ch': 9, 'zh': 1})
