In [31]:
# Read data from the same train_X and train_y used to get good results on tensorflow
import pickle

with open('Librispeech_Tensorflow_X_train.pkl','rb') as f:
    X_train = pickle.load(f)
with open('Librispeech_Tensorflow_y_train.pkl','rb') as f:
    y_train = pickle.load(f)
with open('Librispeech_Tensorflow_X_val.pkl','rb') as f:
    X_val = pickle.load(f)
with open('Librispeech_Tensorflow_y_val.pkl','rb') as f:
    y_val = pickle.load(f)
with open('Librispeech_Tensorflow_X_test.pkl','rb') as f:
    X_test = pickle.load(f)
with open('Librispeech_Tensorflow_y_test.pkl','rb') as f:
    y_test = pickle.load(f)

In [32]:
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, parameters, labels, device):
        self.parameters = parameters
        self.labels = labels
        self.device = device

    def __len__(self):
        return len(self.parameters)

    def __getitem__(self, idx):
        param = torch.tensor(self.parameters[idx], dtype=torch.float32).to(device)
        label = torch.tensor(self.labels[idx], dtype=torch.long).to(device)
        return param, label

In [33]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device {device}")

Using device cpu


In [43]:
import torch
import torch.nn as nn
import torch.optim as optim

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(193, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 251)
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.25)
        self.dropout3 = nn.Dropout(0.5)
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        x = self.dropout3(x)
        x = self.softmax(x)
        #print(x)
        return x

In [44]:
model = NeuralNetwork()
model.to(device)

NeuralNetwork(
  (fc1): Linear(in_features=193, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=251, bias=True)
  (dropout1): Dropout(p=0.1, inplace=False)
  (dropout2): Dropout(p=0.25, inplace=False)
  (dropout3): Dropout(p=0.5, inplace=False)
  (softmax): Softmax(dim=None)
)

In [45]:
from torchinfo import summary

summary(model, input_size=(1, 193))

Layer (type:depth-idx)                   Output Shape              Param #
NeuralNetwork                            [1, 251]                  --
├─Linear: 1-1                            [1, 128]                  24,832
├─Dropout: 1-2                           [1, 128]                  --
├─Linear: 1-3                            [1, 128]                  16,512
├─Dropout: 1-4                           [1, 128]                  --
├─Linear: 1-5                            [1, 251]                  32,379
├─Dropout: 1-6                           [1, 251]                  --
├─Softmax: 1-7                           [1, 251]                  --
Total params: 73,723
Trainable params: 73,723
Non-trainable params: 0
Total mult-adds (M): 0.07
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.29
Estimated Total Size (MB): 0.30

In [46]:
train_loader = torch.utils.data.DataLoader(CustomDataset(X_train, y_train.argmax(1), device), batch_size=256, shuffle=True)
val_loader = torch.utils.data.DataLoader(CustomDataset(X_val, y_val.argmax(1), device), batch_size=256)

criterion = nn.CrossEntropyLoss()
# criterion = nn.functional.cross_entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)

best_val_loss = float('inf')
patience = 100
counter = 0
num_epochs = 200

start_epoch = 0
for epoch in range(num_epochs):
    print(f"Epoch: {start_epoch + epoch + 1}/{start_epoch + num_epochs}")
    # Training loop
    model.train()
    train_loss_accumulator = 0.0
    train_correct_accumulator = 0
    train_total_samples = 0
    for batch_idx, (data, targets) in enumerate(train_loader):
        # print(f"Data: {data}")
        # print(f"Targets: {targets}")
        # print(f"Batch: {batch_idx}")
        optimizer.zero_grad()
        outputs = model(data)
        #print(f"Outputs: {outputs}")
        #print(f"Targets: {targets}")
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss_accumulator += loss.item() * data.size(0)
        _, predicted = torch.max(outputs, 1)
        train_correct_accumulator += (predicted == targets).sum().item()
        train_total_samples += data.size(0)
    train_loss = train_loss_accumulator / train_total_samples
    train_accuracy = train_correct_accumulator / train_total_samples

    #Validation loop
    model.eval()
    val_loss_accumulator = 0.0
    val_correct_accumulator = 0
    val_total_samples = 0
    with torch.no_grad():
        val_loss = 0
        for data, targets in val_loader:
            outputs = model(data)
            loss = criterion(outputs, targets)
            val_loss_accumulator += loss.item() * data.size(0)
            _, predicted = torch.max(outputs, 1)
            val_correct_accumulator += (predicted==targets).sum().item()
            val_total_samples += data.size(0)

        val_loss = val_loss_accumulator / val_total_samples
        val_accuracy = val_correct_accumulator / val_total_samples

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered")
                break
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

Epoch: 1/200
Train Loss: 5.5124, Train Accuracy: 0.0338
Validation Loss: 5.4817, Validation Accuracy: 0.1126
0.001
Epoch: 2/200
Train Loss: 5.4425, Train Accuracy: 0.1040
Validation Loss: 5.3796, Validation Accuracy: 0.3324
0.001
Epoch: 3/200
Train Loss: 5.3461, Train Accuracy: 0.2080
Validation Loss: 5.2255, Validation Accuracy: 0.5314
0.001
Epoch: 4/200
Train Loss: 5.2822, Train Accuracy: 0.2698
Validation Loss: 5.1147, Validation Accuracy: 0.6070
0.001
Epoch: 5/200
Train Loss: 5.2418, Train Accuracy: 0.3072
Validation Loss: 5.0349, Validation Accuracy: 0.6490
0.001
Epoch: 6/200
Train Loss: 5.2169, Train Accuracy: 0.3278
Validation Loss: 4.9863, Validation Accuracy: 0.6716
0.001
Epoch: 7/200
Train Loss: 5.2072, Train Accuracy: 0.3362
Validation Loss: 4.9491, Validation Accuracy: 0.6945
0.001
Epoch: 8/200
Train Loss: 5.1982, Train Accuracy: 0.3435
Validation Loss: 4.9199, Validation Accuracy: 0.7074
0.001
Epoch: 9/200
Train Loss: 5.1899, Train Accuracy: 0.3490
Validation Loss: 4.9040,

In [47]:
criterion = nn.CrossEntropyLoss()
# criterion = nn.functional.cross_entropy
optimizer = optim.Adam(model.parameters(), lr=0.0001)

best_val_loss = float('inf')
patience = 100
counter = 0
num_epochs = 100

start_epoch = 200
for epoch in range(num_epochs):
    print(f"Epoch: {start_epoch + epoch + 1}/{start_epoch + num_epochs}")
    # Training loop
    model.train()
    train_loss_accumulator = 0.0
    train_correct_accumulator = 0
    train_total_samples = 0
    for batch_idx, (data, targets) in enumerate(train_loader):
        # print(f"Data: {data}")
        # print(f"Targets: {targets}")
        # print(f"Batch: {batch_idx}")
        optimizer.zero_grad()
        outputs = model(data)
        #print(f"Outputs: {outputs}")
        #print(f"Targets: {targets}")
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss_accumulator += loss.item() * data.size(0)
        _, predicted = torch.max(outputs, 1)
        train_correct_accumulator += (predicted == targets).sum().item()
        train_total_samples += data.size(0)
    train_loss = train_loss_accumulator / train_total_samples
    train_accuracy = train_correct_accumulator / train_total_samples

    #Validation loop
    model.eval()
    val_loss_accumulator = 0.0
    val_correct_accumulator = 0
    val_total_samples = 0
    with torch.no_grad():
        val_loss = 0
        for data, targets in val_loader:
            outputs = model(data)
            loss = criterion(outputs, targets)
            val_loss_accumulator += loss.item() * data.size(0)
            _, predicted = torch.max(outputs, 1)
            val_correct_accumulator += (predicted==targets).sum().item()
            val_total_samples += data.size(0)

        val_loss = val_loss_accumulator / val_total_samples
        val_accuracy = val_correct_accumulator / val_total_samples

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered")
                break
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

Epoch: 201/300
Train Loss: 5.0415, Train Accuracy: 0.4889
Validation Loss: 4.5724, Validation Accuracy: 0.9609
Epoch: 202/300
Train Loss: 5.0379, Train Accuracy: 0.4924
Validation Loss: 4.5717, Validation Accuracy: 0.9609
Epoch: 203/300
Train Loss: 5.0410, Train Accuracy: 0.4896
Validation Loss: 4.5710, Validation Accuracy: 0.9627
Epoch: 204/300
Train Loss: 5.0410, Train Accuracy: 0.4900
Validation Loss: 4.5705, Validation Accuracy: 0.9634
Epoch: 205/300
Train Loss: 5.0407, Train Accuracy: 0.4903
Validation Loss: 4.5705, Validation Accuracy: 0.9631
Epoch: 206/300
Train Loss: 5.0398, Train Accuracy: 0.4910
Validation Loss: 4.5702, Validation Accuracy: 0.9634
Epoch: 207/300
Train Loss: 5.0422, Train Accuracy: 0.4879
Validation Loss: 4.5700, Validation Accuracy: 0.9638
Epoch: 208/300
Train Loss: 5.0425, Train Accuracy: 0.4879
Validation Loss: 4.5699, Validation Accuracy: 0.9638
Epoch: 209/300
Train Loss: 5.0440, Train Accuracy: 0.4867
Validation Loss: 4.5699, Validation Accuracy: 0.9638
E

In [57]:
X_test.shape

(8733, 193)

In [56]:
import random

samples = random.sample(range(X_test.shape[0]),100)

In [55]:
total_correct = 0
model.eval()

for i in range(len(samples)):
    truth = y_test.argmax(1)[samples[i]]
    prediction = model(torch.tensor([X_test[samples[i]]], dtype=torch.float32)).argmax()
    if truth == prediction:
        total_correct += 1
    print(f"{i}: {truth} - {prediction}")
print(f"{total_correct}/{len(samples)} = {total_correct * 100 / len(samples):.2f}%")

0: 113 - 113
1: 65 - 65
2: 44 - 44
3: 92 - 92
4: 65 - 65
5: 98 - 98
6: 74 - 74
7: 71 - 71
8: 7 - 7
9: 130 - 130
10: 202 - 202
11: 134 - 134
12: 172 - 172
13: 137 - 137
14: 84 - 84
15: 198 - 198
16: 139 - 139
17: 115 - 115
18: 184 - 184
19: 174 - 174
20: 173 - 173
21: 222 - 222
22: 8 - 8
23: 128 - 128
24: 83 - 83
25: 88 - 88
26: 213 - 213
27: 54 - 54
28: 134 - 134
29: 148 - 148
30: 191 - 191
31: 110 - 110
32: 209 - 209
33: 133 - 133
34: 247 - 247
35: 243 - 243
36: 180 - 180
37: 105 - 105
38: 174 - 174
39: 236 - 236
40: 81 - 81
41: 116 - 116
42: 154 - 154
43: 135 - 135
44: 201 - 201
45: 48 - 48
46: 43 - 43
47: 3 - 73
48: 36 - 36
49: 0 - 0
50: 134 - 134
51: 100 - 100
52: 120 - 120
53: 58 - 58
54: 25 - 25
55: 223 - 223
56: 147 - 147
57: 129 - 129
58: 88 - 88
59: 181 - 181
60: 69 - 69
61: 53 - 53
62: 197 - 197
63: 52 - 52
64: 175 - 175
65: 26 - 26
66: 137 - 137
67: 146 - 146
68: 68 - 68
69: 22 - 22
70: 3 - 1
71: 208 - 208
72: 12 - 12
73: 27 - 82
74: 129 - 129
75: 154 - 154
76: 213 - 213
77:

In [58]:
# Let's try with a model that is slightly closer to what Jurgen was using (the previous model was missing a layer)
# This model has four fully connected layers with a relu function between each layer.
import torch
import torch.nn as nn
import torch.optim as optim

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(193, 193)
        self.fc2 = nn.Linear(193, 128)
        self.fc3 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, 251)
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.25)
        self.dropout3 = nn.Dropout(0.5)
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.dropout2(x)
        x = torch.nn.functional.relu(self.fc3(x))
        x = self.dropout3(x)
        x = self.fc4(x)
        x = self.softmax(x)
        #print(x)
        return x

In [None]:
# Train again:
model = NeuralNetwork()
model.to(device)

train_loader = torch.utils.data.DataLoader(CustomDataset(X_train, y_train.argmax(1), device), batch_size=256, shuffle=True)
val_loader = torch.utils.data.DataLoader(CustomDataset(X_val, y_val.argmax(1), device), batch_size=256)

criterion = nn.CrossEntropyLoss()
# criterion = nn.functional.cross_entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)

best_val_loss = float('inf')
patience = 100
counter = 0
num_epochs = 200

start_epoch = 0
for epoch in range(num_epochs):
    print(f"Epoch: {start_epoch + epoch + 1}/{start_epoch + num_epochs}")
    # Training loop
    model.train()
    train_loss_accumulator = 0.0
    train_correct_accumulator = 0
    train_total_samples = 0
    for batch_idx, (data, targets) in enumerate(train_loader):
        # print(f"Data: {data}")
        # print(f"Targets: {targets}")
        # print(f"Batch: {batch_idx}")
        optimizer.zero_grad()
        outputs = model(data)
        #print(f"Outputs: {outputs}")
        #print(f"Targets: {targets}")
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss_accumulator += loss.item() * data.size(0)
        _, predicted = torch.max(outputs, 1)
        train_correct_accumulator += (predicted == targets).sum().item()
        train_total_samples += data.size(0)
    train_loss = train_loss_accumulator / train_total_samples
    train_accuracy = train_correct_accumulator / train_total_samples

    #Validation loop
    model.eval()
    val_loss_accumulator = 0.0
    val_correct_accumulator = 0
    val_total_samples = 0
    with torch.no_grad():
        val_loss = 0
        for data, targets in val_loader:
            outputs = model(data)
            loss = criterion(outputs, targets)
            val_loss_accumulator += loss.item() * data.size(0)
            _, predicted = torch.max(outputs, 1)
            val_correct_accumulator += (predicted==targets).sum().item()
            val_total_samples += data.size(0)

        val_loss = val_loss_accumulator / val_total_samples
        val_accuracy = val_correct_accumulator / val_total_samples

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered")
                break
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

Epoch: 1/200
Train Loss: 5.5167, Train Accuracy: 0.0179
Validation Loss: 5.4873, Validation Accuracy: 0.0420
Epoch: 2/200
Train Loss: 5.4705, Train Accuracy: 0.0675
Validation Loss: 5.4025, Validation Accuracy: 0.1434
Epoch: 3/200
Train Loss: 5.3942, Train Accuracy: 0.1508
Validation Loss: 5.2948, Validation Accuracy: 0.2499
Epoch: 4/200
Train Loss: 5.3252, Train Accuracy: 0.2217
Validation Loss: 5.2281, Validation Accuracy: 0.3155
Epoch: 5/200
Train Loss: 5.2635, Train Accuracy: 0.2838
Validation Loss: 5.1517, Validation Accuracy: 0.3890
Epoch: 6/200
Train Loss: 5.2068, Train Accuracy: 0.3432
Validation Loss: 5.0901, Validation Accuracy: 0.4546
Epoch: 7/200
Train Loss: 5.1656, Train Accuracy: 0.3810
Validation Loss: 5.0592, Validation Accuracy: 0.4844
Epoch: 8/200
Train Loss: 5.1277, Train Accuracy: 0.4191
Validation Loss: 5.0410, Validation Accuracy: 0.4966
Epoch: 9/200
Train Loss: 5.1055, Train Accuracy: 0.4398
Validation Loss: 5.0239, Validation Accuracy: 0.5145
Epoch: 10/200
Train