In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import glob
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F
from time import time
from torchsummary import summary
import math
import copy
import csv
from sklearn.metrics import confusion_matrix

In [2]:
lr = 0.001
numepoch = 50
seed = 0
optimizer = torch.optim.Adam
loss_function = nn.CrossEntropyLoss()
batch_size = 20
kernel_size = 5

num_kernel1 = 60
num_kernel2 = 30
first_layer_neurons = 500
second_layer_neurons = 150

torch.manual_seed(seed)

<torch._C.Generator at 0x23519cf0cf0>

In [3]:
classes = ('English', 'Mandarin', 'Persian')

# Get dataset
train_data = torchvision.datasets.ImageFolder(root="./train/", transform=transforms.ToTensor())
valid_data = torchvision.datasets.ImageFolder(root="./valid/", transform=transforms.ToTensor())
test_data = torchvision.datasets.ImageFolder(root="./test/", transform=transforms.ToTensor())

In [4]:
class Net(nn.Module):
    
    def __init__(self, num_kernel1, num_kernel2, kernel_size, first_layer_neurons, second_layer_neurons):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, num_kernel1, kernel_size = kernel_size)
        self.conv2 = nn.Conv2d(num_kernel1, num_kernel2, kernel_size = kernel_size)

        self.pool = nn.MaxPool2d(2,2)
        self.drop_out = nn.Dropout()
        
        self.img_size = [64,108]
        self.img_width = int((int((108 - kernel_size + 1)/2) - kernel_size + 1)/2)
        self.img_height = int((int((64 - kernel_size + 1)/2) - kernel_size + 1)/2)
        
        self.img_size = num_kernel2*self.img_height*self.img_width
        
        self.fc1 = nn.Linear(self.img_size, first_layer_neurons)
        self.fc2 = nn.Linear(first_layer_neurons, second_layer_neurons)
        self.fc3 = nn.Linear(second_layer_neurons, 3)
        self.fc1_BN = nn.BatchNorm1d(first_layer_neurons)
        self.fc2_BN = nn.BatchNorm1d(second_layer_neurons)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        
        x = x.view(-1,self.img_size)
        x = self.drop_out(x)
        
        x = F.relu(self.fc1_BN(self.fc1(x)))
        x = F.relu(self.fc2_BN(self.fc2(x)))
        x = self.fc3(x)
        x = F.softmax(x,dim=1)
        return x

#one hot encoding for label vector, used for MSELoss
def onehot(labels):
    one_hot_labels = np.zeros((len(labels),3))
    for i in range(len(labels)):
        one_hot_labels[i][labels[i]] = 1
    return torch.from_numpy(one_hot_labels)

#compute accuracy of the given model on data in loader
def accuracy(net, loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct/total

In [5]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)
net = Net(num_kernel1, num_kernel2, kernel_size, first_layer_neurons, second_layer_neurons)
optimizer = torch.optim.Adam(net.parameters(),lr)

In [None]:
trainLossRec = []
validLossRec = []
testLossRec = []
trainAccRec = []
validAccRec = []
testAccRec = []
nRec = []
startTime = time()
best_accuV = 0

for epoch in range(numepoch):
    running_loss = 0.0
    numbatch = 0
    net.train()
    for i, train_data in enumerate(train_loader, 0):
        inputs, labels = train_data
        optimizer.zero_grad()
        predictT = net(inputs.float())
        #lossT= loss_function(input=predictT.squeeze(), target=onehot(labels).float()) #MSE
        lossT= loss_function(predictT, labels) #CrossEntropy
        lossT.backward()
        optimizer.step()
        running_loss += lossT.item()
        numbatch += 1
    trainLossRec.append(running_loss/numbatch)
    accuT = accuracy(net,train_loader)
    trainAccRec.append(accuT)

    running_loss = 0.0
    numbatch = 0
    net.eval()
    for i, valid_data in enumerate(valid_loader):
        inputs, labels = valid_data
        predictV = net(inputs.float())
        #lossV = loss_function(input=predictV.squeeze(), target=onehot(labels).float()) #MSE
        lossV= loss_function(predictV, labels) #CrossEntropy
        running_loss += lossV.item()
        numbatch += 1
    validLossRec.append(running_loss/numbatch)
    accuV = accuracy(net,valid_loader)
    validAccRec.append(accuV)

    if (accuV > best_accuV):
        best_accuV = accuV
        best_model_weights = copy.deepcopy(net.state_dict())

    nRec.append(epoch)
    print("Epoch:",nRec[-1]+1,"Training Accuracy:",accuT,"Validation Accuracy:",accuV)

timeElapsed = time()-startTime
print('Training Time:',timeElapsed)

In [None]:
# PLOT
plt.plot(nRec,trainLossRec, label='Train')
plt.plot(nRec,validLossRec, label='Validation')
plt.title("Loss vs. Epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

plt.plot(nRec,trainAccRec, label='Train')
plt.plot(nRec,validAccRec, label='Validation')
plt.title("Accuracy vs. Epoch")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

In [9]:
print("Best CNN Model")
#net.load_state_dict(best_model_weights)
print("Training Accuracy: ", accuracy(net, train_loader))
print("Validation Accuracy: ", accuracy(net, valid_loader))
print("Test Accuracy: ", accuracy(net, test_loader))

Best CNN Model
Training Accuracy:  0.9933333333333333
Validation Accuracy:  0.36333333333333334
Test Accuracy:  0.41333333333333333


In [None]:
#torch.save(net.state_dict(), 'Best_CNN_Model.pt')

In [12]:
net = Net(60,30,5,500,150)
net.load_state_dict(torch.load("./Best_CNN_model.pt"))
for data, label in train_loader:
    prediction = net(data.float())
    print("label: ", label)
    print("prediction: ", prediction)

label:  tensor([1, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 2])
prediction:  tensor([[1.2677e-04, 9.9925e-01, 6.2247e-04],
        [1.1311e-05, 9.9872e-01, 1.2702e-03],
        [9.8595e-01, 5.1343e-05, 1.3997e-02],
        [4.4059e-03, 3.6951e-06, 9.9559e-01],
        [1.4589e-03, 9.1997e-01, 7.8567e-02],
        [9.9681e-01, 1.6979e-07, 3.1874e-03],
        [2.0086e-06, 1.0620e-04, 9.9989e-01],
        [7.9864e-06, 9.9983e-01, 1.5793e-04],
        [9.8437e-01, 2.4843e-07, 1.5633e-02],
        [9.9993e-01, 3.1626e-06, 7.0347e-05],
        [3.0851e-06, 9.9984e-01, 1.6139e-04],
        [1.0000e+00, 6.8315e-08, 2.7662e-06],
        [9.9948e-01, 3.2924e-07, 5.1785e-04],
        [9.9697e-01, 2.1742e-04, 2.8146e-03],
        [6.2046e-07, 9.6223e-05, 9.9990e-01],
        [1.5825e-05, 9.9998e-01, 2.7043e-07],
        [2.6767e-06, 9.9391e-01, 6.0899e-03],
        [1.9278e-04, 9.9979e-01, 1.6888e-05],
        [7.8916e-04, 9.6822e-01, 3.0988e-02],
        [1.3595e-04, 1.7091e-04, 9.9

label:  tensor([0, 2, 1, 0, 1, 1, 1, 2, 0, 2, 1, 0, 0, 1, 1, 2, 2, 0, 0, 2])
prediction:  tensor([[1.0000e+00, 1.7058e-06, 7.8249e-07],
        [1.3491e-05, 3.4340e-05, 9.9995e-01],
        [3.5878e-04, 9.8564e-01, 1.4005e-02],
        [9.9995e-01, 8.8998e-06, 3.9834e-05],
        [3.0583e-05, 9.9991e-01, 6.1059e-05],
        [1.5678e-04, 9.9947e-01, 3.6935e-04],
        [1.9414e-07, 9.9990e-01, 1.0417e-04],
        [4.7135e-04, 3.8465e-05, 9.9949e-01],
        [9.9794e-01, 1.1675e-03, 8.9181e-04],
        [1.1909e-06, 2.5450e-05, 9.9997e-01],
        [8.4643e-05, 9.9991e-01, 9.8273e-06],
        [1.0000e+00, 1.0280e-07, 1.4056e-06],
        [9.9702e-01, 6.7413e-07, 2.9792e-03],
        [1.7169e-08, 9.9999e-01, 1.1223e-05],
        [7.3078e-06, 9.9731e-01, 2.6789e-03],
        [4.9648e-04, 6.5686e-04, 9.9885e-01],
        [5.9663e-05, 6.2815e-06, 9.9993e-01],
        [9.8740e-01, 4.8238e-04, 1.2121e-02],
        [9.9184e-01, 5.9895e-06, 8.1574e-03],
        [1.4613e-03, 5.4304e-03, 9.9

label:  tensor([0, 1, 1, 1, 2, 1, 0, 2, 0, 2, 1, 0, 2, 2, 1, 2, 1, 0, 2, 2])
prediction:  tensor([[9.9999e-01, 8.2953e-06, 6.3765e-06],
        [5.4994e-06, 9.9972e-01, 2.7256e-04],
        [5.7279e-05, 9.9356e-01, 6.3849e-03],
        [1.1295e-06, 9.9996e-01, 3.9116e-05],
        [6.3667e-04, 6.7566e-04, 9.9869e-01],
        [1.5025e-04, 9.9910e-01, 7.4844e-04],
        [1.0000e+00, 1.2632e-07, 1.8793e-06],
        [3.2067e-04, 2.6679e-05, 9.9965e-01],
        [9.9998e-01, 1.4445e-05, 6.2689e-06],
        [1.0950e-04, 7.4809e-06, 9.9988e-01],
        [7.4887e-08, 9.9998e-01, 1.9070e-05],
        [9.9680e-01, 1.2051e-05, 3.1881e-03],
        [6.0390e-04, 8.0834e-05, 9.9932e-01],
        [6.9261e-06, 9.0455e-06, 9.9998e-01],
        [1.5721e-06, 1.0000e+00, 2.5477e-06],
        [6.6670e-03, 9.5256e-04, 9.9238e-01],
        [6.1060e-03, 9.9368e-01, 2.1182e-04],
        [9.9998e-01, 1.4878e-05, 8.8953e-07],
        [1.4534e-03, 3.1934e-06, 9.9854e-01],
        [3.0845e-04, 4.3437e-04, 9.9

label:  tensor([1, 1, 2, 0, 0, 1, 0, 2, 1, 0, 1, 0, 0, 1, 2, 0, 0, 1, 1, 0])
prediction:  tensor([[1.9557e-08, 9.9974e-01, 2.5693e-04],
        [2.4095e-05, 9.9926e-01, 7.1482e-04],
        [5.2307e-03, 6.2093e-08, 9.9477e-01],
        [9.9999e-01, 4.4180e-07, 7.4048e-06],
        [9.7396e-01, 3.2086e-03, 2.2829e-02],
        [8.3228e-07, 9.9996e-01, 4.3956e-05],
        [9.5396e-01, 4.3401e-02, 2.6351e-03],
        [2.2665e-04, 8.6378e-05, 9.9969e-01],
        [7.6522e-06, 9.8704e-01, 1.2948e-02],
        [9.9970e-01, 1.8343e-05, 2.8397e-04],
        [1.5136e-05, 9.5096e-01, 4.9025e-02],
        [9.9998e-01, 1.1160e-05, 7.5241e-06],
        [7.9160e-01, 6.9243e-06, 2.0839e-01],
        [1.6560e-06, 9.9979e-01, 2.0833e-04],
        [5.1650e-06, 1.8173e-06, 9.9999e-01],
        [9.9980e-01, 3.0737e-05, 1.6576e-04],
        [9.9976e-01, 7.7477e-07, 2.3540e-04],
        [8.8214e-09, 9.7079e-01, 2.9211e-02],
        [2.9484e-05, 9.9448e-01, 5.4924e-03],
        [9.9749e-01, 6.1325e-06, 2.5

label:  tensor([2, 1, 1, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 0, 0, 2, 0, 2, 1, 0])
prediction:  tensor([[1.9790e-05, 7.0596e-05, 9.9991e-01],
        [8.7078e-06, 9.9799e-01, 2.0053e-03],
        [9.4417e-08, 9.9999e-01, 1.2808e-05],
        [9.8272e-01, 9.9335e-03, 7.3513e-03],
        [9.9876e-01, 2.8731e-06, 1.2417e-03],
        [9.9866e-01, 9.1649e-05, 1.2461e-03],
        [1.1177e-05, 9.9983e-01, 1.5663e-04],
        [9.7834e-07, 9.9966e-01, 3.4020e-04],
        [1.0510e-05, 2.4141e-05, 9.9997e-01],
        [9.9426e-01, 1.0974e-04, 5.6275e-03],
        [9.9906e-01, 4.8491e-04, 4.5221e-04],
        [2.5150e-06, 9.9702e-01, 2.9799e-03],
        [9.8113e-01, 6.9196e-03, 1.1953e-02],
        [9.9958e-01, 1.7408e-06, 4.1381e-04],
        [9.9999e-01, 6.2669e-06, 5.1545e-06],
        [3.4274e-05, 1.6272e-05, 9.9995e-01],
        [9.9176e-01, 1.3834e-04, 8.1001e-03],
        [2.3790e-04, 1.9196e-04, 9.9957e-01],
        [1.1094e-10, 9.9936e-01, 6.4191e-04],
        [9.9917e-01, 1.9363e-06, 8.2

label:  tensor([0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 2, 1, 1, 2, 2, 0, 2, 1, 2, 1])
prediction:  tensor([[9.8391e-01, 1.1647e-02, 4.4401e-03],
        [1.5361e-06, 1.0000e+00, 1.0908e-06],
        [9.9328e-01, 7.9898e-05, 6.6443e-03],
        [9.9998e-01, 1.2078e-07, 1.8648e-05],
        [9.9836e-01, 1.5051e-03, 1.3448e-04],
        [9.9920e-01, 1.9791e-05, 7.8333e-04],
        [4.2749e-07, 9.9723e-01, 2.7744e-03],
        [2.5385e-06, 1.1007e-04, 9.9989e-01],
        [9.9938e-01, 6.2110e-04, 3.2187e-06],
        [9.9992e-01, 5.7227e-09, 7.7893e-05],
        [7.5090e-06, 4.0017e-04, 9.9959e-01],
        [3.5133e-08, 9.9990e-01, 9.5197e-05],
        [1.1970e-03, 9.0873e-01, 9.0071e-02],
        [1.7122e-03, 1.9755e-02, 9.7853e-01],
        [9.5709e-04, 1.2792e-05, 9.9903e-01],
        [9.9904e-01, 3.0893e-04, 6.5262e-04],
        [1.3995e-03, 2.1600e-06, 9.9860e-01],
        [1.1233e-05, 9.9982e-01, 1.6597e-04],
        [1.1525e-03, 1.9800e-03, 9.9687e-01],
        [6.9876e-08, 9.9678e-01, 3.2

In [11]:
#compute confusion matrix
ground_truth = []
predictions = []
with torch.no_grad():
    for data in train_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        predictions += predicted.tolist()
        ground_truth += labels.tolist()
print(confusion_matrix(ground_truth, predictions))

[[296   0   4]
 [  0 297   3]
 [  3   2 295]]
