In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import glob
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F
from time import time
from torchsummary import summary
import math
import copy
import csv
from sklearn.metrics import confusion_matrix

In [2]:
lr = 0.001
numepoch = 50
seed = 0
optimizer = torch.optim.Adam
loss_function = nn.CrossEntropyLoss()
batch_size = 20
kernel_size = 5

num_kernel1 = 60
num_kernel2 = 30
first_layer_neurons = 500
second_layer_neurons = 150

torch.manual_seed(seed)

<torch._C.Generator at 0x23519cf0cf0>

In [3]:
classes = ('English', 'Mandarin', 'Persian')

# Get dataset
train_data = torchvision.datasets.ImageFolder(root="./train/", transform=transforms.ToTensor())
valid_data = torchvision.datasets.ImageFolder(root="./valid/", transform=transforms.ToTensor())
test_data = torchvision.datasets.ImageFolder(root="./test/", transform=transforms.ToTensor())

In [4]:
class Net(nn.Module):
    
    def __init__(self, num_kernel1, num_kernel2, kernel_size, first_layer_neurons, second_layer_neurons):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, num_kernel1, kernel_size = kernel_size)
        self.conv2 = nn.Conv2d(num_kernel1, num_kernel2, kernel_size = kernel_size)

        self.pool = nn.MaxPool2d(2,2)
        self.drop_out = nn.Dropout()
        
        self.img_size = [64,108]
        self.img_width = int((int((108 - kernel_size + 1)/2) - kernel_size + 1)/2)
        self.img_height = int((int((64 - kernel_size + 1)/2) - kernel_size + 1)/2)
        
        self.img_size = num_kernel2*self.img_height*self.img_width
        
        self.fc1 = nn.Linear(self.img_size, first_layer_neurons)
        self.fc2 = nn.Linear(first_layer_neurons, second_layer_neurons)
        self.fc3 = nn.Linear(second_layer_neurons, 3)
        self.fc1_BN = nn.BatchNorm1d(first_layer_neurons)
        self.fc2_BN = nn.BatchNorm1d(second_layer_neurons)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        
        x = x.view(-1,self.img_size)
        x = self.drop_out(x)
        
        x = F.relu(self.fc1_BN(self.fc1(x)))
        x = F.relu(self.fc2_BN(self.fc2(x)))
        x = self.fc3(x)
        x = F.softmax(x,dim=1)
        return x

#one hot encoding for label vector, used for MSELoss
def onehot(labels):
    one_hot_labels = np.zeros((len(labels),3))
    for i in range(len(labels)):
        one_hot_labels[i][labels[i]] = 1
    return torch.from_numpy(one_hot_labels)

#compute accuracy of the given model on data in loader
def accuracy(net, loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct/total

In [5]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)
net = Net(num_kernel1, num_kernel2, kernel_size, first_layer_neurons, second_layer_neurons)
optimizer = torch.optim.Adam(net.parameters(),lr)

In [None]:
trainLossRec = []
validLossRec = []
testLossRec = []
trainAccRec = []
validAccRec = []
testAccRec = []
nRec = []
startTime = time()
best_accuV = 0

for epoch in range(numepoch):
    running_loss = 0.0
    numbatch = 0
    net.train()
    for i, train_data in enumerate(train_loader, 0):
        inputs, labels = train_data
        optimizer.zero_grad()
        predictT = net(inputs.float())
        #lossT= loss_function(input=predictT.squeeze(), target=onehot(labels).float()) #MSE
        lossT= loss_function(predictT, labels) #CrossEntropy
        lossT.backward()
        optimizer.step()
        running_loss += lossT.item()
        numbatch += 1
    trainLossRec.append(running_loss/numbatch)
    accuT = accuracy(net,train_loader)
    trainAccRec.append(accuT)

    running_loss = 0.0
    numbatch = 0
    net.eval()
    for i, valid_data in enumerate(valid_loader):
        inputs, labels = valid_data
        predictV = net(inputs.float())
        #lossV = loss_function(input=predictV.squeeze(), target=onehot(labels).float()) #MSE
        lossV= loss_function(predictV, labels) #CrossEntropy
        running_loss += lossV.item()
        numbatch += 1
    validLossRec.append(running_loss/numbatch)
    accuV = accuracy(net,valid_loader)
    validAccRec.append(accuV)

    if (accuV > best_accuV):
        best_accuV = accuV
        best_model_weights = copy.deepcopy(net.state_dict())

    nRec.append(epoch)
    print("Epoch:",nRec[-1]+1,"Training Accuracy:",accuT,"Validation Accuracy:",accuV)

timeElapsed = time()-startTime
print('Training Time:',timeElapsed)

In [None]:
# PLOT
plt.plot(nRec,trainLossRec, label='Train')
plt.plot(nRec,validLossRec, label='Validation')
plt.title("Loss vs. Epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

plt.plot(nRec,trainAccRec, label='Train')
plt.plot(nRec,validAccRec, label='Validation')
plt.title("Accuracy vs. Epoch")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

In [9]:
print("Best CNN Model")
#net.load_state_dict(best_model_weights)
print("Training Accuracy: ", accuracy(net, train_loader))
print("Validation Accuracy: ", accuracy(net, valid_loader))
print("Test Accuracy: ", accuracy(net, test_loader))

Best CNN Model
Training Accuracy:  0.9933333333333333
Validation Accuracy:  0.36333333333333334
Test Accuracy:  0.41333333333333333


In [None]:
#torch.save(net.state_dict(), 'Best_CNN_Model.pt')

In [10]:
net = Net(60,30,5,500,150)
net.load_state_dict(torch.load("./Best_CNN_model.pt"))
for data, label in train_loader:
    prediction = net(data.float())
    print("label: ", label)
    print("prediction: ", prediction)

lebel:  tensor([0, 2, 1, 0, 2, 1, 1, 2, 0, 1, 0, 2, 1, 0, 2, 1, 1, 2, 0, 2])
prediction:  tensor([[9.9954e-01, 7.6752e-07, 4.5465e-04],
        [1.8695e-05, 1.1598e-03, 9.9882e-01],
        [1.7270e-05, 9.9988e-01, 1.0409e-04],
        [9.9962e-01, 2.0182e-07, 3.7818e-04],
        [3.0976e-03, 2.2623e-05, 9.9688e-01],
        [1.8716e-06, 9.9752e-01, 2.4808e-03],
        [4.6612e-04, 9.9942e-01, 1.1157e-04],
        [1.3123e-03, 1.1933e-04, 9.9857e-01],
        [9.9999e-01, 2.6619e-06, 5.2814e-06],
        [1.3551e-04, 9.9976e-01, 9.9581e-05],
        [9.9986e-01, 3.8077e-05, 1.0076e-04],
        [2.0529e-04, 3.2191e-05, 9.9976e-01],
        [1.2774e-08, 9.9999e-01, 9.4284e-06],
        [1.0000e+00, 2.2019e-06, 1.0240e-07],
        [6.7140e-07, 2.6930e-04, 9.9973e-01],
        [4.0291e-05, 9.9498e-01, 4.9753e-03],
        [4.3039e-04, 9.9946e-01, 1.1017e-04],
        [4.5283e-04, 2.7481e-04, 9.9927e-01],
        [9.9906e-01, 1.7790e-05, 9.2546e-04],
        [3.3288e-05, 1.8369e-05, 9.9

lebel:  tensor([2, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 0, 2, 1, 2, 2, 1, 0, 0, 0])
prediction:  tensor([[7.9663e-04, 8.4803e-02, 9.1440e-01],
        [5.5411e-05, 1.3201e-04, 9.9981e-01],
        [4.7013e-05, 7.0687e-05, 9.9988e-01],
        [4.8543e-02, 1.0699e-05, 9.5145e-01],
        [6.8626e-03, 1.7129e-01, 8.2185e-01],
        [9.8934e-01, 7.2813e-04, 9.9291e-03],
        [9.9964e-01, 3.2588e-06, 3.5419e-04],
        [4.8643e-05, 9.9991e-01, 3.6494e-05],
        [2.2361e-06, 9.9998e-01, 1.8339e-05],
        [7.0162e-04, 2.9951e-02, 9.6935e-01],
        [2.8912e-02, 4.1180e-02, 9.2991e-01],
        [1.0000e+00, 3.3990e-06, 4.3831e-07],
        [5.5534e-03, 1.8560e-02, 9.7589e-01],
        [8.5972e-07, 1.0000e+00, 9.3960e-07],
        [1.4450e-03, 1.3138e-04, 9.9842e-01],
        [1.1315e-06, 6.0007e-02, 9.3999e-01],
        [1.8585e-08, 9.9993e-01, 7.0070e-05],
        [9.9972e-01, 1.4008e-05, 2.7029e-04],
        [9.9994e-01, 6.9085e-08, 5.9408e-05],
        [1.0000e+00, 1.0495e-06, 2.1

lebel:  tensor([1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1])
prediction:  tensor([[8.4019e-05, 9.9929e-01, 6.2567e-04],
        [6.0367e-05, 6.2625e-05, 9.9988e-01],
        [9.8882e-01, 9.0855e-07, 1.1184e-02],
        [9.9989e-01, 2.4586e-07, 1.0562e-04],
        [2.3595e-05, 9.4707e-01, 5.2907e-02],
        [2.9580e-04, 8.1809e-05, 9.9962e-01],
        [4.9665e-05, 9.9490e-01, 5.0497e-03],
        [2.3314e-09, 1.1153e-05, 9.9999e-01],
        [9.9985e-01, 1.1454e-04, 3.0897e-05],
        [4.5161e-06, 9.9999e-01, 1.0032e-05],
        [9.9999e-01, 4.1040e-06, 1.0749e-05],
        [9.9625e-01, 3.1804e-04, 3.4350e-03],
        [9.9985e-01, 8.8621e-09, 1.5266e-04],
        [1.9753e-05, 3.0323e-04, 9.9968e-01],
        [9.9835e-01, 1.3261e-07, 1.6506e-03],
        [4.1559e-04, 9.7871e-01, 2.0870e-02],
        [1.0598e-05, 9.9730e-01, 2.6886e-03],
        [1.8759e-06, 1.0000e+00, 1.3875e-06],
        [4.2404e-04, 1.7369e-01, 8.2589e-01],
        [2.3850e-02, 9.7588e-01, 2.6

lebel:  tensor([0, 0, 1, 1, 0, 2, 1, 1, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 0])
prediction:  tensor([[9.9990e-01, 5.2035e-06, 9.5159e-05],
        [9.6774e-01, 2.5553e-02, 6.7108e-03],
        [8.2096e-09, 1.0000e+00, 1.5031e-08],
        [2.8236e-03, 9.4707e-01, 5.0107e-02],
        [9.9933e-01, 2.3185e-05, 6.4881e-04],
        [5.0353e-03, 1.8011e-01, 8.1485e-01],
        [2.9611e-06, 9.9997e-01, 3.0548e-05],
        [8.9030e-08, 9.9917e-01, 8.3153e-04],
        [9.9999e-01, 2.0064e-06, 4.0940e-06],
        [9.9424e-01, 5.4872e-03, 2.7216e-04],
        [1.8944e-03, 9.1691e-04, 9.9719e-01],
        [8.0490e-04, 2.9371e-04, 9.9890e-01],
        [2.0931e-03, 1.2924e-04, 9.9778e-01],
        [2.5144e-08, 4.3281e-05, 9.9996e-01],
        [9.9992e-01, 7.3953e-05, 1.9328e-06],
        [1.8090e-04, 1.8883e-04, 9.9963e-01],
        [9.9754e-01, 1.6340e-03, 8.2870e-04],
        [2.5953e-05, 1.7090e-02, 9.8288e-01],
        [8.7477e-05, 4.7897e-04, 9.9943e-01],
        [9.9285e-01, 5.3463e-07, 7.1

lebel:  tensor([1, 1, 2, 0, 0, 1, 0, 1, 2, 1, 1, 2, 0, 1, 0, 2, 0, 2, 1, 0])
prediction:  tensor([[3.4263e-06, 9.9799e-01, 2.0080e-03],
        [7.3563e-06, 9.9999e-01, 4.9856e-06],
        [6.5890e-07, 2.6758e-05, 9.9997e-01],
        [9.9995e-01, 7.9337e-06, 4.6813e-05],
        [9.9997e-01, 6.1938e-07, 2.4937e-05],
        [1.8409e-04, 9.9624e-01, 3.5737e-03],
        [9.9450e-01, 2.0323e-04, 5.3007e-03],
        [3.6304e-03, 9.5559e-01, 4.0775e-02],
        [2.1591e-04, 2.5514e-04, 9.9953e-01],
        [9.9993e-05, 9.8410e-01, 1.5804e-02],
        [2.0081e-04, 9.9980e-01, 5.8599e-08],
        [2.3502e-06, 7.4646e-04, 9.9925e-01],
        [9.9968e-01, 1.0221e-08, 3.2044e-04],
        [6.8788e-09, 9.9835e-01, 1.6512e-03],
        [9.7717e-01, 2.6417e-05, 2.2800e-02],
        [9.3439e-03, 1.9322e-07, 9.9066e-01],
        [9.9942e-01, 7.1302e-06, 5.7298e-04],
        [6.6593e-05, 8.2299e-04, 9.9911e-01],
        [5.1460e-06, 9.9918e-01, 8.1203e-04],
        [9.9999e-01, 7.9420e-06, 5.3

lebel:  tensor([2, 1, 1, 1, 0, 2, 1, 0, 2, 1, 1, 0, 2, 0, 2, 1, 2, 2, 0, 2])
prediction:  tensor([[5.1210e-03, 8.4565e-04, 9.9403e-01],
        [2.6613e-07, 9.9943e-01, 5.6583e-04],
        [3.8679e-05, 9.9979e-01, 1.7627e-04],
        [5.3822e-06, 9.9965e-01, 3.4599e-04],
        [9.9977e-01, 1.2445e-05, 2.2164e-04],
        [3.2397e-04, 3.8905e-03, 9.9579e-01],
        [1.3379e-06, 9.9996e-01, 3.8445e-05],
        [1.0000e+00, 6.8806e-07, 3.3256e-06],
        [8.8304e-05, 1.4010e-03, 9.9851e-01],
        [6.8519e-06, 9.9885e-01, 1.1442e-03],
        [1.0103e-03, 9.9895e-01, 3.5373e-05],
        [9.9999e-01, 3.6599e-09, 7.4716e-06],
        [3.5320e-04, 4.9844e-06, 9.9964e-01],
        [9.9981e-01, 7.4662e-08, 1.8847e-04],
        [5.8876e-05, 7.2475e-04, 9.9922e-01],
        [5.5716e-04, 9.9903e-01, 4.1607e-04],
        [2.0544e-04, 2.1197e-04, 9.9958e-01],
        [8.1136e-03, 7.2964e-06, 9.9188e-01],
        [9.9904e-01, 9.5520e-04, 3.5585e-07],
        [6.4300e-04, 3.4637e-03, 9.9

In [11]:
#compute confusion matrix
ground_truth = []
predictions = []
with torch.no_grad():
    for data in train_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        predictions += predicted.tolist()
        ground_truth += labels.tolist()
print(confusion_matrix(ground_truth, predictions))

[[296   0   4]
 [  0 297   3]
 [  3   2 295]]
