In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import os
from torchsummary import summary

In [2]:
class Net(nn.Module):
    
    def __init__(self, num_kernel1, num_kernel2, kernel_size, first_layer_neurons, second_layer_neurons):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, num_kernel1, kernel_size = kernel_size)
        self.conv2 = nn.Conv2d(num_kernel1, num_kernel2, kernel_size = kernel_size)

        self.pool = nn.MaxPool2d(2,2)
        self.drop_out = nn.Dropout()
        
        self.img_size = [64,108]
        self.img_width = int((int((108 - kernel_size + 1)/2) - kernel_size + 1)/2)
        self.img_height = int((int((64 - kernel_size + 1)/2) - kernel_size + 1)/2)
        
        self.img_size = num_kernel2*self.img_height*self.img_width
        
        self.fc1 = nn.Linear(self.img_size, first_layer_neurons)
        self.fc2 = nn.Linear(first_layer_neurons, second_layer_neurons)
        self.fc3 = nn.Linear(second_layer_neurons, 3)
        self.fc1_BN = nn.BatchNorm1d(first_layer_neurons)
        self.fc2_BN = nn.BatchNorm1d(second_layer_neurons)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        
        x = x.view(-1,self.img_size)
        x = self.drop_out(x)
        
        x = F.relu(self.fc1_BN(self.fc1(x)))
        x = F.relu(self.fc2_BN(self.fc2(x)))
        x = self.fc3(x)
        x = F.softmax(x,dim=1)
        return x

In [3]:
specs = torchvision.datasets.ImageFolder(root="./Other Spectrograms/", transform=transforms.ToTensor())
loader = torch.utils.data.DataLoader(specs, batch_size=1, shuffle=False)

In [3]:
net = Net(60,30,5,500,150)
net.load_state_dict(torch.load("./Best_CNN_model.pt"))

<All keys matched successfully>

In [8]:
net.eval()
for i, data in enumerate(loader, 0):
    inputs, labels = data
    prediction = net(inputs.float())
    if labels[0] == 0:
        lang = "Arabic"
    elif labels[0] == 1:
        lang = "German"
    elif labels[0] == 2:
        lang = "Japanese"
    print("\nlabel: ", lang)
    print("English: ",prediction[0][0].item())
    print("Mandarin: ",prediction[0][1].item())
    print("Persian: ",prediction[0][2].item())


label:  Arabic
English:  0.0002300515043316409
Mandarin:  0.053879328072071075
Persian:  0.9458906054496765

label:  Arabic
English:  4.3569711124291644e-05
Mandarin:  0.9178247451782227
Persian:  0.08213173598051071

label:  Arabic
English:  3.3863861972349696e-06
Mandarin:  0.9944040775299072
Persian:  0.0055924346670508385

label:  Arabic
English:  3.0044659069972113e-05
Mandarin:  0.9911993145942688
Persian:  0.008770695887506008

label:  Arabic
English:  0.00019138713832944632
Mandarin:  0.505357027053833
Persian:  0.494451642036438

label:  Arabic
English:  0.0002135637914761901
Mandarin:  0.8143026828765869
Persian:  0.18548379838466644

label:  Arabic
English:  3.6156809073872864e-05
Mandarin:  0.059225164353847504
Persian:  0.9407386779785156

label:  Arabic
English:  3.5001237847609445e-05
Mandarin:  0.541527509689331
Persian:  0.4584374725818634

label:  Arabic
English:  6.0382779338397086e-05
Mandarin:  0.9530479311943054
Persian:  0.04689165949821472

label:  Arabic
Engli

In [4]:
summary(net, input_size = (3,64,108))

  return torch._C._cuda_getDeviceCount() > 0


RuntimeError: shape '[-1, 9360]' is invalid for input of size 7260