In [2]:
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torchvision import models
import torchvision
import torchvision.transforms as transforms
import torch
import numpy as np
import numpy
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from PIL import Image
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from termcolor import colored

In [3]:
class CustomDatasetFromImages(Dataset):
    """Image dataset indexed by a CSV file.

    The CSV is read with ``pandas.read_csv``. Its first column is used as the
    image path and its second column as the label, both positionally, so the
    column names do not matter.

    Args:
        table: Anything ``pandas.read_csv`` accepts (path or file-like object).
        transforms: Optional callable applied to the resized PIL image in
            ``__getitem__``. When ``None`` the resized PIL image is returned
            unchanged.
    """

    def __init__(self, table, transforms=None):
        # Read the csv file
        self.data_info = pd.read_csv(table)
        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        # Second column is the labels
        self.label_arr = np.asarray(self.data_info.iloc[:, 1])
        # Number of samples
        self.data_len = len(self.data_info.index)
        # Transformation
        self.transforms = transforms
        # Report how many samples carry label 1 and label 0. The counts come
        # from the same positional label column used everywhere else, so a CSV
        # whose second column is not literally named "label" still works.
        print(int(np.sum(self.label_arr == 1)))
        print(int(np.sum(self.label_arr == 0)))

    def __getitem__(self, index):
        """Return ``(sample, label)`` for the row at ``index``.

        ``sample`` is the output of ``self.transforms`` when one was given,
        otherwise the resized PIL image.
        """
        # Get image name from the pandas df
        single_image_name = self.image_arr[index]
        # Open the file inside a context manager so the handle is released as
        # soon as the converted copy exists.
        # NOTE(review): mode '1' is a single-band bilevel image; confirm this is
        # what the downstream model expects (the RGB variant was commented out).
        with Image.open(single_image_name) as raw_image:
            img_as_img = raw_image.convert('1')

        # Resize before handing the image to the transform pipeline
        img_as_img = img_as_img.resize((224, 224))
        if self.transforms is not None:
            sample = self.transforms(img_as_img)
        else:
            # No transform configured: hand back the PIL image instead of
            # failing on an unassigned variable.
            sample = img_as_img

        # Label for this row, taken from the second CSV column
        single_image_label = self.label_arr[index]

        return (sample, single_image_label)

    def __len__(self):
        """Number of rows in the CSV index."""
        return self.data_len

In [None]:
# Training pipeline: resize, then (with probability 0.5) run the whole
# augmentation group as a unit, then convert to a tensor.
train_Aug = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomApply(
            [
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomVerticalFlip(p=0.5),
                transforms.RandomGrayscale(p=0.1),
                transforms.RandomAffine(10, translate=(0.2, 0.2), fillcolor=0),
            ],
            p=0.5,
        ),
        transforms.ToTensor(),
    ]
)
# Evaluation pipeline: resize and tensor conversion only, no augmentation.
test_Aug = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

# Datasets are built from the CSV index files; each constructor prints its
# label counts, so the train counts appear before the test counts.
train_set = CustomDatasetFromImages('C:/train.csv', transforms=train_Aug)
test_set = CustomDatasetFromImages('C:/test.csv', transforms=test_Aug)

# Only the training loader shuffles; the test loader keeps CSV order.
trainloader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=2)
testloader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=2)

In [None]:
# Number of samples in the train and test datasets, one per line.
print(len(train_set), len(test_set), sep="\n")

In [None]:
# Number of batches per epoch in the train and test loaders, one per line.
print(len(trainloader), len(testloader), sep="\n")

In [None]:
# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the notebook still runs on a machine without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Backbone selection: exactly one of the lines below should be active.
# model = models.resnet18(pretrained=True)
model = models.resnet34(pretrained=True)
# model = models.resnet50(pretrained=True)
# model = torchvision.models.resnet50()
# model.avgpool = nn.AdaptiveAvgPool2d(1)
print(model)

In [None]:
# Fine-tune the whole network: every parameter receives gradients.
for param in model.parameters():
    param.requires_grad = True

# Width of the features feeding the classifier head. It is read from the
# current head rather than hard-coded, so the same cell works for any backbone
# selected above without manual edits. When the cell is re-run, model.fc is
# already the Sequential built below, so its first Linear layer is consulted.
fc_in_features = (
    model.fc.in_features if isinstance(model.fc, nn.Linear) else model.fc[0].in_features
)

# Two-class head ending in log-probabilities, paired with NLLLoss below.
model.fc = nn.Sequential(nn.Linear(fc_in_features, 128),
                         nn.ReLU(),
                         nn.Dropout(0.2),
                         nn.Linear(128, 2),
                         nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 30 scheduler steps.
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
model.to(device)

In [None]:
# Resnet 18
# Train for `epochs` epochs; evaluate on the test loader every 5th epoch and
# print a classification report every 25th epoch.
epochs = 100
best_acc = 0.00

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns None when the optimizer has no parameter groups.
    """
    for param_group in optimizer.param_groups:
        return param_group['lr']

for epoch in range(epochs):
    train_predictions = []
    train_labels = []
    _loss = []

    # ---- training pass ----
    model.train(True)
    for data, label in trainloader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        _loss.append(loss.detach().item())
        train_predictions += torch.argmax(output, dim=1).detach().cpu().numpy().tolist()
        train_labels += label.detach().cpu().numpy().tolist()

    print("learning rate - "+str(get_lr(optimizer)))
    scheduler.step()
    train_acc = accuracy_score(train_labels, train_predictions)

    # ---- evaluation pass (every 5th epoch) ----
    test_predictions = []
    test_labels = []
    _test_loss = []
    model.train(False)
    if (epoch + 1) % 5 == 0:
        # No gradients are needed for evaluation; disabling them avoids
        # building an autograd graph for every test batch.
        with torch.no_grad():
            for data, label in testloader:
                data, label = data.to(device), label.to(device)
                output = model(data)
                test_loss = criterion(output, label)
                _test_loss.append(test_loss.item())
                test_predictions += torch.argmax(output, dim=1).cpu().numpy().tolist()
                test_labels += label.cpu().numpy().tolist()
        # Accuracy is computed once per evaluation instead of once per batch.
        test_acc = accuracy_score(test_labels, test_predictions)
        best_acc = max(best_acc, test_acc)
        print(f"Epoch {epoch+1}/{epochs}.. "
              f"Train loss: {np.mean(_loss):.8f} "
              f"Test loss: {np.mean(_test_loss):.8f} "
              f"Train accuracy: {train_acc:.8f} "
              f"Test accuracy: {test_acc:.8f} ")
        if (epoch + 1) % 25 == 0:
            print(classification_report(test_labels, test_predictions))
    else:
        print(f"Epoch {epoch+1}/{epochs}.. "
              f"Train loss: {np.mean(_loss):.8f} "
              f"Train accuracy: {train_acc:.8f} ")

In [None]:
# Resnet 34
# Train for `epochs` epochs; evaluate on the test loader every 5th epoch and
# print a classification report every 25th epoch.
epochs = 100
best_acc = 0.00

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns None when the optimizer has no parameter groups.
    """
    for param_group in optimizer.param_groups:
        return param_group['lr']

for epoch in range(epochs):
    train_predictions = []
    train_labels = []
    _loss = []

    # ---- training pass ----
    model.train(True)
    for data, label in trainloader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        _loss.append(loss.detach().item())
        train_predictions += torch.argmax(output, dim=1).detach().cpu().numpy().tolist()
        train_labels += label.detach().cpu().numpy().tolist()

    print("learning rate - "+str(get_lr(optimizer)))
    scheduler.step()
    train_acc = accuracy_score(train_labels, train_predictions)

    # ---- evaluation pass (every 5th epoch) ----
    test_predictions = []
    test_labels = []
    _test_loss = []
    model.train(False)
    if (epoch + 1) % 5 == 0:
        # No gradients are needed for evaluation; disabling them avoids
        # building an autograd graph for every test batch.
        with torch.no_grad():
            for data, label in testloader:
                data, label = data.to(device), label.to(device)
                output = model(data)
                test_loss = criterion(output, label)
                _test_loss.append(test_loss.item())
                test_predictions += torch.argmax(output, dim=1).cpu().numpy().tolist()
                test_labels += label.cpu().numpy().tolist()
        # Accuracy is computed once per evaluation instead of once per batch.
        test_acc = accuracy_score(test_labels, test_predictions)
        best_acc = max(best_acc, test_acc)
        print(f"Epoch {epoch+1}/{epochs}.. "
              f"Train loss: {np.mean(_loss):.8f} "
              f"Test loss: {np.mean(_test_loss):.8f} "
              f"Train accuracy: {train_acc:.8f} "
              f"Test accuracy: {test_acc:.8f} ")
        if (epoch + 1) % 25 == 0:
            print(classification_report(test_labels, test_predictions))
    else:
        print(f"Epoch {epoch+1}/{epochs}.. "
              f"Train loss: {np.mean(_loss):.8f} "
              f"Train accuracy: {train_acc:.8f} ")

In [None]:
# Resnet 50
# Train for `epochs` epochs; evaluate on the test loader every 5th epoch and
# print a classification report every 25th epoch.
epochs = 100
best_acc = 0.00

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns None when the optimizer has no parameter groups.
    """
    for param_group in optimizer.param_groups:
        return param_group['lr']

for epoch in range(epochs):
    train_predictions = []
    train_labels = []
    _loss = []

    # ---- training pass ----
    model.train(True)
    for data, label in trainloader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        _loss.append(loss.detach().item())
        train_predictions += torch.argmax(output, dim=1).detach().cpu().numpy().tolist()
        train_labels += label.detach().cpu().numpy().tolist()

    print("learning rate - "+str(get_lr(optimizer)))
    scheduler.step()
    train_acc = accuracy_score(train_labels, train_predictions)

    # ---- evaluation pass (every 5th epoch) ----
    test_predictions = []
    test_labels = []
    _test_loss = []
    model.train(False)
    if (epoch + 1) % 5 == 0:
        # No gradients are needed for evaluation; disabling them avoids
        # building an autograd graph for every test batch.
        with torch.no_grad():
            for data, label in testloader:
                data, label = data.to(device), label.to(device)
                output = model(data)
                test_loss = criterion(output, label)
                _test_loss.append(test_loss.item())
                test_predictions += torch.argmax(output, dim=1).cpu().numpy().tolist()
                test_labels += label.cpu().numpy().tolist()
        # Accuracy is computed once per evaluation instead of once per batch.
        test_acc = accuracy_score(test_labels, test_predictions)
        best_acc = max(best_acc, test_acc)
        print(f"Epoch {epoch+1}/{epochs}.. "
              f"Train loss: {np.mean(_loss):.8f} "
              f"Test loss: {np.mean(_test_loss):.8f} "
              f"Train accuracy: {train_acc:.8f} "
              f"Test accuracy: {test_acc:.8f} ")
        if (epoch + 1) % 25 == 0:
            print(classification_report(test_labels, test_predictions))
    else:
        print(f"Epoch {epoch+1}/{epochs}.. "
              f"Train loss: {np.mean(_loss):.8f} "
              f"Train accuracy: {train_acc:.8f} ")

In [None]:
# Resnet 50
# Upsampling the minority with two times more
# Train for `epochs` epochs; evaluate on the test loader every 5th epoch and
# print a classification report every 25th epoch.
epochs = 100
best_acc = 0.00

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns None when the optimizer has no parameter groups.
    """
    for param_group in optimizer.param_groups:
        return param_group['lr']

for epoch in range(epochs):
    train_predictions = []
    train_labels = []
    _loss = []

    # ---- training pass ----
    model.train(True)
    for data, label in trainloader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        _loss.append(loss.detach().item())
        train_predictions += torch.argmax(output, dim=1).detach().cpu().numpy().tolist()
        train_labels += label.detach().cpu().numpy().tolist()

    print("learning rate - "+str(get_lr(optimizer)))
    scheduler.step()
    train_acc = accuracy_score(train_labels, train_predictions)

    # ---- evaluation pass (every 5th epoch) ----
    test_predictions = []
    test_labels = []
    _test_loss = []
    model.train(False)
    if (epoch + 1) % 5 == 0:
        # No gradients are needed for evaluation; disabling them avoids
        # building an autograd graph for every test batch.
        with torch.no_grad():
            for data, label in testloader:
                data, label = data.to(device), label.to(device)
                output = model(data)
                test_loss = criterion(output, label)
                _test_loss.append(test_loss.item())
                test_predictions += torch.argmax(output, dim=1).cpu().numpy().tolist()
                test_labels += label.cpu().numpy().tolist()
        # Accuracy is computed once per evaluation instead of once per batch.
        test_acc = accuracy_score(test_labels, test_predictions)
        best_acc = max(best_acc, test_acc)
        print(f"Epoch {epoch+1}/{epochs}.. "
              f"Train loss: {np.mean(_loss):.8f} "
              f"Test loss: {np.mean(_test_loss):.8f} "
              f"Train accuracy: {train_acc:.8f} "
              f"Test accuracy: {test_acc:.8f} ")
        if (epoch + 1) % 25 == 0:
            print(classification_report(test_labels, test_predictions))
    else:
        print(f"Epoch {epoch+1}/{epochs}.. "
              f"Train loss: {np.mean(_loss):.8f} "
              f"Train accuracy: {train_acc:.8f} ")

In [None]:
# Shapes of the saved predictions, labels and image batches, one per line.
# NOTE(review): test_pred_saved / test_actual_saved / test_img_saved are not
# assigned in any visible cell -- confirm they exist before this cell runs.
print(
    *(np.array(saved).shape for saved in (test_pred_saved, test_actual_saved, test_img_saved)),
    sep="\n",
)

In [None]:
# # actual bad, predicted bad
# Collects the saved test images whose label and prediction are both 0,
# prints summary statistics for that group and plots the images.
# NOTE(review): assumes test_img_saved is indexed [batch][item] and that
# test_actual_saved / test_pred_saved are flat sequences in the same order.
from matplotlib import pyplot as plt

image_1=[]
new_act=[]
new_pred=[]
# Walk the batches with a running offset into the flat label/prediction
# sequences. This handles a shorter final batch (and any batch size) directly
# instead of relying on a swallowed IndexError.
n_saved = min(len(test_actual_saved), len(test_pred_saved))
offset = 0
for batch in test_img_saved:
    batch_len = len(batch)
    for j in range(batch_len):
        idx = offset + j
        if idx >= n_saved:
            break
        if test_actual_saved[idx] == test_pred_saved[idx] and test_actual_saved[idx] == 0:
            image_1.append(batch[j])
            new_act.append(test_actual_saved[idx])
            new_pred.append(test_pred_saved[idx])
    offset += batch_len

# Number of test samples labelled 0 in the most recent evaluation.
bad = sum(1 for num in test_labels if num == 0)

# Overall test accuracy, computed once and reused below.
accuracy = accuracy_score(test_labels, test_predictions)
accuracy_pct = accuracy * 100

predict = len(image_1) * (1 - accuracy)
if bad != 0:
    divide = len(image_1) / bad * 100
else:
    divide = 0
print(f'Test images: {len(test_set)}')
print(f'Bad images: {bad}')
print(f'Actual bad, predicted bad: {len(image_1)}')
print(f'Predicted percentage: {accuracy_pct:.4f}%')
print(f'Actual percentage: {divide:.4f}%')
print(f'Predicted false calls: {predict:.4f}')
print(f'Actual false calls: {bad - len(image_1)}')

# Colour-code how far this group's hit rate falls below the overall accuracy.
diff = divide - accuracy_pct
if -1 < diff:
    diff_color = 'green'
elif -3 < diff <= -1:
    diff_color = 'yellow'
elif diff <= -3:
    diff_color = 'red'
else:
    diff_color = None  # no comparison holds (e.g. NaN): print nothing
if diff_color is not None:
    print('Percentage difference: ' + colored(f"{diff:+.4f}", diff_color) + '%')

# Absolute gap between the estimated and the observed miss count.
diff = abs(predict - (bad - len(image_1)))
print(f'False call difference: {diff:.4f}')

# Keep the figure height positive even when no image matched.
fig=plt.figure(figsize=(15,20*max(len(image_1), 1)/5))
for i, image in enumerate(image_1):
    temp=image.cpu().detach().numpy()
    # Move the first axis last before handing the array to imshow.
    new_image=temp.transpose((1,2,0))
    fig.add_subplot(len(image_1), 5, i+1)
    plt.title("Actual:"+str(new_act[i])+" Predicted:"+ str(new_pred[i]))
    plt.imshow(new_image)
plt.show()



In [None]:
# actual good, predicted good
# Collects the saved test images whose label and prediction are both 1,
# prints summary statistics for that group and plots the images.
# NOTE(review): assumes test_img_saved is indexed [batch][item] and that
# test_actual_saved / test_pred_saved are flat sequences in the same order.
from matplotlib import pyplot as plt

image_1=[]
new_act=[]
new_pred=[]
# Walk the batches with a running offset into the flat label/prediction
# sequences. This handles a shorter final batch (and any batch size) directly
# instead of relying on a swallowed IndexError.
n_saved = min(len(test_actual_saved), len(test_pred_saved))
offset = 0
for batch in test_img_saved:
    batch_len = len(batch)
    for j in range(batch_len):
        idx = offset + j
        if idx >= n_saved:
            break
        if test_actual_saved[idx] == test_pred_saved[idx] and test_actual_saved[idx] == 1:
            image_1.append(batch[j])
            new_act.append(test_actual_saved[idx])
            new_pred.append(test_pred_saved[idx])
    offset += batch_len

# Number of test samples labelled 1 in the most recent evaluation.
good = sum(1 for num in test_labels if num == 1)

# Overall test accuracy, computed once and reused below.
accuracy = accuracy_score(test_labels, test_predictions)
accuracy_pct = accuracy * 100

predict = len(image_1) * (1 - accuracy)
if good != 0:
    divide = len(image_1) / good * 100
else:
    divide = 0
print(f'Test images: {len(test_set)}')
print(f'Good images: {good}')
print(f'Actual good, predicted good: {len(image_1)}')
print(f'Predicted percentage: {accuracy_pct:.4f}%')
print(f'Actual percentage: {divide:.4f}%')
print(f'Predicted false calls: {predict:.4f}')
print(f'Actual false calls: {good - len(image_1)}')

# Colour-code how far this group's hit rate falls below the overall accuracy.
diff = divide - accuracy_pct
if -1 < diff:
    diff_color = 'green'
elif -3 < diff <= -1:
    diff_color = 'yellow'
elif diff <= -3:
    diff_color = 'red'
else:
    diff_color = None  # no comparison holds (e.g. NaN): print nothing
if diff_color is not None:
    print('Percentage difference: ' + colored(f"{diff:+.4f}", diff_color) + '%')

# Absolute gap between the estimated and the observed miss count.
diff = abs(predict - (good - len(image_1)))
print(f'False call difference: {diff:.4f}')

# Keep the figure height positive even when no image matched.
fig=plt.figure(figsize=(15,20*max(len(image_1), 1)/5))
for i, image in enumerate(image_1):
    temp=image.cpu().detach().numpy()
    # Move the first axis last before handing the array to imshow.
    new_image=temp.transpose((1,2,0))
    fig.add_subplot(len(image_1), 5, i+1)
    plt.title("Actual:"+str(new_act[i])+" Predicted:"+ str(new_pred[i]))
    plt.imshow(new_image)
plt.show()

In [None]:
# actual bad, predicted good
# Collects the saved test images labelled 0 whose prediction differs from the
# label, prints summary statistics for that group and plots the images.
# NOTE(review): assumes test_img_saved is indexed [batch][item] and that
# test_actual_saved / test_pred_saved are flat sequences in the same order.
from matplotlib import pyplot as plt

image_1=[]
new_act=[]
new_pred=[]
# Walk the batches with a running offset into the flat label/prediction
# sequences. This handles a shorter final batch (and any batch size) directly
# instead of relying on a swallowed IndexError.
n_saved = min(len(test_actual_saved), len(test_pred_saved))
offset = 0
for batch in test_img_saved:
    batch_len = len(batch)
    for j in range(batch_len):
        idx = offset + j
        if idx >= n_saved:
            break
        if test_actual_saved[idx] != test_pred_saved[idx] and test_actual_saved[idx] == 0:
            image_1.append(batch[j])
            new_act.append(test_actual_saved[idx])
            new_pred.append(test_pred_saved[idx])
    offset += batch_len

# Overall test accuracy, computed once and reused below.
accuracy = accuracy_score(test_labels, test_predictions)
accuracy_pct = accuracy * 100

predict = (len(test_set)) * (1 - accuracy)
divide = (len(test_set) - len(image_1)) / len(test_set) * 100
print(f'Test images: {len(test_set)}')
print(f'Actual bad, predicted good: {len(image_1)}')
print(f'Predicted percentage: {accuracy_pct:.4f}%')
print(f'Actual percentage: {divide:.4f}%')
print(f'Predicted false calls: {predict:.4f}')
print('Actual false calls: ' + "{:.4f}".format(len(test_set) * (100 - divide) / 100))

# Colour-code how this group's share compares with the overall accuracy.
diff = divide - accuracy_pct
if -1 < diff:
    diff_color = 'green'
elif -3 < diff <= -1:
    diff_color = 'yellow'
elif diff <= -3:
    diff_color = 'red'
else:
    diff_color = None  # no comparison holds (e.g. NaN): print nothing
if diff_color is not None:
    print('Percentage difference: ' + colored(f"{diff:+.4f}", diff_color) + '%')

# Absolute gap between the estimated miss count and this group's size.
diff = abs(predict - len(image_1))
print(f'False call difference: {diff:.4f}')

# Keep the figure height positive even when no image matched.
fig=plt.figure(figsize=(15,20*max(len(image_1), 1)/5))
for i, image in enumerate(image_1):
    temp=image.cpu().detach().numpy()
    # Move the first axis last before handing the array to imshow.
    new_image=temp.transpose((1,2,0))
    fig.add_subplot(len(image_1), 5, i+1)
    plt.title("Actual:"+str(new_act[i])+" Predicted:"+ str(new_pred[i]))
    plt.imshow(new_image)
plt.show()

In [None]:
# actual good, predicted bad
# Collects the saved test images labelled 1 whose prediction differs from the
# label, prints summary statistics for that group and plots the images.
# NOTE(review): assumes test_img_saved is indexed [batch][item] and that
# test_actual_saved / test_pred_saved are flat sequences in the same order.
from matplotlib import pyplot as plt

image_1=[]
new_act=[]
new_pred=[]
# Walk the batches with a running offset into the flat label/prediction
# sequences. This handles a shorter final batch (and any batch size) directly
# instead of relying on a swallowed IndexError.
n_saved = min(len(test_actual_saved), len(test_pred_saved))
offset = 0
for batch in test_img_saved:
    batch_len = len(batch)
    for j in range(batch_len):
        idx = offset + j
        if idx >= n_saved:
            break
        if test_actual_saved[idx] != test_pred_saved[idx] and test_actual_saved[idx] == 1:
            image_1.append(batch[j])
            new_act.append(test_actual_saved[idx])
            new_pred.append(test_pred_saved[idx])
    offset += batch_len

# Overall test accuracy, computed once and reused below.
accuracy = accuracy_score(test_labels, test_predictions)
accuracy_pct = accuracy * 100

predict = (len(test_set)) * (1 - accuracy)
divide = (len(test_set) - len(image_1)) / len(test_set) * 100
print(f'Test images: {len(test_set)}')
print(f'Actual good, predicted bad: {len(image_1)}')
print(f'Predicted percentage: {accuracy_pct:.4f}%')
print(f'Actual percentage: {divide:.4f}%')
print(f'Predicted false calls: {predict:.4f}')
print('Actual false calls: ' + "{:.4f}".format(len(test_set) * (100 - divide) / 100))

# Colour-code how this group's share compares with the overall accuracy.
diff = divide - accuracy_pct
if -1 < diff:
    diff_color = 'green'
elif -3 < diff <= -1:
    diff_color = 'yellow'
elif diff <= -3:
    diff_color = 'red'
else:
    diff_color = None  # no comparison holds (e.g. NaN): print nothing
if diff_color is not None:
    print('Percentage difference: ' + colored(f"{diff:+.4f}", diff_color) + '%')

# Absolute gap between the estimated miss count and this group's size.
diff = abs(predict - len(image_1))
print(f'False call difference: {diff:.4f}')

# Keep the figure height positive even when no image matched.
fig=plt.figure(figsize=(15,20*max(len(image_1), 1)/5))
for i, image in enumerate(image_1):
    temp=image.cpu().detach().numpy()
    # Move the first axis last before handing the array to imshow.
    new_image=temp.transpose((1,2,0))
    fig.add_subplot(len(image_1), 5, i+1)
    plt.title("Actual:"+str(new_act[i])+" Predicted:"+ str(new_pred[i]))
    # Only the first 26 axes get an image drawn; later axes keep just a title.
    if i <= 25:
        plt.imshow(new_image)
plt.show()

In [None]:
# Text summary of the most recently created figure object.
print(str(fig))