# **Projektarbeit: Malaria Cell Images Dataset** - Vorhersage mit Convolutional Neuronal Networks

## Als Alternative zu Support Vector Machines wurden unterschiedliche Convolutional Neuronal Networks trainiert:
1) 2  convolutional layer, 3 fully connected layer, SGD Optimizer und 5 Epochen: 83% <br>
2) 2 convolutional layer, 3 fully connected layer, Adam Optimizer und 20 Epochen: 90% <br>
3) 3 convolutional layer, 3 fully connected layer, Adam Optimizer und 10 Epochen: 92% <br>


In [15]:
#Bibliotheken importieren
import numpy as np
import matplotlib.pyplot as plt
import PIL
from PIL import Image, ImageOps, ImageEnhance
import glob


import torch
from torch.utils.data import TensorDataset, DataLoader
from torchvision import models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.model_selection import train_test_split

In [2]:
#Hilfsfunktion zum Padding
def pad_img(img, size):
    width, height = img.size
    aspect_old = img.size[0] / img.size[1]
    aspect_new = size[0] / size[1]
    aspect_factor = aspect_new / aspect_old
    if (aspect_factor > 0.95) & (aspect_factor < 1.05):
        return img
    elif aspect_factor < 1:
        result = Image.new(img.mode, (width, int(width // aspect_factor)), 0)
        result.paste(img, (0, ((result.size[1] - img.size[1]) // 2)))
        return result
    else:
        result = Image.new(img.mode, (int(height * aspect_factor), height), 0)
        result.paste(img, ((((result.size[0] - img.size[0]) // 2), 0)))
        return result

#Hilfsfunktion zum Laden und Transformieren der Bilder
def transform_image(image, size=None, grayscale=True, padding=False, contrast=1.0):
    if (padding == True) & (size != None):
        image = pad_img(image, size)
    if size != None:
         image = image.resize(size)
    if grayscale == True:
         image = ImageOps.grayscale(image)
        
    enhancer = ImageEnhance.Contrast(image)
    image = enhancer.enhance(contrast)

    return np.asarray(image)

In [3]:
#Skalierung
size = (50, 50)
#Padding ja / nein?
padding = False
#Graustufen ja / nein?
grey=False
#Kontrast
contrast = 1



# Bilder mit Label 'parasitized' importieren
labels = []
imgs = []
filelist = glob.glob('Parasitized/*.png')
for idx, i in enumerate(filelist):
    tmp_image = Image.open(i)
    imgs.append(transform_image(tmp_image, size=size, grayscale=grey, padding=padding, contrast=contrast))
    tmp_image.close()
    labels.append(1)
    if(idx % 2000 == 0):
        print(idx)
          
# Bilder mit Label 'uninfected' importieren
filelist = glob.glob('Uninfected/*.png')
for idx, i in enumerate(filelist):
    tmp_image = Image.open(i)
    imgs.append(transform_image(tmp_image, size=size, grayscale=grey, padding=padding, contrast=contrast))
    tmp_image.close()
    labels.append(0)
    if(idx % 2000 == 0):
        print(idx)
        
y = np.asarray(labels)
X = np.asarray(imgs)

0
2000
4000
6000
8000
10000
12000
0
2000
4000
6000
8000
10000
12000


In [None]:
#Array für CNN transformieren
width, height = size
X = X.reshape(len(X),1, width, height)

In [29]:
#Daten aufteilen
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, stratify=y, random_state=1234)

In [30]:
#Numpy in Tensor umwandeln
train_X_tensor = torch.from_numpy(X_train).float()
train_y_tensor = torch.from_numpy(y_train).long()

test_X_tensor = torch.from_numpy(X_test).float()
test_y_tensor = torch.from_numpy(y_test).long()

train_X_tensor.shape

torch.Size([22046, 1, 50, 50])

In [31]:
#Data und Testloader instanziieren
trainset = TensorDataset(train_X_tensor, train_y_tensor)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)

testset = TensorDataset(test_X_tensor, test_y_tensor)
testloader = DataLoader(testset, batch_size=len(testset))

In [32]:
#Check, ob Input richtiges Format hat
for i, batch in enumerate(trainloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = batch
    print(inputs.shape)
    break

torch.Size([32, 1, 50, 50])


### CNN mit 2. Convolutional Layers aufbauen

In [34]:
# Zwei Covolutional Layer
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        #Convolutional Layer
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        
        #Pooling Layer --> skalieren quasi das Bild
        self.pool1 = nn.MaxPool2d(2, 2)
        
        #Convolutional Layer
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool2 = nn.MaxPool2d(2, 2)
        
        #Fully connected Layers (immer am Ende)
        self.fc1 = nn.Linear(16 * 10 * 10, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        # print(x.size)
        x = x.view(x.size(0), -1)  # ähnlich wie reshape
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


conv_net = ConvNet()

In [39]:
# Netzwerk instanziieren
net = ConvNet()

# Verlustfunktion
criterion = nn.CrossEntropyLoss()

# Optimierer
#optimizer = optim.SGD(net.parameters(), lr=0.001)

optimizer = optim.Adam(net.parameters(), lr=0.001)

### Training mit 2 Layern, SGD Optimizer und 5 Epochen

In [37]:
#Training
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0

    for i, batch in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = batch

        # zero the parameter gradients
        optimizer.zero_grad()

        ### forward + backward + optimize

        # forward - Vorhersage
        outputs = net(inputs)

        loss = criterion(outputs, labels)

        # backward
        loss.backward()

        # optimize
        optimizer.step()


        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

[1,   100] loss: 0.703
[1,   200] loss: 0.668
[1,   300] loss: 0.648
[1,   400] loss: 0.639
[1,   500] loss: 0.627
[1,   600] loss: 0.609
[2,   100] loss: 0.593
[2,   200] loss: 0.574
[2,   300] loss: 0.565
[2,   400] loss: 0.561
[2,   500] loss: 0.522
[2,   600] loss: 0.524
[3,   100] loss: 0.509
[3,   200] loss: 0.502
[3,   300] loss: 0.494
[3,   400] loss: 0.483
[3,   500] loss: 0.486
[3,   600] loss: 0.473
[4,   100] loss: 0.448
[4,   200] loss: 0.451
[4,   300] loss: 0.454
[4,   400] loss: 0.440
[4,   500] loss: 0.448
[4,   600] loss: 0.444
[5,   100] loss: 0.426
[5,   200] loss: 0.414
[5,   300] loss: 0.435
[5,   400] loss: 0.400
[5,   500] loss: 0.430
[5,   600] loss: 0.413
[6,   100] loss: 0.404
[6,   200] loss: 0.397
[6,   300] loss: 0.393
[6,   400] loss: 0.383
[6,   500] loss: 0.391
[6,   600] loss: 0.410
[7,   100] loss: 0.379
[7,   200] loss: 0.376
[7,   300] loss: 0.377
[7,   400] loss: 0.387
[7,   500] loss: 0.363
[7,   600] loss: 0.375
[8,   100] loss: 0.344
[8,   200] 

In [38]:
#Accuracy
correct = 0
total = 0

with torch.no_grad():

    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 83 %


### Training mit 2 Layern, Adam Optimizer und 20 Epochen

In [40]:
#Training
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0

    for i, batch in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = batch

        # zero the parameter gradients
        optimizer.zero_grad()

        ### forward + backward + optimize

        # forward - Vorhersage
        outputs = net(inputs)

        loss = criterion(outputs, labels)

        # backward
        loss.backward()

        # optimize
        optimizer.step()


        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

[1,   100] loss: 1.073
[1,   200] loss: 0.688
[1,   300] loss: 0.682
[1,   400] loss: 0.679
[1,   500] loss: 0.685
[1,   600] loss: 0.677
[2,   100] loss: 0.672
[2,   200] loss: 0.675
[2,   300] loss: 0.668
[2,   400] loss: 0.673
[2,   500] loss: 0.670
[2,   600] loss: 0.662
[3,   100] loss: 0.674
[3,   200] loss: 0.670
[3,   300] loss: 0.655
[3,   400] loss: 0.650
[3,   500] loss: 0.649
[3,   600] loss: 0.656
[4,   100] loss: 0.645
[4,   200] loss: 0.642
[4,   300] loss: 0.634
[4,   400] loss: 0.553
[4,   500] loss: 0.496
[4,   600] loss: 0.474
[5,   100] loss: 0.422
[5,   200] loss: 0.411
[5,   300] loss: 0.384
[5,   400] loss: 0.383
[5,   500] loss: 0.380
[5,   600] loss: 0.349
[6,   100] loss: 0.293
[6,   200] loss: 0.309
[6,   300] loss: 0.327
[6,   400] loss: 0.301
[6,   500] loss: 0.285
[6,   600] loss: 0.303
[7,   100] loss: 0.250
[7,   200] loss: 0.231
[7,   300] loss: 0.247
[7,   400] loss: 0.264
[7,   500] loss: 0.238
[7,   600] loss: 0.243
[8,   100] loss: 0.192
[8,   200] 

In [41]:
#Accuracy
correct = 0
total = 0

with torch.no_grad():

    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 90 %


### Training mit 3 Layern, Adam Optimizer und 10 Epochen

In [54]:
# Drei Convolutional Layer
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        #1. Convolutional Layer
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        
        #Pooling Layer --> skalieren quasi das Bild
        self.pool1 = nn.MaxPool2d(2, 2)
        
        #2. Convolutional Layer
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(2, 2)
              
        #3. Convolutional Layer
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=24, kernel_size=5, padding=2)
        self.pool3 = nn.MaxPool2d(2, 2)
        
        #Fully connected Layers (immer am Ende)
        self.fc1 = nn.Linear(24 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        # print(x.size)
        x = x.view(x.size(0), -1)  # ähnlich wie reshape
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


conv_net = ConvNet()

In [55]:
# Netzwerk instanziieren
net = ConvNet()

# Verlustfunktion
criterion = nn.CrossEntropyLoss()

# Optimierer
#optimizer = optim.SGD(net.parameters(), lr=0.001)

optimizer = optim.Adam(net.parameters(), lr=0.001)

In [62]:
#Training
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0

    for i, batch in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = batch

        # zero the parameter gradients
        optimizer.zero_grad()

        ### forward + backward + optimize

        # forward - Vorhersage
        outputs = net(inputs)

        loss = criterion(outputs, labels)

        # backward
        loss.backward()

        # optimize
        optimizer.step()


        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

[1,   100] loss: 0.149
[1,   200] loss: 0.181
[1,   300] loss: 0.164
[1,   400] loss: 0.155
[1,   500] loss: 0.161
[1,   600] loss: 0.165
[2,   100] loss: 0.137
[2,   200] loss: 0.146
[2,   300] loss: 0.148
[2,   400] loss: 0.158
[2,   500] loss: 0.155
[2,   600] loss: 0.147
[3,   100] loss: 0.129
[3,   200] loss: 0.118
[3,   300] loss: 0.139
[3,   400] loss: 0.122
[3,   500] loss: 0.137
[3,   600] loss: 0.133
[4,   100] loss: 0.106
[4,   200] loss: 0.107
[4,   300] loss: 0.124
[4,   400] loss: 0.120
[4,   500] loss: 0.139
[4,   600] loss: 0.129
[5,   100] loss: 0.100
[5,   200] loss: 0.109
[5,   300] loss: 0.122
[5,   400] loss: 0.092
[5,   500] loss: 0.106
[5,   600] loss: 0.128
[6,   100] loss: 0.077
[6,   200] loss: 0.095
[6,   300] loss: 0.092
[6,   400] loss: 0.093
[6,   500] loss: 0.110
[6,   600] loss: 0.099
[7,   100] loss: 0.085
[7,   200] loss: 0.083
[7,   300] loss: 0.070
[7,   400] loss: 0.094
[7,   500] loss: 0.073
[7,   600] loss: 0.086
[8,   100] loss: 0.054
[8,   200] 

In [63]:
#Accuracy
correct = 0
total = 0

with torch.no_grad():

    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 92 %
