In [1]:
# I followed the steps in https://towardsdatascience.com/tensorflow-2-0-create-and-train-a-vanilla-cnn-on-google-colab-c7a0ac86d61b



In [2]:
from PIL import Image
import numpy as np
import os

train_images = []
train_labels = []

# notebook has to be in folder 'mkfold' that contains all 5 folds created by the mkfold.py script
dir = 'fold2/train/400X/'

for img_path in os.listdir(dir):
    
    im = Image.open(dir+str(img_path))
    im = im.resize((200,200))
    train_images.append(np.array(im))
   
    # benign gets label 'False' and Malignant gets label 'True'
    train_labels.append(img_path[4]!='B')
    
test_images = []
test_labels = []

# notebook has to be in folder 'mkfold' that contains all 5 folds created by the mkfold.py script

dir = 'fold2/test/400X/'

for img_path in os.listdir(dir):
    
    im = Image.open(dir+str(img_path))
    im = im.resize((200,200))
    test_images.append(np.array(im))
    test_labels.append(img_path[4]!='B')
    
 


In [3]:
# For debugging
print(type(train_images))
print(len(train_images))
# print(train_images[0])
print()
print(type(test_images))
print(len(test_images))
# print(test_images[0])

<class 'list'>
1266

<class 'list'>
554


In [4]:
train_images = np.array(train_images)
train_labels = np.reshape(train_labels, (-1,1))
test_images = np.array(test_images)
test_labels = np.array(test_labels)

print(type(train_images))
print(train_images.shape)
#print(train_images[0])
print()
print(type(test_images))
print(test_images.shape)
#print(test_images[0])

<class 'numpy.ndarray'>
(1266, 200, 200, 3)

<class 'numpy.ndarray'>
(554, 200, 200, 3)


In [5]:
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(categories='auto')
train_labels = enc.fit_transform(train_labels).toarray()
print(train_labels)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [0. 1.]
 [0. 1.]
 [0. 1.]]


In [6]:
train_images = train_images / 255.0
test_images = test_images / 255.0

In [7]:
import tensorflow as tf
tf.__version__

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras import Model

train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(10000).batch(32)

In [8]:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, padding='same', activation = 'relu')
        self.pool1 = MaxPooling2D((2,2))
        self.conv2 = Conv2D(64, 3, padding='same', activation='relu')
        self.pool2 = MaxPooling2D((2,2))
        self.flatten = Flatten()
        self.d1 = Dense(512, activation='relu')
        self.dropout1 = Dropout(0.4)
        self.d2 = Dense(128, activation ='relu')
        self.dropout2 = Dropout(0.4)
        self.d3 = Dense(43, activation='softmax')
        # added final layer with 2 nodes to match the label shape
        self.d4 = Dense(2, activation= 'softmax')
    
    def call(self,x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.dropout1(x)
        x = self.d2(x)
        x = self.dropout2(x)
        x = self.d3(x)
        x = self.d4(x)
        return x
    
model = MyModel()

In [9]:
loss_object = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

In [10]:
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    train_loss(loss)
    train_accuracy(labels, predictions)

In [12]:
EPOCHS = 5
for epoch in range(EPOCHS):
    for images, labels in train_ds:
        
        train_step(images, labels)
    # '.' vor /content war nötig    
    model.save_weights('./content', save_format='tf')
    
    print('Epoch:', str(epoch+1), ' Loss:',str(train_loss.result()),' Accuracy:',str(train_accuracy.result()+100))
    train_loss.reset_states()
    train_accuracy.reset_states()

Epoch: 1  Loss: tf.Tensor(0.6470792, shape=(), dtype=float32)  Accuracy: tf.Tensor(100.66904, shape=(), dtype=float32)
Epoch: 2  Loss: tf.Tensor(0.6405078, shape=(), dtype=float32)  Accuracy: tf.Tensor(100.66904, shape=(), dtype=float32)
Epoch: 3  Loss: tf.Tensor(0.6367891, shape=(), dtype=float32)  Accuracy: tf.Tensor(100.66904, shape=(), dtype=float32)
Epoch: 4  Loss: tf.Tensor(0.6358043, shape=(), dtype=float32)  Accuracy: tf.Tensor(100.66904, shape=(), dtype=float32)
Epoch: 5  Loss: tf.Tensor(0.6350677, shape=(), dtype=float32)  Accuracy: tf.Tensor(100.66904, shape=(), dtype=float32)


In [13]:
predictions = np.argmax(model(test_images), axis=1)

In [14]:
print(predictions)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 