<a href="https://colab.research.google.com/github/giobiba/CT-scan-classifier/blob/main/ct_scan_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import functools
import numpy as np
import os
import matplotlib.image as mpimg
import sys
import matplotlib.pyplot as plt
from IPython.display import clear_output
import zipfile
from sklearn.metrics import classification_report, confusion_matrix

# Extract the dataset archive into ./data (the loaders below read from there).
with zipfile.ZipFile("./drive/MyDrive/data.zip", "r") as zip_ref:
  zip_ref.extractall("./data")


# We use functools.partial to simplify writing the layers later in the code:
# Conv2D and Dense default to a ReLU activation unless one is passed explicitly.
Conv2D = functools.partial(tf.keras.layers.Conv2D, activation='relu')
MaxPool2D = tf.keras.layers.MaxPool2D
Flatten = tf.keras.layers.Flatten
Dense = functools.partial(tf.keras.layers.Dense, activation='relu')

# Print arrays with 4 decimals and never truncate them with "...".
np.set_printoptions(precision=4, threshold=sys.maxsize)

BATCH_SIZE = 64   # mini-batch size used by both fit() calls
EPOCHS = 5        # epochs for the fit on the training set
VAL_EPOCHS = 3    # epochs for the additional fit on the validation set


path = "./data/"             # root folder of the extracted dataset (note the trailing slash)
test = "test"                # split names: each one is both a sub-folder and a "<name>.txt" index file
train = "train"
validation = "validation"
width, height = 50, 50       # input image size passed to build_model
n_filters = 24               # filter count of the first conv layer; deeper layers use multiples of it
learning_rate = 0.1          # learning rate passed to the SGD optimizer

class PlotLearning(tf.keras.callbacks.Callback):
    """Keras callback that redraws the learning curves after every epoch.

    One subplot is drawn per training metric found in ``logs`` (every key
    that does not contain ``'val'``). When the matching ``'val_<metric>'``
    key is present, the validation curve is drawn on the same axes; when
    ``fit()`` runs without validation data only the training curve is shown.
    """

    def on_train_begin(self, logs=None):
        """Reset the per-metric history at the start of each ``fit()`` call."""
        self.metrics = {}
        for metric in logs or {}:
            self.metrics[metric] = []

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's metric values and redraw all curves.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            logs: Mapping of metric name to value for this epoch.
        """
        logs = logs or {}
        for metric, value in logs.items():
            self.metrics.setdefault(metric, []).append(value)

        metrics = [name for name in logs if 'val' not in name]
        if not metrics:
            # Nothing to plot (no metrics were reported for this epoch).
            return

        # squeeze=False keeps `axs` two-dimensional even for a single metric,
        # so indexing works regardless of how many subplots there are.
        f, axs = plt.subplots(1, len(metrics), figsize=(15, 5), squeeze=False)
        clear_output(wait=True)

        epochs_axis = range(1, epoch + 2)
        for ax, metric in zip(axs[0], metrics):
            ax.plot(epochs_axis, self.metrics[metric], label=metric)

            # Test for key presence rather than truthiness: the key is missing
            # when fit() has no validation data, and a legitimate value of 0
            # must still be plotted.
            val_metric = 'val_' + metric
            if val_metric in logs:
                ax.plot(epochs_axis, self.metrics[val_metric], label=val_metric)

            ax.legend()
            ax.grid()

        plt.tight_layout()
        plt.show()


def get_images_and_labels(file = "train"):
  """Load the images and integer labels of one labelled split.

  Reads the index file ``path + file + ".txt"``, whose lines have the form
  ``<image file name>,<label>``, and loads each image from the folder
  ``path + file + "/"``. Blank lines in the index file are skipped.

  Args:
    file: Name of the split; used both as the index file stem and as the
      image sub-folder name.

  Returns:
    A tuple ``(images, labels)`` of NumPy arrays. Every image gets an extra
    trailing axis of size 1 (presumably the channel axis of a grayscale
    image -- confirm against the data).
  """
  images = []
  labels = []
  with open(path + file + ".txt") as f:
    # Iterate lazily instead of materialising the whole file with readlines().
    for line in f:
      if not line.strip():
        # A blank (e.g. trailing) line would otherwise break the unpacking below.
        continue
      img_path, label = line.split(",")
      img = mpimg.imread(path + file + "/" + img_path)
      img = np.expand_dims(img, axis = -1)
      images.append(img)
      # int() tolerates the trailing newline left on the label field.
      labels.append(int(label))

  return np.array(images), np.array(labels)

def get_images(file = "test"):
  """Load the images of an unlabelled split together with their file names.

  Reads the index file ``path + file + ".txt"``, which lists one image file
  name per line, and loads each image from the folder ``path + file + "/"``.
  Blank lines in the index file are skipped.

  Args:
    file: Name of the split; used both as the index file stem and as the
      image sub-folder name.

  Returns:
    A tuple ``(images, paths)``: a NumPy array of images, each with an extra
    trailing axis of size 1, and the list of image file names in the same
    order.
  """
  images = []
  paths = []
  with open(path + file + ".txt") as f:
    # Iterate lazily instead of materialising the whole file with readlines().
    for line in f:
      img_path = line.strip()
      if not img_path:
        # An empty name would make imread() try to open the folder itself.
        continue
      img = mpimg.imread(path + file + "/" + img_path)
      img = np.expand_dims(img, axis = -1)
      images.append(img)
      paths.append(img_path)
  return np.array(images), paths

# Labelled splits: images plus their integer class labels.
train_images, train_labels = get_images_and_labels(train)
val_images, val_labels = get_images_and_labels(validation)

# Unlabelled split: images plus their file names (written out as ids in the results file).
test_images, test_paths = get_images(test)

Unzipping the files that we will train our CNN on.

In [None]:
def build_model(width, height, num_classes=3):
  """Build the (uncompiled) convolutional classifier.

  The network stacks three ReLU convolutions whose filter counts are 1x, 2x
  and 4x the module-level ``n_filters``, with 2x2 max-pooling after the
  first two, followed by a 512-unit dense layer and a softmax output layer.

  Args:
    width: Width of the input images in pixels.
    height: Height of the input images in pixels.
    num_classes: Number of output classes (size of the softmax layer).

  Returns:
    A ``tf.keras.Sequential`` model taking inputs of shape
    ``(height, width, 1)`` and producing class probabilities (not logits).
  """
  # `n_filters` is only read here, so no `global` declaration is required.
  model = tf.keras.Sequential(
      layers=[
        # The first convolution uses stride 2, so it also downsamples the input.
        Conv2D(filters=1*n_filters, kernel_size=(3,3), strides=2, input_shape = (height, width, 1)),
        MaxPool2D((2,2)),

        Conv2D(filters=2*n_filters, kernel_size=(3,3)),
        MaxPool2D((2,2)),

        Conv2D(filters=4*n_filters, kernel_size=(3,3)),

        Flatten(),
        Dense(512),
        # Explicit activation overrides the ReLU default of the Dense partial.
        Dense(num_classes, activation="softmax")
      ])

  return model


# build_model expects its arguments in (width, height) order.
model = build_model(width, height)

# The network ends in a softmax layer, so its outputs are probabilities.
# The loss must therefore be told NOT to treat them as logits; with
# from_logits=True softmax would effectively be applied twice, which
# flattens the gradients and hurts training.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.02),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 24)        240       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 24)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 10, 10, 48)        10416     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 48)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 96)          41568     
_________________________________________________________________
flatten (Flatten)            (None, 864)               0         
_________________________________________________________________
dense (Dense)                (None, 512)               4

Training the model

In [None]:
# Train on the training set. The validation set is only used for monitoring,
# and PlotLearning redraws the metric curves after every epoch.
# `use_multiprocessing` is intentionally not passed: it only applies to
# generator / Sequence inputs (not in-memory arrays), and newer Keras
# releases no longer accept the argument.
history = model.fit(x = train_images,
                    y = train_labels,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    shuffle=True,
                    validation_data = (val_images, val_labels),
                    callbacks=[PlotLearning()])



In [None]:
# Also train the model on the validation data for a few extra epochs.
# Keras expects callback *instances*, so PlotLearning must be instantiated.
# `use_multiprocessing` is intentionally not passed: it only applies to
# generator / Sequence inputs (not in-memory arrays), and newer Keras
# releases no longer accept the argument.
history = model.fit(x = val_images,
                    y = val_labels,
                    batch_size=BATCH_SIZE,
                    epochs=VAL_EPOCHS,
                    shuffle=True,
                    callbacks=[PlotLearning()])

Evaluating our trained model

In [None]:
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
# NOTE(review): the preceding cell also fits the model on val_images, so this
# score is not a held-out estimate when that cell has been run.
val_loss, val_acc = model.evaluate(val_images, val_labels)
# Report the accuracy as a percentage rounded to 4 decimals.
print("Model's accuracy: {}%".format(round(val_acc * 100, 4)))

Model's accuracy: 68.1556%


In [None]:
# Turn the per-class scores returned by the model into hard class indices
# (index of the highest score in each row).
y_pred = np.argmax(model.predict(val_images), axis=1)

# Rows of the confusion matrix are the true classes, columns the predictions.
print("Confusion Matrix")
print(confusion_matrix(val_labels, y_pred))

# Per-class precision / recall / F1 on the validation set.
print("Classification report")
print(classification_report(val_labels, y_pred))

Confusion Matrix
[[1289  174   37]
 [ 406  649  445]
 [ 285  225  990]]
Classification report
              precision    recall  f1-score   support

           0       0.65      0.86      0.74      1500
           1       0.62      0.43      0.51      1500
           2       0.67      0.66      0.67      1500

    accuracy                           0.65      4500
   macro avg       0.65      0.65      0.64      4500
weighted avg       0.65      0.65      0.64      4500



  Predicting the classes for the images


In [None]:
# Despite its name, `predicted_labels` holds the per-class scores for every
# test image; `predicted` holds the index of the best-scoring class per image.
predicted_labels = model.predict(test_images)
predicted = [np.argmax(scores) for scores in predicted_labels]

Write the results of our predictions to a CSV file.

In [None]:
# Write one "<image file name>,<predicted class>" row per test image,
# preceded by the header line.
with open("./drive/MyDrive/results.csv", "w") as f:
  f.write("id,label\n")
  for i, label in enumerate(predicted):
    f.write("{},{}\n".format(test_paths[i], label))