<a href="https://colab.research.google.com/github/forrestpark/MachineLearning/blob/main/Lung_CT_Scan_Medical_Image_Processing_Model_Building_(CNN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Name: Jang Woo Park (박장우)

### Importing the required libraries

In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.optimizers import Adam
from keras.layers import Dense, Flatten,Input, Convolution2D, Dropout, Activation,Concatenate
from keras.models import Sequential, Model
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.optimizers import *
from keras.layers.normalization import BatchNormalization
from keras.metrics import categorical_accuracy
from keras.models import model_from_json

### -> Loading .npy files

In [None]:
# Load the training samples and their labels from the dataset directory.
x_tr, y_tr = (
    np.load(npy_path)
    for npy_path in ('/content/lung-ct-scan/trn_dat.npy',
                     '/content/lung-ct-scan/trn_lbl.npy')
)

In [None]:
# Sanity-check the dimensions of the arrays that were just loaded.
print('shape of train data :', np.shape(x_tr))
print('shape of train labels :', np.shape(y_tr))

shape of train data : (2800, 32, 32, 16)
shape of train labels : (2800,)


In [None]:
# Unlabelled sample batch that the trained model scores later in the notebook.
eva = np.load("/content/sample_evaluation_data.npy")
np.shape(eva)

(10, 32, 32, 16)

### Randomly splitting the dataset into train (90%) and test (10%)

In [None]:
from sklearn.model_selection import train_test_split

# 90/10 random split; random_state pins the shuffle so reruns give the same partition.
x_train, x_test, y_train, y_test = train_test_split(
    x_tr,
    y_tr,
    test_size=0.1,
    random_state=42,
)

### Building the model:

In [None]:
def my_model(input_shape=(32, 32, 16)):
    """Build and compile the binary-classification CNN.

    The network stacks two 5x5 convolutions (64 filters each) followed by
    two 3x3 convolutions (128 and 256 filters), with 2x2 max pooling after
    each stage, then a 128-unit dense layer with dropout and a single
    sigmoid output unit.

    Args:
        input_shape: Shape of one input sample, excluding the batch axis.
            Defaults to (32, 32, 16).

    Returns:
        A Sequential model compiled with binary cross-entropy loss, the
        'adam' optimizer and the accuracy metric.
    """
    model = Sequential()

    # Stage 1: only the first layer declares input_shape; later layers
    # infer their input from the layer before them.
    model.add(Conv2D(64, (5, 5), input_shape=input_shape, activation='relu', padding='same'))
    model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Stage 2: 3x3 convolutions with increasing filter counts.
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head: one sigmoid unit emits a single value per sample.
    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model


model = my_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 64)        25664     
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 64)        102464    
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 64)        256       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 128)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 8, 256)         2

In [None]:
# Train for 10 epochs; the held-out split is passed as validation data.
model.fit(
    x_train,
    y_train,
    epochs=10,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

### Here, the training accuracy of the model is approximately 93.45%, with a training loss of 0.1572.
### The validation accuracy (measured on the 10% hold-out split) is approximately 88.21%, with a validation loss of 0.33.

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# The model emits one sigmoid value per sample; values above 0.5 become label 1.
y_pred = (model.predict(x_test).ravel() > 0.5).astype(int)
print('Accuracy score :', accuracy_score(y_test, y_pred))
confusion_matrix(y_test, y_pred)

### -> saving model weights

In [None]:
# Store the architecture as JSON and the trained weights as HDF5, side by side.
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights('model_weights.h5')

In [None]:
# Score the unlabelled evaluation batch. A dedicated name is used so the
# test-set predictions held in `y_pred` are not overwritten by this cell.
eva_prob = model.predict(eva)
# NOTE(review): the cut-off here is 0.3, while the test-set metrics use 0.5 — confirm this is intentional.
(eva_prob.ravel() > 0.3).astype(int)

### -> Confusion matrix

In [None]:
# Recompute the hard 0/1 test-set predictions and draw their confusion matrix.
y_pred = (model.predict(x_test).ravel() > 0.5).astype(int)
cf = confusion_matrix(y_test, y_pred)
sns.heatmap(cf, annot=True, fmt='', cmap='Blues')

In [None]:
# Per-class precision, recall, F1 and support for the test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

### Here, the precision, recall and f1-score are pretty good for both the labels

In [None]:
# Share of test-set predictions assigned to each class.
labels = '0', '1'
sizes = [int(np.count_nonzero(y_pred == 0)), int(np.count_nonzero(y_pred == 1))]
explode = (0, 0.1)  # offset the label-1 wedge from the centre

fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90,
       textprops={'fontsize': 14})
ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
ax.legend(loc='upper right')
ax.set_title("Predictions", size=20)
plt.show()

## Observations:

### --> 50.7% of the points (142 out of 280) are classified as False (0); 129 of these are actually 0, out of the 149 points whose true label is 0.

### --> 49.3% of the points (138 out of 280) are classified as True (1); 118 of these are actually 1, out of the 131 points whose true label is 1.