In [7]:
import numpy as np
import os
from PIL import Image
import cv2

from sklearn.utils import shuffle #to shuffle the images

In [6]:
# prompt: mount google drive

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


### Loading Data x Data Processing

In [8]:
chest_data_directory = '/content/drive/MyDrive/Datasets/ChestCancerData/'
train_data_directory = '/content/drive/MyDrive/Datasets/ChestCancerData/train/'
test_data_directory = '/content/drive/MyDrive/Datasets/ChestCancerData/test/'
valid_data_directory = '/content/drive/MyDrive/Datasets/ChestCancerData/valid/'

In [9]:
# train data categories
normal_images = '/content/drive/MyDrive/Datasets/ChestCancerData/train/Normal/'   #LABEL : N
adeno_cell_images = '/content/drive/MyDrive/Datasets/ChestCancerData/train/AdenoCell/'  #LABEL : C1
large_cell_images = '/content/drive/MyDrive/Datasets/ChestCancerData/train/LargeCell/'  #LABEL : C2
squamous_cell_images = '/content/drive/MyDrive/Datasets/ChestCancerData/train/SquamousCell/'  #LABEL : C3


# test_data categories
normal_images_test = '/content/drive/MyDrive/Datasets/ChestCancerData/test/Normal/'
adeno_cell_images_test = '/content/drive/MyDrive/Datasets/ChestCancerData/test/AdenoCell/'
large_cell_images_test = '/content/drive/MyDrive/Datasets/ChestCancerData/test/LargeCell/'
squamous_cell_images_test = '/content/drive/MyDrive/Datasets/ChestCancerData/test/SquamousCell/'



# valid_data categories
normal_images_valid = '/content/drive/MyDrive/Datasets/ChestCancerData/valid/Normal/'
adeno_cell_images_valid = '/content/drive/MyDrive/Datasets/ChestCancerData/valid/AdenoCell/'
large_cell_images_valid = '/content/drive/MyDrive/Datasets/ChestCancerData/valid/LargeCell/'
squamous_cell_images_valid = '/content/drive/MyDrive/Datasets/ChestCancerData/valid/SquamousCell/'

In [10]:
# RENAME THE IMAGES
def rename_images(folder_path, label):
  count = 1

  for filename in os.listdir(folder_path):
    source = folder_path + filename
    destination = folder_path + str(label) + '_' + str(count) + ".png"
    os.rename(source, destination)
    count += 1
  print(label + '_images : ' + str(len(folder_path)))

# rename train images
rename_images(normal_images, 'N')
rename_images(adeno_cell_images, 'C1')
rename_images(large_cell_images, 'C2')
rename_images(squamous_cell_images, 'C3')

# rename test images
rename_images(normal_images_test, 'N')
rename_images(adeno_cell_images_test, 'C1')
rename_images(large_cell_images_test, 'C2')
rename_images(squamous_cell_images_test, 'C3')


N_images : 61
C1_images : 64
C2_images : 64
C3_images : 67
N_images : 60
C1_images : 63
C2_images : 63
C3_images : 66


In [11]:
# combine train and test images
X_train = []
Y_train = []

image_size = 150
labels = ['AdenoCell', 'LargeCell', 'Normal','SquamousCell']

for i in labels:
  folder_path = os.path.join(train_data_directory, i)
  for j in os.listdir(folder_path):
    img = cv2.imread(os.path.join(folder_path, j))
    img = cv2.resize(img, (image_size, image_size))
    X_train.append(img)
    Y_train.append(i)


for i in labels:
  folder_path = os.path.join(test_data_directory, i)
  for j in os.listdir(folder_path):
    img = cv2.imread(os.path.join(folder_path, j))
    img = cv2.resize(img, (image_size, image_size))
    X_train.append(img)
    Y_train.append(i)

In [12]:
# convert variables to numpy array

X_train = np.array(X_train)
Y_train = np.array(Y_train)

print(X_train.shape) #images:928, size: 150x150, channels:3
print(Y_train.shape)  #labels:928


(928, 150, 150, 3)
(928,)


In [13]:
# shuffle the images and their labels together (do it at the same time)

X_train, Y_train = shuffle(X_train, Y_train, random_state = 101) #high random_state to reaaaally shuffle

print(X_train.shape)


(928, 150, 150, 3)


### Train-Test Split

In [22]:
# import libraries
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
import numpy as np
import numpy as np
import os
from PIL import Image
import cv2
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [15]:
# split the data
X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size = 0.2, random_state = 101) #test_size:10% because it is a small data set


In [25]:
#CNN Model

X_train = X_train / 255.0
X_test = X_test / 255.0

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Convert labels to categorical (one-hot encoding)
y_train_categorical = to_categorical(y_train_encoded)
y_test_categorical = to_categorical(y_test_encoded)

# Step 6: Define the CNN Model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(4, activation='softmax')  # 4 classes: Normal, Adeno, Large, Squamous
])

# Step 7: Compile the Model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Step 8: Train the Model
history = model.fit(X_train, y_train_categorical, epochs=20, validation_split=0.2, batch_size=32)

# Step 9: Evaluate the Model
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical)
print(f'Test accuracy: {test_accuracy}')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy: 0.301075279712677


In [1]:
#exception model
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
import numpy as np



# split the data
#X_train, X_test, y_train, y_test = train_test_split(X_train, Y_train, test_size = 0.2, random_state = 101) #test_size:10% because it is a small data set

# Load the CIFAR-10 dataset
(X_train, X_test), (y_train, y_test) = cifar10.load_data()

# Normalize the images to a range of 0 to 1
X_train = X_train.astype('float32') / 255.0
y_train = y_train.astype('float32') / 255.0

# One-hot encode the labels
X_test = to_categorical(X_test, 10)
y_test = to_categorical(y_test, 10)

# Load the base Xception model
base_model = Xception(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

# Add custom layers on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)  # Add dropout for regularization
x = Dense(128, activation='relu')(x)
predictions = Dense(10, activation='softmax')(x)  # CIFAR-10 has 10 classes

# Combine base model with custom layers
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # Use 20% of training data for validation
)

train_datagen = datagen.flow(X_train, X_test, subset='training')
val_datagen = datagen.flow(y_train, y_test, subset='validation')

# Train the model
history = model.fit(
    train_datagen,
    epochs=50,
    validation_data=val_datagen
)

# Resize test images to match the input size of Xception (299x299)
test_images_resized = np.array([tf.image.resize(image, (299, 299)).numpy() for image in y_train])

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(test_images_resized, y_test)
print(f'Test accuracy: {test_acc}')

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/50
  11/1250 [..............................] - ETA: 36:17 - loss: 2.3030 - accuracy: 0.0881

KeyboardInterrupt: 

Resources x Research

---

1. [Dataset](https://www.kaggle.com/datasets/mohamedhanyyy/chest-ctscan-images/code?datasetId=839140&sortBy=commentCount)

2. [High-Accuracy : ResNets & VGG16](https://www.kaggle.com/code/rxnach/high-accuracy-resnets-vgg16)

3. [EnsembleModel_CTScan](https://www.kaggle.com/code/prthmgoyl/ensemblemodel-ctscan)


4. [Youtube 1](https://www.youtube.com/watch?v=-zmBMxpNDqQ&list=PLXCapw88C2b65E9ZlMx0dIQlgCZtvEsBF&index=6&t=432s)

5. [Youtube 2](https://www.youtube.com/watch?v=juJYmc4vrWU&t=121s)

6. [Youtube 3](https://www.youtube.com/live/Gy3B1l-iadA?si=qiw7Z17FflX4U9Dn)
