In [1]:
import os
import cv2
import numpy as np


In [2]:
# Define the path where images are stored
data_dir = 'lung_colon_image_set\lung_image_sets'  # dataset path
classes = os.listdir(data_dir)  # List of class folder names

X = []
Y = []


In [3]:
classes

['lung_aca', 'lung_n', 'lung_scc']

In [4]:
# Image size (resize all images to the same size for consistency)
IMG_SIZE = (128, 128)  # as we are using MobilNetV2

# Loop through each class and read images
for class_index, class_name in enumerate(classes):
    class_path = os.path.join(data_dir, class_name)
    
    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)
        
        # Read and preprocess the image
        img = cv2.imread(img_path)  # Read the image
        img = cv2.resize(img, IMG_SIZE)  # Resize the image
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
        img = img / 255.0  # Normalize pixel values

        # Append to lists
        X.append(img)
        Y.append(class_index)  # Use class index as label

# Convert to NumPy arrays
X = np.array(X, dtype=np.float32)
Y = np.array(Y, dtype=np.int32)

print(f"Shape of X: {X.shape}")
print(f"Shape of Y: {Y.shape}")


Shape of X: (15000, 128, 128, 3)
Shape of Y: (15000,)


In [5]:
from tensorflow.keras.utils import to_categorical

# Convert labels to one-hot encoding
Y = to_categorical(Y, num_classes=3)





In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob

from sklearn.model_selection import train_test_split
from sklearn import metrics

import cv2
import gc

import tensorflow as tf
from tensorflow import keras
from keras import layers

import warnings
warnings.filterwarnings('ignore')


In [7]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y,test_size = 0.2,random_state = 42)
print(X_train.shape, X_val.shape,Y_train.shape)

(12000, 128, 128, 3) (3000, 128, 128, 3) (12000, 3)


In [8]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

In [9]:
# Load MobileNetV2 without top layer
base_model = MobileNetV2(input_shape=(128, 128, 3), include_top=False, weights='imagenet')

# Freeze base model layers
base_model.trainable = False






In [10]:
# Add new layers
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Pooling to reduce dimensions
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
output_layer = Dense(3, activation='softmax')(x)  # 3 classes: 0,1,2
# Define the full model
model = Model(inputs=base_model.input, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Show model summary
model.summary()



Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 128, 128, 3)]        0         []                            
                                                                                                  
 Conv1 (Conv2D)              (None, 64, 64, 32)           864       ['input_1[0][0]']             
                                                                                                  
 bn_Conv1 (BatchNormalizati  (None, 64, 64, 32)           128       ['Conv1[0][0]']               
 on)                                                                                              
                                                                                                  
 Conv1_relu (ReLU)           (None, 64, 64, 32)           0         ['bn_Conv1[0][0]']       

In [11]:
history = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), batch_size=32, epochs=10, verbose=1)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
Y_pred = model.predict(X_val)



In [13]:
Y_pred.shape

(3000, 3)

In [14]:
Y_val.shape

(3000, 3)

In [15]:
Y_val = np.argmax(Y_val, axis=1)
Y_pred = np.argmax(Y_pred, axis=1)

In [16]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(Y_val, Y_pred)
print(cm)

[[1024    0   13]
 [   0  970    0]
 [  20    0  973]]


In [17]:
# save model
model.save('Lung_cancer_prediction.h5')
print('Model Saved!')


Model Saved!


In [18]:
from tensorflow.keras.models import load_model

# Load the trained model
model = load_model("Lung_cancer_prediction.h5")  # Replace with your model's filename


In [19]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image

# Define the class names (update based on your dataset)
class_names = ["Class 0 - lung_aca", "Class 1 - lung_n", "Class 2 - lung_scc"]  # Change accordingly
img_path = "lung_colon_image_set\colon_image_sets"
# Load and preprocess the image
def preprocess_image(img_path):
    img = image.load_img(img_path, target_size=(128, 128))  # Resize to MobileNetV2 input size
    img_array = image.img_to_array(img)  # Convert to numpy array
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array = tf.keras.applications.mobilenet_v2.preprocess_input(img_array)  # Normalize
    return img_array


In [20]:
# Path to the image you want to classify
img_path = "test.jpeg"  #

# Preprocess the image
img_array = preprocess_image(img_path)

# Predict the class probabilities
predictions = model.predict(img_array)

# Get the class with highest probability
predicted_class_index = np.argmax(predictions, axis=1)[0]  # Get the highest probability class index
predicted_class_name = class_names[predicted_class_index]  # Get class name

# Print the result
print(f"Predicted Class: {predicted_class_name}")


Predicted Class: Class 0 - lung_aca
