# loading the dataset

In [None]:
import os
import cv2
import numpy as np

# Step 1.1: Load the Iraq oncology dataset (unlabeled).
# Every readable file in the folder is decoded with OpenCV (BGR channel
# order), resized to a common size and stacked into one array.
iraq_dataset_path = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\2\Unlabelled"
iraq_images = []
desired_width = 224   # target width in pixels
desired_height = 224  # target height in pixels
# Sort the listing so the image order is the same on every run.
for filename in sorted(os.listdir(iraq_dataset_path)):
    image_path = os.path.join(iraq_dataset_path, filename)
    image = cv2.imread(image_path)  # returns None when the file cannot be decoded
    if image is None:
        # Sub-folders, thumbnails caches and corrupt files would otherwise
        # make cv2.resize raise on a None input.
        print(f"Skipping unreadable file: {image_path}")
        continue
    # cv2.resize expects the target size as (width, height).
    image = cv2.resize(image, (desired_width, desired_height))
    iraq_images.append(image)

iraq_images = np.array(iraq_images)


# Step 1.2: Load the CT-Scan images dataset (labeled).
# Each class lives in its own sub-folder; the folder name is used as the label.
ct_scan_dataset_path = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images"
labeled_images = []
labels = []
for label in ['Benign cases', 'Malignant cases']:
    label_path = os.path.join(ct_scan_dataset_path, label)
    # Sorted listing keeps the sample order reproducible between runs.
    for filename in sorted(os.listdir(label_path)):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)  # returns None when the file cannot be decoded
        if image is None:
            # Skip the file instead of letting cv2.resize fail on None; the
            # label is only appended together with its image, so the two
            # lists stay aligned.
            print(f"Skipping unreadable file: {image_path}")
            continue
        # cv2.resize expects the target size as (width, height).
        image = cv2.resize(image, (desired_width, desired_height))
        labeled_images.append(image)
        labels.append(label)

labeled_images = np.array(labeled_images)
labels = np.array(labels)

# Ensure the images and labels are properly paired. An explicit exception is
# used because assert statements are removed when Python runs with -O.
if len(labeled_images) != len(labels):
    raise ValueError("Number of images and labels should be the same")

print("Unlabeled dataset shape:", iraq_images.shape)
print("Labeled dataset shape:", labeled_images.shape)
print("Labels shape:", labels.shape)


In [None]:
import cv2
import matplotlib.pyplot as plt

# Show a handful of samples from each dataset as a visual sanity check.
# Slicing (instead of indexing 0..4) avoids an IndexError when a dataset
# holds fewer images than the requested preview count.
num_images_displayed = 5  # Number of images to display per dataset

# Display some images from the Iraq oncology dataset
for image in iraq_images[:num_images_displayed]:
    plt.figure()
    # OpenCV loads images as BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title('Iraq Oncology Image')
plt.show()

# Display some images from the CT-Scan images dataset
for image, label in zip(labeled_images[:num_images_displayed],
                        labels[:num_images_displayed]):
    plt.figure()
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title('CT-Scan Image - {}'.format(label))
plt.show()


# augmentation

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

import cv2
import numpy as np

# Define the data augmentation parameters
augmentation_params = {
    'rotation_range': 20,       # Random rotation (±20 degrees)
    'width_shift_range': 0.1,   # Random horizontal shift
    'height_shift_range': 0.1,  # Random vertical shift
    'shear_range': 0.1,         # Shear transformation
    'zoom_range': 0.1,          # Random zoom
    'horizontal_flip': True,    # Horizontal flip
    'fill_mode': 'nearest'      # Fill mode for newly created pixels
}

# Create an ImageDataGenerator with augmentation parameters
data_augmentor = ImageDataGenerator(**augmentation_params)

# Define the output directory to save the augmented images
output_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images\augmented"

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

augmentations_per_image = 5  # Generate 5 augmented images per original image

# Apply data augmentation and save the generated images.
# The files are written explicitly instead of through flow(save_to_dir=...):
# with one image per flow() call every source image shares the same prefix
# and batch index, so the generated names differ only by a short random
# suffix and can overwrite each other. Writing with cv2.imwrite also keeps
# the BGR channel order of the arrays loaded by cv2.imread, whereas the
# Keras saver treats the array as RGB.
for i, (image, label) in enumerate(zip(labeled_images, labels)):
    label_dir = os.path.join(output_dir, label)
    os.makedirs(label_dir, exist_ok=True)
    # Work in float so interpolation is not truncated by the uint8 dtype.
    source = image.astype(np.float32)
    for k in range(augmentations_per_image):
        augmented_img = data_augmentor.random_transform(source)
        augmented_img = np.clip(augmented_img, 0, 255).astype(np.uint8)
        # Source index + copy index give a unique, reproducible file name.
        cv2.imwrite(os.path.join(label_dir, f"aug_{i}_{k}.png"), augmented_img)


In [None]:
# Progress marker: printed when execution reaches this cell.
print("augmentation done")

In [None]:
import matplotlib.pyplot as plt

# Step 2.2: Check for data imbalances.
# Count how many samples carry each distinct label.
unique_labels, counts = np.unique(labels, return_counts=True)

# Bar chart of the per-class sample counts
plt.figure(figsize=(8, 6))
plt.bar(unique_labels, counts)
plt.title('Class Distribution')
plt.ylabel('Number of Samples')
plt.xlabel('Class')
plt.show()

# Same information as text, one line per class
for position in range(len(unique_labels)):
    print(f"Class: {unique_labels[position]}, Count: {counts[position]}")


In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt

# Step 2.2: Check for data imbalances
unique_labels, counts = np.unique(labels, return_counts=True)

# Class distribution before oversampling
plt.figure(figsize=(8, 6))
plt.bar(unique_labels, counts)
plt.title('Class Distribution (Before Oversampling)')
plt.ylabel('Number of Samples')
plt.xlabel('Class')
plt.show()

# RandomOverSampler works on 2-D feature matrices, so every image is
# flattened to one row first and restored to its image shape afterwards.
_, img_height, img_width, img_channels = labeled_images.shape
ros = RandomOverSampler(random_state=42)
labeled_images_reshaped = labeled_images.reshape(
    -1, img_height * img_width * img_channels
)
labeled_images_resampled, labels_resampled = ros.fit_resample(
    labeled_images_reshaped, labels
)
labeled_images_resampled = labeled_images_resampled.reshape(
    -1, img_height, img_width, img_channels
)

# Class distribution after oversampling
unique_labels_resampled, counts_resampled = np.unique(
    labels_resampled, return_counts=True
)
plt.figure(figsize=(8, 6))
plt.bar(unique_labels_resampled, counts_resampled)
plt.title('Class Distribution (After Oversampling)')
plt.ylabel('Number of Samples')
plt.xlabel('Class')
plt.show()

# Text summary of the balanced class counts
for position in range(len(unique_labels_resampled)):
    print(f"Class: {unique_labels_resampled[position]}, Count: {counts_resampled[position]}")


# segmentation

In [None]:
import cv2
import numpy as np
import os

# Threshold-based ROI extraction: for every augmented image, keep only the
# pixels inside bright connected regions and save the result per class.

# Define the directory containing the augmented images
input_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images\augmented"

# Define the directory to save the segmented ROIs
output_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images\segmented"

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

min_contour_area = 100  # contours smaller than this (in pixels) are ignored

# Loop through the augmented images, one sub-folder per class label
for label in sorted(os.listdir(input_dir)):
    label_dir = os.path.join(input_dir, label)
    if not os.path.isdir(label_dir):
        continue  # stray files next to the class folders are not datasets

    output_path = os.path.join(output_dir, label)
    os.makedirs(output_path, exist_ok=True)

    for filename in sorted(os.listdir(label_dir)):
        image_path = os.path.join(label_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"Skipping unreadable file: {image_path}")
            continue

        # Convert the image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Binary threshold: pixels brighter than 200 become foreground
        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

        # Find the outer contours of the foreground regions
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Single-channel mask: 255 inside the kept contours, 0 elsewhere.
        # A 3-channel mask filled with (0, 255, 0) would wipe the blue and
        # red channels in the bitwise AND and leave a green-only image.
        mask = np.zeros(gray.shape, dtype=np.uint8)
        kept_contours = [c for c in contours if cv2.contourArea(c) >= min_contour_area]
        if kept_contours:
            cv2.drawContours(mask, kept_contours, -1, 255, thickness=cv2.FILLED)

        # Keep the original pixel values (all channels) inside the mask
        segmented_roi = cv2.bitwise_and(image, image, mask=mask)

        # Save the segmented ROI to the output directory
        output_filename = os.path.splitext(filename)[0] + '_segmented.png'
        output_image_path = os.path.join(output_path, output_filename)
        cv2.imwrite(output_image_path, segmented_roi)


In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt

# Same threshold-based ROI extraction as a plain batch run, but each result
# is also shown next to its source image for visual inspection.
# NOTE(review): one figure is shown per input file; with many augmented
# images this produces a very long output — consider limiting the previews.

# Define the directory containing the augmented images
input_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images\augmented"

# Define the directory to save the segmented ROIs
output_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images\segmented"

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

min_contour_area = 100  # contours smaller than this (in pixels) are ignored

# Loop through the augmented images, one sub-folder per class label
for label in sorted(os.listdir(input_dir)):
    label_dir = os.path.join(input_dir, label)
    if not os.path.isdir(label_dir):
        continue  # stray files next to the class folders are not datasets

    output_path = os.path.join(output_dir, label)
    os.makedirs(output_path, exist_ok=True)

    for filename in sorted(os.listdir(label_dir)):
        image_path = os.path.join(label_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"Skipping unreadable file: {image_path}")
            continue

        # Convert the image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Binary threshold: pixels brighter than 200 become foreground
        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

        # Find the outer contours of the foreground regions
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Single-channel mask: 255 inside the kept contours, 0 elsewhere.
        # A 3-channel mask filled with (0, 255, 0) would wipe the blue and
        # red channels in the bitwise AND and leave a green-only image.
        mask = np.zeros(gray.shape, dtype=np.uint8)
        kept_contours = [c for c in contours if cv2.contourArea(c) >= min_contour_area]
        if kept_contours:
            cv2.drawContours(mask, kept_contours, -1, 255, thickness=cv2.FILLED)

        # Keep the original pixel values (all channels) inside the mask
        segmented_roi = cv2.bitwise_and(image, image, mask=mask)

        # Save the segmented ROI to the output directory
        output_filename = os.path.splitext(filename)[0] + '_segmented.png'
        output_image_path = os.path.join(output_path, output_filename)
        cv2.imwrite(output_image_path, segmented_roi)

        # Display the original image and segmented ROI side by side
        # (converted from OpenCV's BGR to the RGB order matplotlib expects)
        fig, axs = plt.subplots(1, 2, figsize=(10, 5))
        axs[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        axs[0].set_title('Augmented Image')
        axs[0].axis('off')
        axs[1].imshow(cv2.cvtColor(segmented_roi, cv2.COLOR_BGR2RGB))
        axs[1].set_title('Segmented ROI')
        axs[1].axis('off')
        plt.tight_layout()
        plt.show()


# Build VGG16/EfficientNet Model

In [None]:
import os
import numpy as np
from tensorflow.keras.applications import VGG16, EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint


In [None]:
# Model-side settings: two output classes, 224x224 three-channel inputs
num_classes = 2
input_shape = (224, 224, 3)

# Folder holding the segmented images, one sub-folder per class
segmented_dataset_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images\segmented"

# Folder that receives the trained model checkpoint
output_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images"


In [None]:
from sklearn.preprocessing import LabelEncoder
import cv2
from tensorflow.keras.utils import to_categorical

# Load the segmented dataset: one sub-folder per class, folder name = label
labeled_images = []
labels = []
for label in ['Benign cases', 'Malignant cases']:
    label_path = os.path.join(segmented_dataset_dir, label)
    for filename in sorted(os.listdir(label_path)):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode; resizing
            # None would raise, so the file is skipped together with its label.
            print(f"Skipping unreadable file: {image_path}")
            continue
        # input_shape is (height, width, channels); cv2.resize wants (width, height)
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        labeled_images.append(image)
        labels.append(label)

labeled_images = np.array(labeled_images)

# Encode the string labels into numeric values
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Convert the labels to one-hot encoded vectors
labels = to_categorical(labels, num_classes=num_classes)

# Shuffle images and labels with the same permutation so pairs stay aligned
# and the classes, loaded one after the other, get mixed
indices = np.arange(len(labeled_images))
np.random.shuffle(indices)
labeled_images = labeled_images[indices]
labels = labels[indices]


In [None]:
# Initialize the base model: ImageNet-pretrained VGG16 without its classifier head
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
#base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)

# Build the model architecture: convolutional base followed by a small
# fully-connected head ending in a softmax over the classes
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model. `learning_rate` is the supported argument name; `lr` is
# a deprecated alias that recent Keras releases no longer accept.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])


In [None]:
# Training hyper-parameters
epochs = 10
batch_size = 32

# Checkpoint callback: writes best_model.h5 only when the validation
# accuracy improves on the best value seen so far
checkpoint = ModelCheckpoint(
    os.path.join(output_dir, 'best_model.h5'),
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Fit on the labeled images, holding out 20% of them for validation
history = model.fit(
    labeled_images,
    labels,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[checkpoint],
    validation_split=0.2,
)


# removing last few layers

In [None]:
import os
import numpy as np
from tensorflow.keras.applications import VGG16, EfficientNetB0
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Model-side settings: two classes, 224x224 three-channel inputs
num_classes = 2
input_shape = (224, 224, 3)

# Folder holding the segmented images, one sub-folder per class
segmented_dataset_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images\segmented"

# Folder that receives the trained model checkpoint
output_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images"


In [None]:
import cv2
# Load the segmented dataset: one sub-folder per class, folder name = label
labeled_images = []
labels = []
for label in ['Benign cases', 'Malignant cases']:
    label_path = os.path.join(segmented_dataset_dir, label)
    # Sorted listing keeps the sample order, and with the fixed seed below
    # the train/test partition, reproducible between runs.
    for filename in sorted(os.listdir(label_path)):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode; resizing
            # None would raise, so the file is skipped together with its label.
            print(f"Skipping unreadable file: {image_path}")
            continue
        # input_shape is (height, width, channels); cv2.resize wants (width, height)
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        labeled_images.append(image)
        labels.append(label)

labeled_images = np.array(labeled_images)
labels = np.array(labels)

# Convert the labels to binary format (0 for benign, 1 for malignant)
labels = np.where(labels == 'Benign cases', 0, 1)

# Split the data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(labeled_images, labels, test_size=0.2, random_state=42)


In [None]:
# Initialize the base model: ImageNet-pretrained VGG16 without its classifier head
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
#base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the layers in the base model so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Branch off the output of the base model's final layer. No base layers are
# dropped here: include_top=False above already excludes VGG16's own
# fully-connected classifier.
x = base_model.layers[-1].output
x = Flatten()(x)

# Add new layers for binary classification (single sigmoid unit)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

# Create the modified model
model = Model(inputs=base_model.input, outputs=output)

# Compile the model. `learning_rate` is the supported argument name; `lr` is
# a deprecated alias that recent Keras releases no longer accept.
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])


In [None]:
# Define the batch size and number of epochs
batch_size = 32
epochs = 10

# Define the model checkpoint to save the best model during training
checkpoint = ModelCheckpoint(os.path.join(output_dir, 'best_model.h5'), monitor='val_accuracy',
                             save_best_only=True, mode='max', verbose=1)

# Train the model on the training split only. Fitting on the full
# `labeled_images`/`labels` arrays would include the X_test samples in
# training and invalidate the test-set evaluation; those arrays are also
# still ordered by class, so a validation slice taken from their tail would
# contain a single class. X_train/y_train come out of train_test_split
# already shuffled.
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                    validation_split=0.2, callbacks=[checkpoint])


In [None]:
# Restore the weights written by the checkpoint callback
best_weights_file = os.path.join(output_dir, 'best_model.h5')
model.load_weights(best_weights_file)

# Predict on the test images; the sigmoid outputs are rounded to 0/1 and
# flattened into a 1-D vector of predictions
test_probabilities = model.predict(X_test)
y_pred = np.round(test_probabilities).flatten()

# Per-class precision/recall/F1, followed by the confusion matrix
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


# Applying the SSL algorithm: apply the Mean Teacher algorithm, then label the unlabelled data using pseudo-labelling

In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.applications import VGG16, EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint


In [None]:
# Model-side settings: two classes, 224x224 three-channel inputs
num_classes = 2
input_shape = (224, 224, 3)

# Folder with the unlabeled images to be pseudo-labeled
unlabeled_dataset_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\2\Unlabelled"

# Folder that receives the pseudo-labeled copies
output_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\2\pseudo-label"


In [None]:
# ImageNet-pretrained VGG16 without its classifier head
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
#base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)

# Convolutional base followed by a dense head with a softmax over the classes
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Restore previously trained weights into this architecture.
# NOTE(review): this relative path is not written by any training cell
# visible here (they save to os.path.join(output_dir, 'best_model.h5')) —
# confirm the file exists and was produced by a softmax model of this shape.
model.load_weights("trained_model/best_model.h5")


In [None]:
# Pseudo-labelling: predict a class for every unlabeled image and store the
# resized image under a folder named after the predicted class index.

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Loop through the unlabeled data
for filename in sorted(os.listdir(unlabeled_dataset_dir)):
    image_path = os.path.join(unlabeled_dataset_dir, filename)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for files it cannot decode; skip them
        # instead of failing inside cv2.resize.
        print(f"Skipping unreadable file: {image_path}")
        continue

    # input_shape is (height, width, channels); cv2.resize wants (width, height)
    image = cv2.resize(image, (input_shape[1], input_shape[0]))

    # Perform inference on a batch of one; the arg-max over the class scores
    # is taken as the pseudo-label
    pred = model.predict(np.expand_dims(image, axis=0))
    label = np.argmax(pred)

    # Save the pseudo-labeled data into the output directory
    output_path = os.path.join(output_dir, f"label_{label}")
    os.makedirs(output_path, exist_ok=True)
    output_filename = f"pseudo_label_{label}_{filename}"
    output_image_path = os.path.join(output_path, output_filename)
    cv2.imwrite(output_image_path, image)


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16, EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Model-side settings: two classes, 224x224 three-channel inputs
num_classes = 2
input_shape = (224, 224, 3)

# Fraction of the combined labeled + pseudo-labeled data held out for validation
validation_split = 0.2

# Input folders: ground-truth labeled images and pseudo-labeled images
labeled_dataset_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images"
pseudo_labeled_dataset_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\2\pseudo-label"

# Folder that receives the trained model checkpoint
output_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\combined_validation"


In [None]:
# Load the labeled data: one sub-folder per class, folder name = label
labeled_images = []
labels = []
for label in ['Benign cases', 'Malignant cases']:
    label_path = os.path.join(labeled_dataset_dir, label)
    for filename in sorted(os.listdir(label_path)):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode; resizing
            # None would raise, so the file is skipped together with its label.
            print(f"Skipping unreadable file: {image_path}")
            continue
        # input_shape is (height, width, channels); cv2.resize wants (width, height)
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        labeled_images.append(image)
        labels.append(label)

labeled_images = np.array(labeled_images)
labels = np.array(labels)

# Convert the labels to binary format (0 for benign, 1 for malignant)
labels = np.where(labels == 'Benign cases', 0, 1)

# Load the pseudo-labeled data from folders named "label_<class index>"
pseudo_labeled_images = []
pseudo_labels = []
for label_dir in sorted(os.listdir(pseudo_labeled_dataset_dir)):
    label_path = os.path.join(pseudo_labeled_dataset_dir, label_dir)
    if not os.path.isdir(label_path):
        continue  # stray files would break the int() parsing below
    label = int(label_dir.split("_")[-1])  # Extract the pseudo-label from the folder name
    for filename in sorted(os.listdir(label_path)):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping unreadable file: {image_path}")
            continue
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        pseudo_labeled_images.append(image)
        pseudo_labels.append(label)

pseudo_labeled_images = np.array(pseudo_labeled_images)
pseudo_labels = np.array(pseudo_labels)

# Combine the labeled and pseudo-labeled data (labeled samples first)
combined_images = np.concatenate((labeled_images, pseudo_labeled_images), axis=0)
combined_labels = np.concatenate((labels, pseudo_labels), axis=0)


In [None]:
# Hold out `validation_split` of the combined samples for validation; the
# fixed seed keeps the partition identical between runs.
_train_val_parts = train_test_split(
    combined_images,
    combined_labels,
    random_state=42,
    test_size=validation_split,
)
X_train, X_val, y_train, y_val = _train_val_parts


In [None]:
# Set the number of classes
num_classes = 2  # Update this according to your dataset

# Initialize the base model and build the model architecture:
# pretrained VGG16 base + dense head with a softmax over the classes
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model. The sparse loss matches the integer (0/1) labels.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that recent Keras releases no longer accept.
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

# Define the batch size and number of epochs
batch_size = 32
epochs = 10

# Make sure the checkpoint directory exists before training starts
os.makedirs(output_dir, exist_ok=True)

# Define the model checkpoint to save the best model during training
checkpoint = ModelCheckpoint(os.path.join(output_dir, 'best_model.h5'), monitor='val_accuracy',
                             save_best_only=True, mode='max', verbose=1)

# Train the model on the combined labeled and pseudo-labeled data
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                    validation_data=(X_val, y_val), callbacks=[checkpoint])


# Fine-tuning: combine the data and fit the model on the combined data

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16, EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Model-side settings: two classes, 224x224 three-channel inputs
num_classes = 2
input_shape = (224, 224, 3)

# Input folders: ground-truth labeled images and pseudo-labeled images
labeled_dataset_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images"
pseudo_labeled_dataset_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\2\pseudo-label"

# Folder that receives the trained model checkpoint
output_dir = r"C:\Users\ADMIN\Desktop\techieyan projects\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\combined_data"


In [None]:
# Load the labeled data: one sub-folder per class, folder name = label
labeled_images = []
labels = []
for label in ['Benign cases', 'Malignant cases']:
    label_path = os.path.join(labeled_dataset_dir, label)
    for filename in sorted(os.listdir(label_path)):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode; resizing
            # None would raise, so the file is skipped together with its label.
            print(f"Skipping unreadable file: {image_path}")
            continue
        # input_shape is (height, width, channels); cv2.resize wants (width, height)
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        labeled_images.append(image)
        labels.append(label)

labeled_images = np.array(labeled_images)
labels = np.array(labels)

# Convert the labels to binary format (0 for benign, 1 for malignant)
labels = np.where(labels == 'Benign cases', 0, 1)

# Load the pseudo-labeled data from folders named "label_<class index>"
pseudo_labeled_images = []
pseudo_labels = []
for label_dir in sorted(os.listdir(pseudo_labeled_dataset_dir)):
    label_path = os.path.join(pseudo_labeled_dataset_dir, label_dir)
    if not os.path.isdir(label_path):
        continue  # stray files would break the int() parsing below
    label = int(label_dir.split("_")[-1])  # Extract the pseudo-label from the folder name
    for filename in sorted(os.listdir(label_path)):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping unreadable file: {image_path}")
            continue
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        pseudo_labeled_images.append(image)
        pseudo_labels.append(label)

pseudo_labeled_images = np.array(pseudo_labeled_images)
pseudo_labels = np.array(pseudo_labels)

# Combine the labeled and pseudo-labeled data (labeled samples first)
combined_images = np.concatenate((labeled_images, pseudo_labeled_images), axis=0)
combined_labels = np.concatenate((labels, pseudo_labels), axis=0)


In [None]:
# Keep 20% of the combined samples aside as a test set; the fixed seed keeps
# the partition identical between runs.
_train_test_parts = train_test_split(
    combined_images,
    combined_labels,
    random_state=42,
    test_size=0.2,
)
X_train, X_test, y_train, y_test = _train_test_parts


In [None]:
# Initialize the base model: ImageNet-pretrained VGG16 without its classifier head
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
# base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)

# Build the model architecture: convolutional base + dense softmax head
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model. The sparse loss matches the integer (0/1) labels.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that recent Keras releases no longer accept.
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

# Define the batch size and number of epochs
batch_size = 32
epochs = 10

# Make sure the checkpoint directory exists before training starts
os.makedirs(output_dir, exist_ok=True)

# Define the model checkpoint to save the best model during training
checkpoint = ModelCheckpoint(os.path.join(output_dir, 'best_model.h5'), monitor='val_accuracy',
                             save_best_only=True, mode='max', verbose=1)

# Train the model on the combined labeled and pseudo-labeled data.
# NOTE(review): the test split doubles as validation data here, so the
# checkpoint is selected on the same samples later used for evaluation —
# the reported test metrics will be optimistic; consider a separate
# validation split.
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                    validation_data=(X_test, y_test), callbacks=[checkpoint])


# evaluating the model performance on combined data

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Restore the weights stored in best_model.h5 under the output directory
best_weights_file = os.path.join(output_dir, 'best_model.h5')
model.load_weights(best_weights_file)


In [None]:
# Per-class scores for every test image, reduced to the index of the
# highest-scoring class
class_scores = model.predict(X_test)
y_pred = class_scores.argmax(axis=1)


In [None]:
# Text report, confusion matrix and overall accuracy (fraction of test
# samples whose predicted class equals the true class)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = (y_pred == y_test).sum() / len(y_test)


In [None]:
# Print each result under its heading, followed by a blank line
print("Classification Report:", report, sep="\n")
print()
print("Confusion Matrix:", conf_matrix, sep="\n")


In [None]:
class_names = ['Benign', 'Malignant']

# Plot the confusion matrix as a heat map (rows: true class, columns: predicted)
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(num_classes)
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)
plt.xlabel('Predicted Class')
plt.ylabel('True Class')

# Write the count into every cell; white text on dark cells, black on light
thresh = conf_matrix.max() / 2
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        plt.text(j, i, format(conf_matrix[i, j], 'd'), ha="center", va="center",
                 color="white" if conf_matrix[i, j] > thresh else "black")

plt.show()

# Plot the classification report metrics.
# The metrics are requested as a dictionary rather than parsed from the text
# report: each text row holds ONE class (precision, recall, f1, support), so
# slicing rows does not yield per-metric lists, and the values are strings.
class_ids = list(range(num_classes))
report_dict = classification_report(y_test, y_pred, labels=class_ids,
                                    output_dict=True, zero_division=0)
precision = [report_dict[str(c)]['precision'] for c in class_ids]
recall = [report_dict[str(c)]['recall'] for c in class_ids]
f1_score = [report_dict[str(c)]['f1-score'] for c in class_ids]

plt.figure(figsize=(8, 6))
x = np.arange(len(class_ids))
bar_width = 0.2
# Three bars per class, shifted by one bar width each
plt.bar(x, precision, width=bar_width, label='Precision')
plt.bar(x + bar_width, recall, width=bar_width, label='Recall')
plt.bar(x + 2 * bar_width, f1_score, width=bar_width, label='F1-score')
plt.xlabel('Class')
plt.ylabel('Score')
plt.title('Classification Metrics')
plt.xticks(x + bar_width, class_names)  # centre the label under the middle bar
plt.legend()
plt.show()

# Plot the accuracy
plt.figure(figsize=(8, 6))
plt.bar(['Accuracy'], [accuracy])
plt.ylim(0, 1)
plt.title('Accuracy')
plt.show()


# CoSSL Algorithm 

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix

# Input directories: the hand-labeled CT scans (one sub-folder per class) and the
# pseudo-labeled images (one sub-folder per pseudo-label, label taken from the folder name).
# NOTE(review): these paths are rooted at "Desktop\Techieyan", while the loading cell at the
# top of the file uses "Desktop\techieyan projects" - confirm which root is current.
labeled_dataset_dir = r"C:\Users\ADMIN\Desktop\Techieyan\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\1\CT-Scan Images"
pseudo_labeled_dataset_dir = r"C:\Users\ADMIN\Desktop\Techieyan\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\Dataset\2\pseudo-label"

# Directory that receives the best checkpoint ('best_model.h5') written during training
output_dir = r"C:\Users\ADMIN\Desktop\Techieyan\semi supervised learning deep transfer learning for pulomanry nodules detetction using ct images\combined_data"

# Number of output classes (benign / malignant); sizes the final softmax layer
num_classes = 2

# Input shape handed to VGG16; its first two entries are also the resize target for every image
input_shape = (224, 224, 3)

# Fraction of the combined (labeled + pseudo-labeled) samples held out for validation
validation_split = 0.2

# Load the labeled CT-scan images, one sub-folder per class.
# A folder's position in the list below is its integer class id
# (0 = 'Benign cases', 1 = 'Malignant cases').
labeled_images = []
labels = []
for class_index, label in enumerate(['Benign cases', 'Malignant cases']):
    label_path = os.path.join(labeled_dataset_dir, label)
    for filename in os.listdir(label_path):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable / non-image file (e.g. Thumbs.db)
            # by returning None; cv2.resize would raise on it, so skip the file.
            print("Skipping unreadable image:", image_path)
            continue
        # cv2.resize expects dsize as (width, height); input_shape is assumed to
        # follow the Keras channels-last layout (height, width, channels).
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        labeled_images.append(image)
        labels.append(class_index)

labeled_images = np.array(labeled_images)
# Integer class ids (0 for benign, 1 for malignant), one per loaded image
labels = np.array(labels, dtype=int)

# Load the pseudo-labeled images. Each sub-folder of pseudo_labeled_dataset_dir
# holds the images of one pseudo-label; the label is the integer after the last
# underscore in the folder name.
pseudo_labeled_images = []
pseudo_labels = []
for label_dir in os.listdir(pseudo_labeled_dataset_dir):
    label_path = os.path.join(pseudo_labeled_dataset_dir, label_dir)
    if not os.path.isdir(label_path):
        # Stray files next to the label folders carry no label; ignore them
        continue
    label = int(label_dir.split("_")[-1])  # Extract the pseudo-label from the folder name
    for filename in os.listdir(label_path):
        image_path = os.path.join(label_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode; cv2.resize
            # would raise on it, so skip the file.
            print("Skipping unreadable image:", image_path)
            continue
        # cv2.resize expects dsize as (width, height); input_shape is assumed to
        # follow the Keras channels-last layout (height, width, channels).
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        pseudo_labeled_images.append(image)
        pseudo_labels.append(label)

if pseudo_labeled_images:
    pseudo_labeled_images = np.array(pseudo_labeled_images)
else:
    # Keep the image rank even when nothing was loaded, so the array can still
    # be concatenated with the labeled images along axis 0.
    pseudo_labeled_images = np.empty((0,) + tuple(input_shape), dtype=np.uint8)
pseudo_labels = np.array(pseudo_labels, dtype=int)

# Stack the hand-labeled and pseudo-labeled samples into one dataset
combined_images = np.concatenate([labeled_images, pseudo_labeled_images])
combined_labels = np.concatenate([labels, pseudo_labels])

# Hold out `validation_split` of the combined samples for validation;
# the fixed seed makes the split repeatable between runs
X_train, X_val, y_train, y_val = train_test_split(
    combined_images,
    combined_labels,
    random_state=42,
    test_size=validation_split,
)

# Build the classifier: an ImageNet-pretrained VGG16 convolutional base (without
# its original dense head) followed by a small dense head with one softmax
# output per class. Nothing here freezes the VGG16 layers, so they are
# fine-tuned together with the new head.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Compile the model. The sparse loss takes integer class ids directly, which is
# what the label arrays contain. `learning_rate` replaces the deprecated `lr`
# alias, which newer Keras releases reject.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Define the batch size and number of epochs
batch_size = 32
epochs = 10

# The checkpoint is written into output_dir; create it up front so saving
# (and the later load_weights) does not fail on a missing directory
os.makedirs(output_dir, exist_ok=True)
best_model_path = os.path.join(output_dir, 'best_model.h5')

# Keep only the weights of the epoch with the highest validation accuracy
checkpoint = ModelCheckpoint(best_model_path, monitor='val_accuracy',
                             save_best_only=True, mode='max', verbose=1)

# Train the model on the combined labeled and pseudo-labeled data
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                    validation_data=(X_val, y_val), callbacks=[checkpoint])

# Restore the best epoch's weights (the in-memory model holds the last epoch's)
model.load_weights(best_model_path)

# Evaluate the model on the validation split. X_val / y_val come from the
# combined labeled + pseudo-labeled data, so part of the "ground truth" used
# here consists of pseudo-labels.
y_pred = model.predict(X_val)               # per-class softmax scores
y_pred_classes = np.argmax(y_pred, axis=1)  # highest-scoring class id per sample

# Convert the integer class ids back to readable class names
class_labels = ['Benign', 'Malignant']
y_val_labels = np.array([class_labels[label] for label in y_val])
y_pred_labels = np.array([class_labels[label] for label in y_pred_classes])

# Generate the classification report. Passing labels= pins the class order and
# keeps both classes in the output even if one never occurs in this split.
report = classification_report(y_val_labels, y_pred_labels, labels=class_labels)

# Generate the confusion matrix (rows = true class, columns = predicted class);
# labels= guarantees a 2x2 matrix whose order matches the plot tick labels
conf_matrix = confusion_matrix(y_val_labels, y_pred_labels, labels=class_labels)

# Calculate accuracy as the fraction of validation samples predicted correctly
accuracy = np.sum(y_pred_classes == y_val) / len(y_val)

# Draw the confusion matrix as a heat map, one tick per class on each axis
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, cmap=plt.cm.Blues, interpolation='nearest')
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(num_classes)
tick_labels = ['Benign', 'Malignant']
plt.xticks(tick_marks, tick_labels)
plt.yticks(tick_marks, tick_labels)
plt.xlabel('Predicted Class')
plt.ylabel('True Class')

# Write each cell's count on top of it; cells darker than the half-way
# point get white text so the number stays readable
thresh = conf_matrix.max() / 2
n_rows, n_cols = conf_matrix.shape
for i in range(n_rows):
    for j in range(n_cols):
        count = conf_matrix[i, j]
        if count > thresh:
            text_color = "white"
        else:
            text_color = "black"
        plt.text(j, i, f"{count:d}", color=text_color, ha="center", va="center")

plt.show()

# Plot per-class precision, recall and F1-score as grouped bars.
#
# The scores are computed from conf_matrix (rows = true class, columns =
# predicted class) instead of slicing lines out of the text report. In the
# classification_report text, lines 2 and 3 are the two per-class rows
# (precision, recall, f1-score, support as strings) and line 4 is blank, so
# treating them as "precision / recall / f1" mislabels the values, plots
# strings, and leaves the third series empty.
cm = np.asarray(conf_matrix, dtype=float)
true_positives = np.diag(cm)
predicted_per_class = cm.sum(axis=0)  # column sums: how often each class was predicted
actual_per_class = cm.sum(axis=1)     # row sums: how many samples each class really has

# When a denominator is zero its numerator is zero too, so dividing by 1
# instead yields a score of 0 without a divide-by-zero warning.
precision = true_positives / np.where(predicted_per_class > 0, predicted_per_class, 1.0)
recall = true_positives / np.where(actual_per_class > 0, actual_per_class, 1.0)
pr_sum = precision + recall
f1_score = 2 * precision * recall / np.where(pr_sum > 0, pr_sum, 1.0)

plt.figure(figsize=(8, 6))
x = np.arange(len(precision))  # one bar group per class
bar_width = 0.2
plt.bar(x, precision, width=bar_width, label='Precision')
plt.bar(x + bar_width, recall, width=bar_width, label='Recall')
plt.bar(x + 2 * bar_width, f1_score, width=bar_width, label='F1-score')
plt.xlabel('Class')
plt.ylabel('Score')
plt.title('Classification Metrics')
# Centre each class label under the middle bar of its group
plt.xticks(x + bar_width, ['Benign', 'Malignant'][:len(x)])
plt.legend()
plt.show()

# Show overall accuracy as a single bar on a fixed 0-1 scale
plt.figure(figsize=(8, 6))
plt.bar(x=['Accuracy'], height=[accuracy])
plt.ylim(0, 1)
plt.title('Accuracy')
plt.show()
