<a href="https://colab.research.google.com/github/amanupg/Imageclef-2024/blob/main/Xception.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3, VGG19, ResNet50, Xception, EfficientNetB0, MobileNetV2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
import pandas as pd
import pathlib
from sklearn.cluster import KMeans
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPooling2D, Conv2DTranspose, Concatenate, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping, TensorBoard

In [None]:
from google.colab import drive

# Mount Google Drive so the dataset/model paths under /content/drive resolve.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# ImageNet-pretrained Xception backbone; include_top=False leaves out its
# original classification head so a custom head can be attached.
base_model = Xception(include_top=False, weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Model input: single-channel 256x256 images.
input_shape = (256, 256, 1)  # Grayscale images
input_layer = tf.keras.layers.Input(shape=input_shape)

# Replicate the single channel three times along the last axis to obtain a
# 3-channel tensor for the backbone. This produces the same values as
# tf.image.grayscale_to_rgb, but uses a built-in layer instead of a Lambda
# wrapping a Python lambda, which Keras can only save as marshalled bytecode
# (not portable across Python versions / blocked by safe-mode loading).
gray_convert = Concatenate(axis=-1)([input_layer, input_layer, input_layer])

# NOTE(review): the Xception ImageNet weights are normally paired with
# keras.applications.xception.preprocess_input (inputs scaled to [-1, 1]);
# the pipeline in this notebook scales to [0, 1] -- confirm this is intended.

# Pass through the Xception base model
x = base_model(gray_convert)

# Add custom classification head
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(3, activation='softmax')(x)  # 3 output classes

In [None]:
# Tie the grayscale input to the softmax output to form the full classifier.
model = Model(
    inputs=input_layer,
    outputs=predictions,
)

In [None]:
# Categorical cross-entropy expects one-hot encoded labels.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Loader settings shared by both splits: integer labels inferred from the
# sub-directory names, batches of 16, 256x256 grayscale images.
_loader_options = dict(
    labels='inferred',
    label_mode='int',
    batch_size=16,
    image_size=(256, 256),
    color_mode='grayscale',
)

train_ds = keras.utils.image_dataset_from_directory(
    directory='/content/drive/MyDrive/Machine_Learning/Research/Train',
    **_loader_options,
)
validation_ds = keras.utils.image_dataset_from_directory(
    directory='/content/drive/MyDrive/Machine_Learning/Research/Test',
    **_loader_options,
)

Found 540 files belonging to 3 classes.
Found 60 files belonging to 3 classes.


In [None]:
def process(image, label):
    """Scale pixel values by 1/255 and one-hot encode the label.

    Args:
        image: Image tensor; divided by 255 and cast to float32.
        label: Integer class-index tensor; expanded to a depth-3 one-hot vector.

    Returns:
        Tuple ``(scaled_image, one_hot_label)``.
    """
    scaled_image = tf.cast(image / 255, tf.float32)
    encoded_label = tf.one_hot(label, depth=3)  # three target classes
    return scaled_image, encoded_label


# Apply the scaling / one-hot step to both splits.
# NOTE: this rebinds train_ds / validation_ds, so run the cell only once per
# freshly created dataset.
train_ds, validation_ds = (
    split.map(process) for split in (train_ds, validation_ds)
)

In [None]:
# Output locations for the best checkpoint and the per-epoch CSV log.
model_path = "/content/drive/MyDrive/Machine_Learning/Research/Model/xception.h5"
csv_path = "/content/drive/MyDrive/Machine_Learning/Research/Model/xception.log"

# Keep only the best model seen so far.
checkpoint_cb = ModelCheckpoint(model_path, verbose=1, save_best_only=True)
# Append per-epoch metrics to a CSV file.
csv_logger_cb = CSVLogger(csv_path)
# TensorBoard event files, default log directory.
tensorboard_cb = TensorBoard()
# Stop after 50 epochs without val_loss improvement; weights are not rolled back.
early_stopping_cb = EarlyStopping(
    monitor='val_loss',
    patience=50,
    restore_best_weights=False,
)
callbacks = [checkpoint_cb, csv_logger_cb, tensorboard_cb, early_stopping_cb]

# Keras documents `shuffle` as ignored when the input is a tf.data.Dataset;
# it is kept here unchanged.
model.fit(
    train_ds,
    validation_data=validation_ds,
    epochs=200,
    callbacks=callbacks,
    shuffle=False,
)

Epoch 1/200
Epoch 1: val_loss improved from inf to 27.94791, saving model to /content/drive/MyDrive/Machine_Learning/Research/Model/xception.h5


  saving_api.save_model(


Epoch 2/200
Epoch 2: val_loss did not improve from 27.94791
Epoch 3/200
Epoch 3: val_loss improved from 27.94791 to 17.14929, saving model to /content/drive/MyDrive/Machine_Learning/Research/Model/xception.h5
Epoch 4/200
Epoch 4: val_loss improved from 17.14929 to 0.15811, saving model to /content/drive/MyDrive/Machine_Learning/Research/Model/xception.h5
Epoch 5/200
Epoch 5: val_loss did not improve from 0.15811
Epoch 6/200
Epoch 6: val_loss did not improve from 0.15811
Epoch 7/200
Epoch 7: val_loss did not improve from 0.15811
Epoch 8/200
Epoch 8: val_loss did not improve from 0.15811
Epoch 9/200
Epoch 9: val_loss did not improve from 0.15811
Epoch 10/200
Epoch 10: val_loss did not improve from 0.15811
Epoch 11/200
Epoch 11: val_loss did not improve from 0.15811
Epoch 12/200
Epoch 12: val_loss did not improve from 0.15811
Epoch 13/200
Epoch 13: val_loss did not improve from 0.15811
Epoch 14/200
Epoch 14: val_loss did not improve from 0.15811
Epoch 15/200
Epoch 15: val_loss did not imp

<keras.src.callbacks.History at 0x7a309ff73d30>

In [None]:
# Reload the checkpoint file that was saved during training.
checkpoint_file = '/content/drive/MyDrive/Machine_Learning/Research/Model/xception.h5'
model = tf.keras.models.load_model(checkpoint_file)

In [None]:
# Remove the last layer of the model: the penultimate layer's output is used
# as the feature vector for each image.
feature_extractor = Model(inputs=model.input, outputs=model.layers[-2].output)

# Extract features from the test dataset
# NOTE(review): image_dataset_from_directory shuffles by default, so the row
# order of test_features presumably does not follow file order -- confirm
# before mapping these labels back to files.
test_features = feature_extractor.predict(validation_ds)

num_clusters = 4  # specify the number of clusters
# Fix the seed and pin n_init explicitly so cluster assignments are
# reproducible and do not depend on the scikit-learn version's default.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=42)
clusters = kmeans.fit_predict(test_features)

# clusters variable now contains the cluster labels for each image in the test dataset

kmeans.labels_





array([1, 0, 3, 2, 2, 2, 3, 0, 3, 3, 2, 1, 2, 1, 2, 2, 0, 2, 2, 1, 2, 1,
       1, 1, 3, 1, 2, 1, 0, 2, 0, 3, 0, 2, 1, 1, 2, 1, 3, 2, 1, 0, 1, 2,
       1, 2, 0, 0, 1, 1, 1, 2, 3, 1, 2, 1, 0, 0, 0, 2], dtype=int32)

In [None]:
# Folder with the generated images of the ImageCLEFmedical GANs 2024 Task 2 test set.
data_dir = pathlib.Path(
    '/content/drive/MyDrive/Machine_Learning/Research/ImageCLEFmedical  GANs 2024 - Task2 test/generated_images'
)

In [None]:
# Number of files (with an extension) in the image folder.
image_count = len(list(data_dir.glob('*.*')))

# Create a dataset of image file paths in deterministic order.
# The paths must NOT be shuffled here: predictions come back in dataset order
# and are later paired positionally with the figure IDs of the run file, so a
# shuffle would attach cluster labels to the wrong images.
list_ds = tf.data.Dataset.list_files(str(data_dir/'*'), shuffle=False)

In [None]:
def process_path(file_path):
    """Load one PNG file as a normalized single-channel 256x256 image.

    Args:
        file_path: Scalar string tensor holding the path of the image file.

    Returns:
        float32 tensor of shape (256, 256, 1) with values in [0, 1].
    """
    img = tf.io.read_file(file_path)
    # channels=1 already yields (height, width, 1); adding another trailing
    # axis would produce a rank-4 image that no longer matches the model input.
    img = tf.image.decode_png(img, channels=1)  # Use decode_png if images are png format
    img = tf.image.resize(img, [256, 256])
    img = (img / 255.0)  # Normalize pixels to 0,1
    return img


In [None]:
# Decode and normalise every listed file; AUTOTUNE lets tf.data pick the
# level of parallelism.
new_dataset = list_ds.map(
    process_path,
    num_parallel_calls=tf.data.AUTOTUNE,
)


In [None]:
# Remove the last layer of the model: the penultimate layer's output is used
# as the feature vector for each image.
feature_extractor = Model(inputs=model.input, outputs=model.layers[-2].output)

# Batch into a separately named dataset so that re-running this cell does not
# batch an already-batched dataset.
new_dataset_batched = new_dataset.batch(16)  # Replace 16 with your desired batch size

# Extract features from the new dataset
new_features = feature_extractor.predict(new_dataset_batched)

# Perform clustering on the extracted features
num_clusters = 4  # specify the number of clusters
# Fix the seed and pin n_init explicitly so cluster assignments are
# reproducible and do not depend on the scikit-learn version's default.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=42)
new_clusters = kmeans.fit_predict(new_features)

# new_clusters variable now contains the cluster labels for each image in the new dataset
print(kmeans.labels_)






[2 2 2 ... 2 2 2]


In [None]:
# Quick sanity check: show the first 20 cluster assignments.
first_labels = kmeans.labels_[:20]
print(first_labels)

[2 2 2 0 2 0 2 1 2 2 2 2 2 2 2 2 0 2 1 0]


In [None]:
# Load the provided run.csv file
provided_run_path = '/content/drive/MyDrive/Machine_Learning/Research/ImageCLEFmedical  GANs 2024 - Task2 test/xception.csv'

# Read the file without header; each line lands in the single 'data' column
# (assumes the lines contain no commas -- TODO confirm against the run file).
provided_run_df = pd.read_csv(provided_run_path, header=None, names=['data'])

# Map figure IDs to cluster labels
figure_ids = provided_run_df['data'].str.split('\t', expand=True)[0]  # Extract figure IDs

# zip() silently truncates to the shorter input, which would drop or misassign
# labels; fail loudly instead when the counts disagree.
if len(figure_ids) != len(new_clusters):
    raise ValueError(
        f"Run file lists {len(figure_ids)} figure IDs but {len(new_clusters)} "
        "cluster labels were predicted; cannot pair them positionally."
    )

# NOTE(review): labels are assigned by position, which is only correct if
# new_clusters is in the same order as the rows of the run file -- confirm
# that the dataset fed to predict() follows that order.
cluster_mapping = dict(zip(figure_ids, new_clusters + 1))  # Adding 1 to clusters to match the required labels [1, 2, 3, 4]

# Create a DataFrame with image IDs and corresponding cluster mappings
result_df = pd.DataFrame({'ImageID': figure_ids, 'Cluster': [cluster_mapping[x] for x in figure_ids]})

# Save the DataFrame to the same CSV file (overwrite the existing file)
# NOTE(review): the output is comma-separated with two columns, so re-running
# this cell reads its own output rather than the original run file -- confirm
# that is acceptable or keep a copy of the original.
result_df.to_csv(provided_run_path, index=False, header=False)
