In [None]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
import itertools
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
import random
import cv2
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Activation,Dropout, Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam,  Adagrad
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras import models

In [None]:
# Root folder of the dataset and its three split sub-folders.
data_path = Path("Data")
train_path, test_path, val_path = (
    data_path / split for split in ("train", "test", "valid")
)

# Sanity check: show what is actually inside the data folder.
print(os.listdir(data_path))

In [None]:
# Load each split into its own DataFrame using the project helper `load_data`.
# NOTE(review): the `data` keyword is presumably a tag naming the split —
# confirm its meaning in data_utils.
from data_utils import load_data, append_labeled_image
from visualization import plot
train_df = load_data(train_path, data = "train")
test_df = load_data(test_path, data = "test")
val_df = load_data(val_path, data = "validation")

In [None]:
# Shape and first rows of the training frame, emitted by a single print call.
print(
    f"Shape of the Train Dataframe = {train_df.shape}",
    train_df.head(),
    sep="\n",
)

In [None]:
# Shape and first rows of the TEST frame (previously this cell printed train_df).
print(f"Shape of the Test Dataframe = {test_df.shape}")
print(test_df.head())

In [None]:
# Shape and first rows of the VALIDATION frame (previously this cell printed train_df).
print(f"Shape of the Validation Dataframe = {val_df.shape}")
print(val_df.head())

In [None]:
# Display the dtype of every column in the training frame.
train_df.dtypes

In [None]:
# Convert the label column to the pandas `category` dtype: this saves memory
# and makes grouping by label more natural.
# Note: this mutates train_df in place; only the training frame is converted.
train_df["label"] = train_df["label"].astype("category")

# Show the dtypes again to confirm the conversion.
print(train_df.dtypes)


In [None]:
# List the distinct labels present in the training set.
print(f"Unique Cancer Cases {train_df['label'].unique()}")

# Number of training rows per label (class balance check).
print(train_df['label'].value_counts())


In [None]:
# Bar chart of how many training rows fall under each label.
ax = sns.countplot(data=train_df, x="label")
ax.set(xlabel="Case #", ylabel="Count")
# Rotate the tick labels so long class names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Visualise the training frame with the project helper `plot` (imported from
# the local `visualization` module).
# NOTE(review): presumably shows sample images — confirm in visualization.py.
plot(train_df)

In [None]:
# Preprocess
from preprocess import preprocess_image, process_dataframe

In [None]:
label_encoder = LabelEncoder() # maps categorical label strings to integer class ids

# Train set: the only call that passes fit_encoder=True.
# NOTE(review): presumably this fits label_encoder on the training labels —
# confirm in preprocess.process_dataframe.
x_train, y_train = process_dataframe(train_df, label_encoder, fit_encoder=True)

# Validation and test sets: reuse the same encoder without fit_encoder, so all
# three splits share one label-to-id mapping.
x_val, y_val = process_dataframe(val_df, label_encoder)
x_test, y_test = process_dataframe(test_df, label_encoder)

In [None]:
# Report the size of each processed split and the classes known to the encoder.
# (The train line previously reported len(x_test) by mistake.)
print(f"Number of Train examples: {len(x_train)}")
print(f"Number of Test examples: {len(x_test)}")
print(f"Number of Validation examples: {len(x_val)}")
print(f"Label classes: {label_encoder.classes_}")

In [None]:
# Spot-check a few decoded test labels against their filenames.
# The printed message is derived from the slice, so it always describes the
# rows actually shown (it previously said "First 5" while showing rows 10-14).
# NOTE(review): assumes process_dataframe keeps test_df's row order — confirm.
sample_rows = slice(10, 15)
print(
    f"Test labels {sample_rows.start}-{sample_rows.stop - 1} (decoded):",
    label_encoder.inverse_transform(y_test[sample_rows]),
)
print("Corresponding filenames:", test_df["images"].iloc[sample_rows].values)

In [None]:
# Number of distinct labels in the training frame; used as the width of the
# softmax output layer when the model is built.
NUM_CLASSES = train_df["label"].nunique()

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

def resnet_model(num_classes=None, input_shape=(224, 224, 3),
                 learning_rate=0.001, dropout_rate=0.25):
    """Build and compile a ResNet50-based transfer-learning classifier.

    The ImageNet-pretrained ResNet50 backbone is used as a frozen feature
    extractor (none of its layers are trained). Only the new head is trainable:
    global average pooling, three Dense+Dropout stages (256, 128, 64 units)
    and a softmax output layer.

    Args:
        num_classes: Width of the softmax output. Defaults to the module-level
            ``NUM_CLASSES`` when omitted, matching the original behaviour.
        input_shape: Shape of one input image passed to ResNet50.
        learning_rate: Learning rate for the Adam optimizer.
        dropout_rate: Dropout rate applied after each hidden Dense layer.

    Returns:
        A compiled ``Sequential`` model. The loss is sparse categorical
        cross-entropy, so targets must be integer class ids (not one-hot).
    """
    if num_classes is None:
        num_classes = NUM_CLASSES

    # NOTE(review): ImageNet ResNet50 weights are normally paired with
    # tensorflow.keras.applications.resnet50.preprocess_input — confirm that
    # preprocess_image applies compatible preprocessing.
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False  # Freeze the whole backbone

    # Classification head stacked on top of the frozen backbone.
    head = [layers.GlobalAveragePooling2D()]
    for units in (256, 128, 64):
        head.append(layers.Dense(units, activation='relu'))
        head.append(layers.Dropout(dropout_rate))
    head.append(layers.Dense(num_classes, activation='softmax'))

    model = models.Sequential([base_model, *head])

    # Compile the model
    model.compile(loss=SparseCategoricalCrossentropy(),
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=["accuracy"])

    return model


In [None]:
# `resnet_model` starts out as the builder function and is rebound to the model
# instance below. Keep a handle on the builder so that re-running this cell
# builds a fresh model instead of trying to call the already-built model.
if not isinstance(resnet_model, Model):
    build_resnet_model = resnet_model
resnet_model = build_resnet_model()

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model. Validation (and therefore early stopping / best-weight
# selection) uses the validation split; the test split must stay unseen so it
# can give an unbiased final evaluation.
history = resnet_model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=60,
    validation_data=(x_val, y_val),
    callbacks=[early_stop],
)

# Save the trained model as an HDF5 file.
resnet_model.save('resnet_model.h5')


In [None]:
from tensorflow.keras.models import load_model

# Reload the model from the saved file
loaded_model = load_model('resnet_model.h5')


In [None]:
# Pick up to 10 distinct validation indices at random. The count is capped at
# the validation-set size because random.sample raises ValueError when asked
# for more items than the population holds.
n_samples = min(10, len(x_val))
indices = random.sample(range(len(x_val)), n_samples)

# Get the images and true labels for the sampled indices
x_sample = x_val[indices]
y_true = y_val[indices]
    
def plot_predictions(model, images=None, labels=None, n_cols=5):
    """Plot images with their true and predicted labels.

    Each title is green when the prediction matches the true label and red
    otherwise.

    Args:
        model: Object with a ``predict`` method returning per-class scores of
            shape (n_images, n_classes); the arg-max is taken as the prediction.
        images: Images to predict on. Defaults to the module-level ``x_sample``.
        labels: Encoded true labels for ``images``. Defaults to the
            module-level ``y_true``.
        n_cols: Number of subplot columns; rows are added as needed.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    if images is None:
        images = x_sample
    if labels is None:
        labels = y_true

    n_images = len(images)
    if n_images == 0:
        return  # nothing to predict or draw

    # Predict labels
    y_pred_probs = model.predict(images)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Decode integer class ids back to label names using label_encoder
    actual_labels = label_encoder.inverse_transform(labels)
    predicted_labels = label_encoder.inverse_transform(y_pred)

    # Size the grid from the data instead of assuming exactly 10 images.
    # With 10 images and 5 columns this gives the original 2x5 grid at (15, 6).
    n_rows = (n_images + n_cols - 1) // n_cols
    plt.figure(figsize=(3 * n_cols, 3 * n_rows))
    for i in range(n_images):
        plt.subplot(n_rows, n_cols, i + 1)
        # NOTE(review): assumes images are BGR floats scaled to [0, 1] —
        # confirm against preprocess_image.
        img = cv2.cvtColor((images[i] * 255).astype(np.uint8), cv2.COLOR_BGR2RGB)
        plt.imshow(img)
        plt.axis('off')
        is_correct = actual_labels[i] == predicted_labels[i]
        plt.title(f"True: {actual_labels[i]}\nPred: {predicted_labels[i]}",
                  fontsize=10, color='green' if is_correct else 'red')
    plt.tight_layout()
    plt.show()

In [None]:
# Show the ResNet50-based model's predictions on the randomly sampled validation images
plot_predictions(resnet_model)