In [None]:
!pip install datasets

In [None]:
# Log in to Hugging Face for access to the dataset.
#
# The access token must never be stored in the notebook itself: it is read
# from the HF_TOKEN environment variable, and if that is unset or empty the
# user is prompted for it (getpass does not echo the input).
# NOTE(review): a token was previously hardcoded in this cell; if the notebook
# was ever shared or committed, that token should be revoked and regenerated.
import os
from getpass import getpass

from huggingface_hub import login

# HF_TOKEN stays a notebook-level name because later cells pass it to
# load_dataset().
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=HF_TOKEN, add_to_git_credential=True)

In [None]:
import tensorflow as tf
import numpy as np
from datasets import load_dataset
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Function for loading huggingface data
def load_hf_data(
    repo_id="FangDai/Thyroid_Ultrasound_Images",
    test_size=0.2,
    seed=42,
    image_size=(224, 224),
    batch_size=32,
    token=None,
):
    """Download the dataset and return preprocessed train/validation splits.

    The "train" split of the Hub dataset is divided into a training and a
    validation part, and every image is converted to RGB, resized and scaled
    to the [0, 1] range.

    Args:
        repo_id: Hugging Face Hub dataset identifier.
        test_size: Fraction of the "train" split held out for validation.
        seed: Seed passed to ``train_test_split`` so the split is repeatable.
        image_size: ``(width, height)`` passed to ``resize``.
        batch_size: Number of examples handed to ``preprocess`` per ``map`` call.
        token: Hub access token. When ``None`` the notebook-level ``HF_TOKEN``
            is used.

    Returns:
        tuple: ``(train_dataset, val_dataset)`` with "image" and "label"
        columns.
    """
    if token is None:
        # Fall back to the token defined in the login cell.
        token = HF_TOKEN

    # Load dataset
    dataset = load_dataset(repo_id, token=token)

    # Split train and validation data
    splits = dataset["train"].train_test_split(test_size=test_size, seed=seed)

    def preprocess(batch):
        """Turn a batch of images into float32 RGB arrays scaled to [0, 1].

        Assumes each entry of batch["image"] is a PIL image (it must support
        ``convert`` and ``resize``) -- TODO confirm against the dataset schema.
        """
        images = [img.convert("RGB").resize(image_size) for img in batch["image"]]
        return {
            # Dividing 8-bit channel values by 255 maps them into [0, 1].
            "image": [np.array(img, dtype=np.float32) / 255.0 for img in images],
            "label": batch["label"],
        }

    # Applying preprocessing
    train_dataset = splits["train"].map(preprocess, batched=True, batch_size=batch_size)
    val_dataset = splits["test"].map(preprocess, batched=True, batch_size=batch_size)

    return train_dataset, val_dataset

train_data, val_data = load_hf_data()

In [None]:
# Function for converting huggingface dataset to tensorflow formatted dataset
def to_tf_dataset(hf_dataset, shuffle=False, seed=42):
    """Build an unbatched ``tf.data.Dataset`` of ``(image, label)`` pairs.

    Args:
        hf_dataset: Dataset exposing an "image" column of equally shaped
            numeric arrays and a "label" column.
        shuffle: When true, the examples are reshuffled on every pass over the
            dataset (i.e. every epoch). Defaults to ``False`` so existing
            callers keep a fixed order.
        seed: Seed for the shuffle; only used when ``shuffle`` is true.

    Returns:
        tf.data.Dataset: One ``(image, label)`` element per example.
    """
    # Cast each image to float32 while stacking. The column may come back as
    # nested Python lists, which NumPy would otherwise widen to float64 --
    # presumably doubling memory for no benefit; verify against the installed
    # `datasets` version.
    images = np.stack([np.asarray(img, dtype=np.float32) for img in hf_dataset["image"]])
    labels = np.array(hf_dataset["label"])
    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        # A buffer as large as the dataset gives a full uniform shuffle.
        ds = ds.shuffle(buffer_size=len(labels), seed=seed, reshuffle_each_iteration=True)
    return ds

# Keras ignores fit(shuffle=...) for tf.data inputs, so the training set has to
# be shuffled inside the pipeline. The validation set keeps a fixed order so
# that predictions and labels gathered in separate passes stay aligned.
train_tf = to_tf_dataset(train_data, shuffle=True).batch(32).prefetch(tf.data.AUTOTUNE)
val_tf = to_tf_dataset(val_data).batch(32).prefetch(tf.data.AUTOTUNE)

In [None]:
# Define model: three convolutional stages followed by a 3-way softmax
# classifier, assembled layer by layer.
model = tf.keras.Sequential()

# Input layer: 224x224 images with 3 channels
model.add(tf.keras.layers.Input(shape=(224, 224, 3)))

# Convolutional layer 1
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu"))
model.add(tf.keras.layers.MaxPooling2D())

# Convolutional layer 2
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu"))
model.add(tf.keras.layers.MaxPooling2D())

# Convolutional layer 3, reduced to one value per filter by global pooling
model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation="relu"))
model.add(tf.keras.layers.GlobalAveragePooling2D())

# Output layer: one probability per class
model.add(tf.keras.layers.Dense(units=3, activation="softmax"))

# Compile model. The sparse loss takes integer class ids as labels.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

In [None]:
# Train model
# All three callbacks watch the validation loss (their default monitor,
# spelled out here for readability).
callbacks = [
    # Stop after 5 epochs without improvement and roll back to the best weights.
    tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True,
        verbose=1,
    ),
    # Keep only the best model seen so far on disk.
    tf.keras.callbacks.ModelCheckpoint(
        filepath="thyroid_model.keras",
        monitor="val_loss",
        save_best_only=True,
        verbose=1,
    ),
    # Multiply the learning rate by 0.3 after 3 epochs without improvement.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.3,
        patience=3,
        verbose=1,
    ),
]

# 125 is an upper bound on epochs; early stopping can end training sooner.
history = model.fit(
    x=train_tf,
    epochs=125,
    validation_data=val_tf,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Model evaluation
# NOTE: val_tf is the validation split that was also used for early stopping
# and checkpoint selection during training, so the numbers below are
# validation metrics, not metrics on an untouched test set.

# Assumes label ids 0, 1, 2 correspond to FTC, MTC, PTC in this order, as the
# original report's target names implied -- TODO confirm against the dataset's
# label feature.
class_names = ["FTC", "MTC", "PTC"]
class_ids = list(range(len(class_names)))

# val_tf is not shuffled, so predictions and labels collected in two separate
# passes line up element by element.
y_pred = np.argmax(model.predict(val_tf), axis=1)
y_true = np.concatenate([y for _, y in val_tf], axis=0)
loss, acc = model.evaluate(val_tf)

print(f"\nValidation Accuracy: {acc:.4f} | Validation Loss: {loss:.4f}")

# Passing labels= explicitly keeps the report and the matrix at three classes
# even if one class is missing from the validation split; without it,
# classification_report raises because target_names would not match the
# number of classes actually present.
print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred, labels=class_ids))