In [1]:
!pip install tensorflow-addons==0.16.1


Collecting tensorflow-addons==0.16.1
  Downloading tensorflow_addons-0.16.1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (1.8 kB)
Downloading tensorflow_addons-0.16.1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.16.1


In [2]:
# Import Libraries
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Lambda, RandomFlip, RandomRotation, RandomZoom, RandomContrast
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2

# Configuration
IMG_SIZE = (180, 180)   # size every image is resized to; also the VGG19 input height/width
BATCH_SIZE = 128        # alternative value noted by the author: 32
EPOCHS = 100            # upper bound on epochs; the EarlyStopping callback may stop sooner
SEED = 42               # shared seed for the train/test split and the framework RNGs
CLASS_NAMES = ['Eczema', 'ACNE', 'Psoriasis']  # index in this list == integer label
BASE_PATH = "/kaggle/input/acne-psoriasis-eczema-dataset/Acne_Psoriasis_Eczema_Dataset/"

# Seed Python's `random`, NumPy and TensorFlow in one call so weight
# initialisation and random augmentation are repeatable between runs.
tf.keras.utils.set_random_seed(SEED)

# 1. Data Preparation
def create_dataframe(base_path=None):
    """Index every image file found in the class folders.

    Args:
        base_path: Root directory that contains one sub-directory per class.
            When omitted, the module-level ``BASE_PATH`` is used.

    Returns:
        pandas.DataFrame with two columns: ``image_path`` (full path to the
        file) and ``target`` (0 = Eczema, 1 = ACNE, 2 = Psoriasis). Rows are
        ordered by class and then by file name.

    Raises:
        FileNotFoundError: If one of the expected class directories is missing.
    """
    root = BASE_PATH if base_path is None else base_path

    # (sub-directory name, integer label)
    class_info = [
        ("1. Eczema 1677", 0),
        ("ACNE", 1),
        ("Psoriasis", 2)
    ]
    image_extensions = ('.png', '.jpg', '.jpeg')

    records = []
    for class_dir, label in class_info:
        full_path = os.path.join(root, class_dir)
        if not os.path.exists(full_path):
            raise FileNotFoundError(f"Directory not found: {full_path}")

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # row order (and therefore the seeded train/test split) reproducible.
        for img_file in sorted(os.listdir(full_path)):
            if img_file.lower().endswith(image_extensions):
                records.append((os.path.join(full_path, img_file), label))

    return pd.DataFrame(records, columns=["image_path", "target"])

# Index the dataset on disk and report how many files each label has.
df = create_dataframe()
print("Class distribution:\n", df['target'].value_counts())

# 2. Handle Class Imbalance
# Map each label (0..n-1, in the order returned by np.unique) to its
# 'balanced' weight so rarer classes count more in the training loss.
class_weights = {
    position: weight
    for position, weight in enumerate(
        compute_class_weight('balanced', classes=np.unique(df['target']), y=df['target'])
    )
}
print("\nClass weights:", class_weights)

# 3. Image Loading and Preprocessing
def load_images(df):
    """Read, colour-convert and resize every image listed in ``df``.

    Files that cannot be decoded or processed are reported on stdout and
    skipped, so the returned arrays may be shorter than ``df``.

    Args:
        df: DataFrame with an ``image_path`` column (file paths) and a
            ``target`` column (integer labels).

    Returns:
        Tuple ``(images, labels)``: ``images`` is a float32 array of RGB
        images resized to ``IMG_SIZE``; ``labels`` holds the matching targets.
    """
    # IMG_SIZE is consumed as (height, width) by the model's input_shape,
    # but cv2.resize expects its dsize argument as (width, height).
    target_height, target_width = IMG_SIZE

    images = []
    labels = []

    # Iterate the two columns directly; iterrows() builds a Series per row.
    for image_path, target in zip(df['image_path'], df['target']):
        try:
            img = cv2.imread(image_path)
            if img is None:
                # imread signals an unreadable file by returning None
                raise ValueError("Could not read image")

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (target_width, target_height))
            images.append(img)
            labels.append(target)
        except Exception as e:
            # Deliberate best-effort: log the bad file and keep going.
            print(f"Error processing {image_path}: {str(e)}")

    return np.array(images, dtype=np.float32), np.array(labels)

# Decode every indexed image into memory.
images, labels = load_images(df)

# 4. Data Splitting
# Hold out 20% of the samples, keeping the class proportions in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=SEED, stratify=labels
)

Class distribution:
 target
0    9327
2    3812
1    3198
Name: count, dtype: int64

Class weights: {0: 0.5838604767520817, 1: 1.7028351052741297, 2: 1.4285589366911509}


In [None]:
# 5. Model Architecture (augmentation is applied in the tf.data pipeline, not inside the model)
def build_model():
    """Build and compile the VGG19 transfer-learning classifier.

    The network is: VGG19 preprocessing -> frozen ImageNet VGG19 backbone
    (no top) -> Flatten -> two L2-regularised Dense/Dropout stages -> 3-way
    softmax. It is compiled with Adam (lr 1e-4) and sparse categorical
    cross-entropy, so labels are expected as integer class ids.

    Returns:
        The compiled ``Sequential`` model, already built for inputs of shape
        ``(IMG_SIZE[0], IMG_SIZE[1], 3)``.
    """
    input_shape = (IMG_SIZE[0], IMG_SIZE[1], 3)

    # Pretrained convolutional backbone; frozen so only the new head trains.
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )
    base_model.trainable = False

    # Custom preprocessing layer for VGG19
    class VGGPreprocess(tf.keras.layers.Layer):
        """Applies ``vgg19.preprocess_input`` as the first step of the model."""

        def call(self, inputs):
            return tf.keras.applications.vgg19.preprocess_input(inputs)

    # An explicit Input builds the model immediately, so model.summary()
    # reports real output shapes and parameter counts before training.
    model = Sequential([
        tf.keras.Input(shape=input_shape),
        VGGPreprocess(),
        base_model,
        Flatten(),
        Dense(512, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.5),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.3),
        Dense(3, activation='softmax')
    ])

    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Random augmentation pipeline, kept outside the classifier so it can be
# applied only to the training dataset.
augmentation = Sequential(
    layers=[
        RandomFlip("horizontal_and_vertical"),  # mirror along either axis
        RandomRotation(0.2),
        RandomZoom(0.15),
        RandomContrast(0.1),
    ]
)

# Create optimized data pipeline
def create_dataset(images, labels, training=False, shuffle_buffer=2048):
    """Wrap in-memory arrays in a batched, prefetching ``tf.data`` pipeline.

    Args:
        images: Array of images; the first axis indexes samples.
        labels: Array of labels aligned with ``images``.
        training: When True, samples are shuffled and passed through the
            module-level ``augmentation`` model before batching.
        shuffle_buffer: Maximum number of samples held in the shuffle buffer
            for training data. Pass 0 or None to disable shuffling.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches of size
        ``BATCH_SIZE``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))

    if training:
        # Keras ignores fit(shuffle=...) for tf.data inputs, so reshuffle here;
        # otherwise every epoch would see identical batches in identical order.
        # The buffer is capped to bound the extra memory it holds.
        buffer_size = min(len(images), shuffle_buffer or 0)
        if buffer_size > 1:
            dataset = dataset.shuffle(buffer_size)

        # Augment sample-by-sample (before batching), in parallel.
        dataset = dataset.map(
            lambda x, y: (augmentation(x, training=True), y),
            num_parallel_calls=tf.data.AUTOTUNE
        )

    return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Build both input pipelines inside a CPU device scope; only the training
# pipeline gets augmentation.
with tf.device('/CPU:0'):
    train_dataset = create_dataset(images=X_train, labels=y_train, training=True)
    test_dataset = create_dataset(images=X_test, labels=y_test, training=False)

# 6. Define Callbacks
# Both callbacks watch val_loss. The LR scheduler must have a shorter patience
# than EarlyStopping: with equal patience the learning-rate cut would
# typically fire in the very epoch training is stopped, so it could never help.
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=20,
        restore_best_weights=True,  # roll back to the best-val_loss epoch
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,   # new_lr = lr * 0.2
        patience=5,   # allows several reductions inside one EarlyStopping window
        verbose=1
    )
]

# Instantiate the classifier and print its layer table.
model = build_model()
model.summary()

# Train with class weighting. Note that the held-out test split is what
# supplies val_loss / val_accuracy to the callbacks here.
history = model.fit(
    x=train_dataset,
    epochs=EPOCHS,
    validation_data=test_dataset,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1,
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Epoch 1/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 1s/step - accuracy: 0.4360 - loss: 17.3847 - val_accuracy: 0.6169 - val_loss: 13.6599 - learning_rate: 1.0000e-04
Epoch 2/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 922ms/step - accuracy: 0.5132 - loss: 14.0644 - val_accuracy: 0.6105 - val_loss: 12.9531 - learning_rate: 1.0000e-04
Epoch 3/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 920ms/step - accuracy: 0.5242 - loss: 13.0508 - val_accuracy: 0.6190 - val_loss: 12.2934 - learning_rate: 1.0000e-04
Epoch 4/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 927ms/step - accuracy: 0.5385 - loss: 12.3092 - val_accuracy: 0.6169 - val_loss: 11.6357 - learning_rate: 1.0000e-04
Epoch 5/100
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 928ms/step - accuracy: 0.5397 - loss: 11.6295 - val_accuracy: 0.6239 - val_loss: 10.9847 - learning_rate: 1.0000e-04
Epoch 6/100
[1m103/10

In [None]:
# 7. Evaluation
# Predicted class = index of the highest softmax probability.
y_pred = model.predict(X_test).argmax(axis=1)

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=CLASS_NAMES))

# Confusion matrix heatmap: rows are true classes, columns are predictions.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_matrix(y_test, y_pred),
    annot=True,
    fmt='d',
    xticklabels=CLASS_NAMES,
    yticklabels=CLASS_NAMES,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()


In [None]:
# Persist the trained model as a single HDF5 file; the final-evaluation
# cell reloads it from this exact file name.
model.save(filepath='skin_disease_classifier_augmented.h5')
print("Model saved successfully in H5 format!")

In [None]:

# 9. Final Evaluation
from tensorflow.keras.models import load_model
import numpy as np
from sklearn.metrics import accuracy_score

# Redefine custom preprocessing layer class (MUST match original definition)
class VGGPreprocess(tf.keras.layers.Layer):
    """Keras layer that runs its input through ``vgg19.preprocess_input``.

    Declared at notebook scope so it can be handed to ``load_model`` through
    ``custom_objects`` under the name ``VGGPreprocess``.
    """

    def call(self, inputs):
        """Return ``inputs`` after VGG19 preprocessing."""
        vgg_ready = tf.keras.applications.vgg19.preprocess_input(inputs)
        return vgg_ready

# Reload the saved model; the custom preprocessing layer has to be supplied
# by name so Keras can rebuild it from the H5 file.
model = load_model(
    'skin_disease_classifier_augmented.h5',
    custom_objects={'VGGPreprocess': VGGPreprocess}
)

# load_images() already returns float32, so copy=False makes this a no-op
# check rather than a second full copy of the test images.
X_test_prepared = X_test.astype('float32', copy=False)

# Evaluate straight from the arrays in batches, the same way predict() is
# called below, instead of wrapping the data in another in-memory dataset.
test_loss, test_accuracy = model.evaluate(
    X_test_prepared,
    y_test,
    batch_size=BATCH_SIZE,
    verbose=0
)

print(f"\nFinal Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Final Test Loss: {test_loss:.4f}")

# Predict in batches to avoid memory issues
y_pred_probs = model.predict(X_test_prepared, batch_size=BATCH_SIZE)
y_pred_classes = np.argmax(y_pred_probs, axis=1)

# Cross-check the Keras accuracy metric against scikit-learn on the same data.
manual_accuracy = accuracy_score(y_test, y_pred_classes)
print(f"\nManual Accuracy Verification: {manual_accuracy * 100:.2f}%")

# Allow small numerical differences due to floating point operations
assert abs(test_accuracy - manual_accuracy) < 1e-4, \
    f"Accuracy mismatch: {test_accuracy} vs {manual_accuracy}"