# Dogs Skin Disease Prediction Model

In [35]:
import os
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

base_dir = Path("Dataset")
IMG_HEIGHT = 244
IMG_WIDTH = 244
BATCH_SIZE = 8

## Preprocessing 

### Data Augmentation

In [40]:
IMG_HEIGHT=244
IMG_WIDTH=244
BATCH_SIZE=8
train_datagen = ImageDataGenerator(
   rescale=1.0/255,
   rotation_range=40,
   width_shift_range=0.3,
   height_shift_range=0.3,
   shear_range=0.3,
   zoom_range=0.3,
   horizontal_flip=True,
   vertical_flip=True,
   brightness_range=[0.7,1.3],
   channel_shift_range=50,
   fill_mode='nearest',
   validation_split=0.2
)

### Multiple Augmented Versions

In [45]:
def augment_directory(source_dir, num_augmented=5):
    for class_dir in source_dir.iterdir():
        if class_dir.is_dir():
            aug_dir = class_dir / 'augmented'
            if aug_dir.exists():
                continue  
                
            aug_dir.mkdir(exist_ok=True)
            
            for img_path in class_dir.glob('*.[jp][pn][g]'):
                img = load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
                x = img_to_array(img)
                x = x.reshape((1,) + x.shape)
                
                i = 0
                for batch in train_datagen.flow(x, batch_size=1,
                                             save_to_dir=aug_dir,
                                             save_prefix=f'aug_{img_path.stem}',
                                             save_format='jpg'):
                    i += 1
                    if i >= num_augmented:
                        break
                        
base_dir = Path("Dataset")  # or whatever your folder name is
augment_directory(base_dir, num_augmented=10)

### Creating generators

In [51]:
train_datagen = ImageDataGenerator(
    rescale=1.0/255  # Only rescaling since we're using pre-augmented images
)

validation_datagen = ImageDataGenerator(
    rescale=1.0/255
)

In [59]:
import shutil
# Create a separate directory for training and validation
train_dir = base_dir / 'training_data'
val_dir = base_dir / 'validation_data'

if not train_dir.exists() or not val_dir.exists():
    train_dir.mkdir(exist_ok=True)
    val_dir.mkdir(exist_ok=True)
    
    class_names = ['Allergies', 'Autoimmune', 'Healthy', 'Infections', 'Parasites']
    for class_name in class_names:
        # Create directories
        train_class_dir = train_dir / class_name
        val_class_dir = val_dir / class_name
        train_class_dir.mkdir(exist_ok=True)
        val_class_dir.mkdir(exist_ok=True)
        
        # Copy augmented images to training directory
        aug_dir = base_dir / class_name / 'augmented'
        if aug_dir.exists():
            for img_file in aug_dir.glob('*.[jp][pn][g]'):
                shutil.copy2(img_file, train_class_dir)
        
        # Copy original images (excluding augmented folder) to validation directory
        orig_dir = base_dir / class_name
        for img_file in orig_dir.glob('*.[jp][pn][g]'):
            if 'augmented' not in str(img_file):
                shutil.copy2(img_file, val_class_dir)

# Create generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

validation_generator = validation_datagen.flow_from_directory(
    val_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

print("Training samples (augmented):", train_generator.samples)
print("Validation samples (original):", validation_generator.samples)

Found 900 images belonging to 5 classes.
Found 90 images belonging to 5 classes.
Training samples (augmented): 900
Validation samples (original): 90


In [61]:
print("Classes:", train_generator.class_indices)
print("Training samples:", train_generator.samples)
print("Validation samples:", validation_generator.samples)

Classes: {'Allergies': 0, 'Autoimmune': 1, 'Healthy': 2, 'Infections': 3, 'Parasites': 4}
Training samples: 900
Validation samples: 90


## Model Training and Evaluation

In [64]:
from tensorflow.keras.applications import MobileNetV2, VGG16, EfficientNetB0
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
import numpy as np
import json

### Getting all images and labels 

As we have augmented images we need to collect the data from both 

In [74]:
# Prepare separate lists for training (augmented) and validation (original) data
train_data = []
train_labels = []
val_data = []
val_labels = []

# Define class names explicitly
class_names = ['Allergies', 'Autoimmune', 'Healthy', 'Infections', 'Parasites']
class_indices = {name: idx for idx, name in enumerate(class_names)}

# Collect data separately for training and validation
for class_name in class_names:
    class_dir = base_dir / class_name
    if class_dir.is_dir():
        class_index = class_indices[class_name]
        
        # Process original images for validation
        for img_path in class_dir.glob('*.[jp][pn][g]'):
            if 'augmented' not in str(img_path):  # Skip augmented directory
                img = load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
                x = img_to_array(img)
                x = x / 255.0  # Normalize
                val_data.append(x)
                label = np.zeros(len(class_indices))
                label[class_index] = 1
                val_labels.append(label)
        
        # Process augmented images for training
        aug_dir = class_dir / 'augmented'
        if aug_dir.exists():
            for img_path in aug_dir.glob('*.[jp][pn][g]'):
                img = load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
                x = img_to_array(img)
                x = x / 255.0  # Normalize
                train_data.append(x)
                label = np.zeros(len(class_indices))
                label[class_index] = 1
                train_labels.append(label)

# Convert to numpy arrays
train_data = np.array(train_data)
train_labels = np.array(train_labels)
val_data = np.array(val_data)
val_labels = np.array(val_labels)

print("Training samples (augmented):", len(train_data))
print("Validation samples (original):", len(val_data))

Training samples (augmented): 900
Validation samples (original): 90


### Creating the model

In [77]:
#Initializing model and training parameters
kf = KFold(n_splits=5, shuffle=True, random_state=42)
best_model = None
best_accuracy = 0
best_fold = None
accuracies = []
fold = 1

In [79]:
#Creating model
base_model = MobileNetV2(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                        include_top=False,
                        weights='imagenet')
base_model.trainable = False

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(len(train_generator.class_indices), activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.001),
             loss='categorical_crossentropy',
             metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
callbacks = [early_stopping]

  base_model = MobileNetV2(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),


In [21]:
# Create base model
base_model = EfficientNetB0(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                           include_top=False,
                           weights='imagenet')

# Fine-tune the last few layers
base_model.trainable = True
for layer in base_model.layers[:-20]:  # EfficientNet can benefit from fine-tuning more layers
    layer.trainable = False

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.Dropout(0.4),
    layers.Dense(len(train_generator.class_indices), activation='softmax')
])

# Learning rate scheduler
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,  # Smaller reduction factor
    patience=4,
    min_lr=1e-7
)

# Compile model
model.compile(optimizer=Adam(learning_rate=0.0003),  # Adjusted for EfficientNet
             loss='categorical_crossentropy',
             metrics=['accuracy'])

# Early stopping
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,  # Increased patience
    restore_best_weights=True,
    min_delta=0.001
)

# Add this when training
callbacks = [early_stopping, lr_scheduler]

### Training the model with k-fold Cross-Validation

In [None]:
# Initialize KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)
best_model = None
best_accuracy = 0
best_fold = None
accuracies = []
fold = 1

# Training with cross-validation
for train_index, val_index in kf.split(train_data):
    print(f"Training Fold {fold}...")
    x_train, x_val = train_data[train_index], train_data[val_index]
    y_train, y_val = train_labels[train_index], train_labels[val_index]
    
    history = model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=20,
        batch_size=BATCH_SIZE,
        callbacks=callbacks,
        verbose=1
    )
    
    eval_result = model.evaluate(x_val, y_val, verbose=0)
    accuracies.append(eval_result[1])
    print(f"Fold {fold} Accuracy: {eval_result[1]}")
    
    if eval_result[1] > best_accuracy:
        best_accuracy = eval_result[1]
        best_fold = fold
        best_model = tf.keras.models.clone_model(model)
        best_model.set_weights(model.get_weights())
    
    fold += 1

print(f"Cross-Validation Accuracy: {np.mean(accuracies)} ± {np.std(accuracies)}")
print(f"Best Fold: {best_fold} with Accuracy: {best_accuracy}")

# Final evaluation on validation data
final_eval = best_model.evaluate(val_data, val_labels, verbose=0)
print(f"Final Validation Accuracy: {final_eval[1]}")

Training Fold 1...
Epoch 1/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 94ms/step - accuracy: 0.3378 - loss: 1.7728 - val_accuracy: 0.6833 - val_loss: 0.8661
Epoch 2/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 93ms/step - accuracy: 0.6919 - loss: 0.8308 - val_accuracy: 0.7667 - val_loss: 0.6339
Epoch 3/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 85ms/step - accuracy: 0.8060 - loss: 0.5807 - val_accuracy: 0.8278 - val_loss: 0.4773
Epoch 4/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 84ms/step - accuracy: 0.8457 - loss: 0.4256 - val_accuracy: 0.8500 - val_loss: 0.4144
Epoch 5/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 85ms/step - accuracy: 0.9108 - loss: 0.2865 - val_accuracy: 0.8389 - val_loss: 0.3902
Epoch 6/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 87ms/step - accuracy: 0.8901 - loss: 0.2658 - val_accuracy: 0.8722 - val_loss: 0.3237
Epoch 7/20
[1

In [None]:
print(f"Cross-Validation Accuracy: {np.mean(accuracies)} ± {np.std(accuracies)}")
print(f"Best Fold: {best_fold} with Accuracy: {best_accuracy}")

### Saving the best model in a keras file

In [None]:
if best_model is not None:
    best_model.save('best_model.keras')
    print("Best model saved as 'best_model.keras'")

with open('class_indices.json', 'w') as f:
    json.dump(train_generator.class_indices, f)
    print("Class indices saved as 'class_indices.json'")

### Evaluating the model

In [None]:
#Confusion Matrix
y_pred = best_model.predict(x_val)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_val, axis=1)

conf_matrix = confusion_matrix(y_true, y_pred_classes)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", 
           xticklabels=train_generator.class_indices.keys(),
           yticklabels=train_generator.class_indices.keys())
plt.title("Confusion Matrix")
plt.ylabel("True Labels")
plt.xlabel("Predicted Labels")
plt.show()

In [None]:
print("\nClassification Report:")
print(classification_report(y_true, y_pred_classes, 
     target_names=train_generator.class_indices.keys()))