# In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Rescaling, RandomFlip, RandomRotation, RandomZoom, RandomContrast
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.applications import VGG16
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from collections import Counter
import kerastuner as kt

# Business Context:
# This project aims to classify products as either "Defective" or "Good."
# Goal: Improve production quality by accurately detecting defective products,
# which helps reduce waste, improve customer satisfaction, and lower operational costs.

# Constants
# Mini-batch size used by every tf.data pipeline in this script.
BATCH_SIZE = 16
# Spatial size every image is resized to on load (also the model input size).
IMG_HEIGHT, IMG_WIDTH = 224, 224
# Number of stratified cross-validation folds.
K_FOLDS = 5

# Load datasets
# Labels are inferred from the sub-directory structure of 'data' and returned
# as integers; images are resized to (IMG_HEIGHT, IMG_WIDTH) and batched.
dataset = image_dataset_from_directory(
    directory='data',
    labels='inferred',
    label_mode='int',
    batch_size=BATCH_SIZE,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
)

# EDA: Explore the dataset
def explore_dataset(dataset, dataset_name, class_names=("Defective", "Good")):
    """Print the class distribution and plot it together with sample images.

    Args:
        dataset: Batched dataset yielding ``(images, labels)`` pairs. It must
            provide ``unbatch()`` and ``take()``, and labels must be integer
            class ids.
        dataset_name: Human-readable name used in printed text and plot titles.
        class_names: Display names indexed by integer class id. Ids outside
            this sequence are shown as their numeric value.

    Returns:
        None. Output is printed and shown through matplotlib.
    """
    def _label_name(class_id):
        # Map an integer class id to its display name, falling back to the id.
        idx = int(class_id)
        return class_names[idx] if 0 <= idx < len(class_names) else str(idx)

    print(f"\n--- Exploring {dataset_name} Dataset ---")
    class_counts = Counter(int(label.numpy()) for _, label in dataset.unbatch())
    if not class_counts:
        # Nothing to count or plot; avoid drawing empty figures.
        print(f"{dataset_name} Dataset is empty; nothing to explore.")
        return

    # Sort the ids so printed rows, bars, and tick labels always line up,
    # independent of the order in which labels were first encountered.
    class_ids = sorted(class_counts)
    print(f"Class Distribution in {dataset_name} Dataset:")
    for cls in class_ids:
        print(f"Class {cls} ({_label_name(cls)}): {class_counts[cls]} samples")

    plt.figure(figsize=(6, 4))
    sns.barplot(x=class_ids, y=[class_counts[cls] for cls in class_ids], palette="viridis")
    plt.title(f"{dataset_name} Dataset Class Distribution")
    plt.xlabel("Class")
    plt.ylabel("Count")
    # One tick per class actually present (a single-class dataset gets one label).
    plt.xticks(range(len(class_ids)), [_label_name(cls) for cls in class_ids])
    plt.show()

    plt.figure(figsize=(10, 10))
    for images, labels in dataset.take(1):
        # The first batch may hold fewer than 9 images (small dataset or
        # small batch size), so never index past its end.
        n_samples = min(9, int(images.shape[0]))
        for i in range(n_samples):
            plt.subplot(3, 3, i + 1)
            plt.imshow(images[i].numpy().astype("uint8"))
            plt.title(f"Class: {_label_name(labels[i].numpy())}")
            plt.axis("off")
    plt.suptitle(f"Sample Images from {dataset_name} Dataset", fontsize=16)
    plt.show()

# Perform EDA on the dataset
explore_dataset(dataset, "Complete")

# Data augmentation and normalization
# Random transforms chained in a fixed order: horizontal flip, rotation,
# zoom, then contrast jitter (each random layer uses a factor of 0.1).
data_augmentation = tf.keras.Sequential(
    layers=[
        RandomFlip(mode="horizontal"),
        RandomRotation(factor=0.1),
        RandomZoom(height_factor=0.1),
        RandomContrast(factor=0.1),
    ]
)

def preprocess_image(image, label, augment=True):
    """Optionally augment an image (or batch of images) and rescale it by 1/255.

    Args:
        image: Image tensor to pass through ``data_augmentation`` and the
            1/255 rescaling layer.
        label: Label associated with ``image``; returned unchanged.
        augment: When True (the default, matching the previous behaviour)
            the random augmentation pipeline is applied before rescaling.
            Pass False for validation or inference data, which should not
            be randomly distorted.

    Returns:
        Tuple ``(image, label)`` with the processed image.
    """
    if augment:
        # Request training mode explicitly so the random layers are active
        # no matter how the surrounding call context resolves the flag.
        image = data_augmentation(image, training=True)
    image = Rescaling(1./255)(image)
    return image, label

# NOTE: preprocess_image is intentionally NOT mapped over the full dataset
# here. The per-fold pipelines built during cross-validation already apply
# preprocessing to the arrays extracted from this dataset; mapping it here as
# well would rescale pixels by 1/255 twice and bake one fixed random
# augmentation into every extracted image.

# Prefetch for performance improvement
AUTOTUNE = tf.data.AUTOTUNE
dataset = dataset.cache().prefetch(buffer_size=AUTOTUNE)

# Hyperparameter tuning
def model_builder(hp):
    """Build and compile the classifier for a single tuner trial.

    The network is a frozen, ImageNet-initialised VGG16 convolutional base
    followed by Flatten -> Dense(512, relu) -> Dropout(0.5) ->
    Dense(2, softmax).

    Args:
        hp: Hyperparameter container supplied by the tuner; only its
            ``Choice`` method is used, to pick the Adam learning rate.

    Returns:
        The compiled Sequential model.
    """
    # Frozen feature extractor: only the classification head is trained.
    backbone = VGG16(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    )
    backbone.trainable = False

    classifier = Sequential()
    classifier.add(backbone)
    classifier.add(Flatten())
    classifier.add(Dense(512, activation='relu'))
    classifier.add(Dropout(0.5))
    classifier.add(Dense(2, activation='softmax'))

    # The learning rate is the only hyperparameter being searched.
    learning_rate = hp.Choice('learning_rate', values=[1e-4, 1e-3, 1e-2])
    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    classifier.compile(
        optimizer=adam,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Hyperband search over the models produced by model_builder, ranked by
# validation accuracy; trial state is stored under
# tuner_dir/product_classifier_tuning.
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='tuner_dir',
    project_name='product_classifier_tuning',
)

# Extract images and labels from the dataset
def get_images_and_labels(dataset):
    """Materialise a batched dataset into two NumPy arrays.

    Args:
        dataset: Object whose ``unbatch()`` yields ``(image, label)`` pairs,
            each element exposing ``.numpy()``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays, one entry per sample,
        in iteration order.
    """
    # Convert every sample once, then split the pairs into parallel lists.
    samples = [(img.numpy(), lbl.numpy()) for img, lbl in dataset.unbatch()]
    image_arrays = [img for img, _ in samples]
    label_values = [lbl for _, lbl in samples]
    return np.array(image_arrays), np.array(label_values)

images, labels = get_images_and_labels(dataset)


def _rescale_only(image, label):
    """Rescale pixels by 1/255 without any random augmentation."""
    return Rescaling(1./255)(image), label


# Cross-validation with hyperparameter tuning
# A fixed random_state keeps the fold assignment identical between runs so
# that results are comparable.
kf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True, random_state=42)
accuracies, precisions, recalls, f1s = [], [], [], []

for fold_no, (train_index, val_index) in enumerate(kf.split(images, labels), start=1):
    print(f"--- Fold {fold_no} ---")
    train_images, val_images = images[train_index], images[val_index]
    train_labels, val_labels = labels[train_index], labels[val_index]

    # Training pipeline: reshuffle every epoch and do NOT cache after the
    # augmenting map, otherwise the first epoch's random transforms would be
    # replayed unchanged in every later epoch.
    train_data = (
        tf.data.Dataset.from_tensor_slices((train_images, train_labels))
        .shuffle(len(train_labels), reshuffle_each_iteration=True)
        .batch(BATCH_SIZE)
        .map(preprocess_image, num_parallel_calls=AUTOTUNE)
        .prefetch(buffer_size=AUTOTUNE)
    )
    # Validation pipeline: same rescaling as training but no random
    # augmentation, so metrics are measured on undistorted images. It is
    # deterministic, which makes caching safe.
    val_data = (
        tf.data.Dataset.from_tensor_slices((val_images, val_labels))
        .batch(BATCH_SIZE)
        .map(_rescale_only, num_parallel_calls=AUTOTUNE)
        .cache()
        .prefetch(buffer_size=AUTOTUNE)
    )

    # A fresh tuner per fold: a tuner that has already finished its search
    # (or that reloads an existing project from disk) would return trials
    # scored on a different fold's validation data. overwrite=True also
    # discards stale results left behind by previous runs.
    fold_tuner = kt.Hyperband(
        model_builder,
        objective='val_accuracy',
        max_epochs=10,
        factor=3,
        directory='tuner_dir',
        project_name=f'product_classifier_tuning_fold_{fold_no}',
        overwrite=True,
    )
    fold_tuner.search(train_data, validation_data=val_data, epochs=10)
    best_hps = fold_tuner.get_best_hyperparameters(num_trials=1)[0]
    print(f"Best hyperparameters for fold {fold_no}: Learning rate: {best_hps.get('learning_rate')}")

    model = fold_tuner.hypermodel.build(best_hps)
    history = model.fit(train_data, validation_data=val_data, epochs=10)
    model.save_weights(f'defectandgood_classifier_fold_{fold_no}.weights.h5')

    # Evaluate the model
    # NOTE(review): the hyperparameters were selected on this same validation
    # fold, so the scores below are optimistic; use nested cross-validation or
    # a held-out test set for an unbiased estimate.
    val_predictions = np.argmax(model.predict(val_data), axis=-1)
    # val_data is not shuffled, so predictions align with val_labels.
    val_true = val_labels

    # NOTE(review): precision/recall/F1 use scikit-learn's default positive
    # label (1). explore_dataset displays label 0 as 'Defective', so these
    # scores describe the other class; pass pos_label=0 if defect detection
    # is the quantity of interest - confirm with stakeholders.
    accuracies.append(accuracy_score(val_true, val_predictions))
    precisions.append(precision_score(val_true, val_predictions))
    recalls.append(recall_score(val_true, val_predictions))
    f1s.append(f1_score(val_true, val_predictions))

    print(f"Fold {fold_no} Accuracy: {accuracies[-1]}")
    print(f"Fold {fold_no} Precision: {precisions[-1]}")
    print(f"Fold {fold_no} Recall: {recalls[-1]}")
    print(f"Fold {fold_no} F1 Score: {f1s[-1]}")

    # Clear Keras session
    tf.keras.backend.clear_session()

# Calculate average metrics
# Each average is the arithmetic mean of the per-fold scores collected above.
average_accuracy, average_precision, average_recall, average_f1 = (
    np.mean(fold_scores)
    for fold_scores in (accuracies, precisions, recalls, f1s)
)
print(f"\nAverage Accuracy: {average_accuracy}")
print(f"Average Precision: {average_precision}")
print(f"Average Recall: {average_recall}")
print(f"Average F1 Score: {average_f1}")

# Business Impact Analysis
def business_impact_report(accuracy=None, precision=None, recall=None, f1=None):
    """Print a summary of the averaged cross-validation metrics.

    Args:
        accuracy: Average accuracy as a fraction; defaults to the
            module-level ``average_accuracy``.
        precision: Average precision; defaults to ``average_precision``.
        recall: Average recall; defaults to ``average_recall``.
        f1: Average F1 score; defaults to ``average_f1``.

    Returns:
        None. The report is written to standard output.
    """
    # Fall back to the module-level averages so the zero-argument call
    # keeps working.
    accuracy = average_accuracy if accuracy is None else accuracy
    precision = average_precision if precision is None else precision
    recall = average_recall if recall is None else recall
    f1 = average_f1 if f1 is None else f1

    print("\n--- Business Impact Report ---")
    # These are cross-validation (validation-fold) scores, not scores on a
    # held-out test set, so they are labelled accordingly.
    print(f"Final Average Validation Accuracy: {accuracy:.2f}")
    print(f"Final Average Precision: {precision:.2f}")
    print(f"Final Average Recall: {recall:.2f}")
    print(f"Final Average F1 Score: {f1:.2f}")
    print("Projected Business Benefits:")
    # Accuracy is the share of correctly classified samples; it is not a
    # measured reduction in defective products, so it is reported as such.
    print(f" - Products classified correctly in cross-validation: {accuracy * 100:.2f}%.")
    print(" - Improved quality control.")
    print(" - Enhanced customer satisfaction.")
    print(" - Lower production costs due to reduced waste.")

# Generate the report (called without arguments, so it reads the
# module-level average_* metrics computed above)
business_impact_report()

# Optional: Visualize training and validation performance for the last fold
def plot_training_history(history):
    """Plot training and validation accuracy per epoch.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the ``'accuracy'`` and ``'val_accuracy'`` series.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    curves = (
        ('accuracy', 'Training Accuracy'),
        ('val_accuracy', 'Validation Accuracy'),
    )

    plt.figure(figsize=(12, 6))
    for metric_key, legend_label in curves:
        plt.plot(history.history[metric_key], label=legend_label)
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.show()


# `history` is whatever the most recent model.fit call returned.
plot_training_history(history)