In [1]:
# Basic Python Libraries
import numpy as np
import pandas as pd

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Image Processing
import cv2
from PIL import Image

# Machine Learning Framework (choose one)
# TensorFlow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

# OR PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.models as models

# Data Loading and Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Optimizer (LION for PyTorch)
# Install `lion-pytorch` separately
# from lion_pytorch import Lion  # Uncomment if you use the LION optimizer

# Additional Tools
from tqdm.notebook import tqdm  # For progress bars

# Advanced Data Augmentation (optional)
import albumentations as A
from albumentations.pytorch import ToTensorV2


  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [2]:
import keras.utils

dataset = keras.utils.image_dataset_from_directory(
    directory=r"D:\Project\Data\raw",  # Modified to use the specified path
    labels="inferred",
    label_mode="int",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
    pad_to_aspect_ratio=False,
    data_format=None,
    verbose=True,
)


Found 2573 files belonging to 8 classes.


In [3]:
import keras.utils
from tensorflow.data import AUTOTUNE

# Path to your dataset
data_dir = r"D:\Project\Data\raw"

# Parameters
batch_size = 32
img_size = (256, 256)
validation_split = 0.2  # 20% of data used for validation
seed = 123  # For reproducibility

# Training dataset
train_dataset = keras.utils.image_dataset_from_directory(
    directory=data_dir,
    labels="inferred",
    label_mode="int",
    color_mode="rgb",
    batch_size=batch_size,
    image_size=img_size,
    shuffle=True,
    seed=seed,
    validation_split=validation_split,
    subset="training",
)

# Validation dataset
validation_dataset = keras.utils.image_dataset_from_directory(
    directory=data_dir,
    labels="inferred",
    label_mode="int",
    color_mode="rgb",
    batch_size=batch_size,
    image_size=img_size,
    shuffle=True,
    seed=seed,
    validation_split=validation_split,
    subset="validation",
)

# Further split validation set into validation and test sets
test_split = 0.5  # Use 50% of the validation dataset for testing
val_batches = tf.data.experimental.cardinality(validation_dataset)
test_size = int(val_batches.numpy() * test_split)

test_dataset = validation_dataset.take(test_size)
validation_dataset = validation_dataset.skip(test_size)

# Optimize performance using prefetch
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)

# Output dataset details
print(f"Training dataset: {train_dataset}")
print(f"Validation dataset: {validation_dataset}")
print(f"Test dataset: {test_dataset}")


Found 2573 files belonging to 8 classes.
Using 2059 files for training.
Found 2573 files belonging to 8 classes.
Using 514 files for validation.
Training dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>
Validation dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>
Test dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers  # Import the layers module
import tensorflow as tf

# Add data augmentation to the training dataset
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"),  # Randomly flip images
    layers.RandomRotation(0.2),                   # Randomly rotate images
    layers.RandomZoom(0.2),                       # Randomly zoom into images
])

# Apply augmentation to training data
train_dataset = train_dataset.map(
    lambda x, y: (data_augmentation(x, training=True), y)
)


In [5]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

# Number of classes in your dataset
num_classes = 8

# Function to plot training and validation graphs
def plot_graphs(history, model_name):
    # Accuracy
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'{model_name} Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'{model_name} Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# =====================
# 1. MobileNetV2 Model
# =====================
print("\nTraining MobileNetV2 Model...")

mobilenet_model = models.Sequential([
    MobileNetV2(input_shape=(256, 256, 3), include_top=False, weights='imagenet', pooling='avg'),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

mobilenet_model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train MobileNetV2
mobilenet_history = mobilenet_model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=20,
    callbacks=[early_stopping]
)

# Evaluate MobileNetV2
mobilenet_loss, mobilenet_accuracy = mobilenet_model.evaluate(test_dataset)
print(f"MobileNetV2 - Test Loss: {mobilenet_loss}, Test Accuracy: {mobilenet_accuracy}")

# Plot graphs for MobileNetV2
plot_graphs(mobilenet_history, "MobileNetV2")



Training MobileNetV2 Model...


  MobileNetV2(input_shape=(256, 256, 3), include_top=False, weights='imagenet', pooling='avg'),


Epoch 1/20
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 4s/step - accuracy: 0.3168 - loss: 1.8787 - val_accuracy: 0.2558 - val_loss: 2.1750
Epoch 2/20
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 4s/step - accuracy: 0.7243 - loss: 0.8027 - val_accuracy: 0.3372 - val_loss: 2.1343
Epoch 3/20
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 4s/step - accuracy: 0.8250 - loss: 0.5435 - val_accuracy: 0.4496 - val_loss: 1.6065
Epoch 4/20
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 3s/step - accuracy: 0.8474 - loss: 0.4376 - val_accuracy: 0.4729 - val_loss: 1.7042
Epoch 5/20
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 3s/step - accuracy: 0.8655 - loss: 0.3984 - val_accuracy: 0.5659 - val_loss: 1.2242
Epoch 6/20
[1m29/65[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m1:06:04[0m 110s/step - accuracy: 0.8957 - loss: 0.3080

KeyboardInterrupt: 

In [23]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Paths to dataset
data_dir = "D:/Project/Data/raw"  # Update with your dataset path

# Dataset parameters
img_size = (256, 256)
batch_size = 32

# Load dataset
train_dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

test_dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=None,
    seed=123,
    image_size=img_size,
    batch_size=batch_size
)

# Optimize dataset loading
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)

# Model definition
base_model = EfficientNetB0(input_shape=(256, 256, 3), include_top=False, weights="imagenet")
base_model.trainable = False  # Freeze the base model

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)  # Dropout for regularization
output = Dense(len(train_dataset.class_names), activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# Callbacks for training
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=3, min_lr=1e-6)

# Train the model
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=30,
    callbacks=[early_stopping, reduce_lr]
)

# Evaluate the model on the test dataset
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"EfficientNetB0 - Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

# Fine-tune the model
base_model.trainable = True  # Unfreeze the base model

# Compile again with a lower learning rate
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# Fine-tune the model
fine_tune_history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=10,
    callbacks=[early_stopping, reduce_lr]
)

# Final evaluation
final_loss, final_accuracy = model.evaluate(test_dataset)
print(f"EfficientNetB0 (Fine-tuned) - Final Test Loss: {final_loss:.4f}, Final Test Accuracy: {final_accuracy:.4f}")


Found 2573 files belonging to 8 classes.
Using 2059 files for training.
Found 2573 files belonging to 8 classes.
Using 514 files for validation.
Found 2573 files belonging to 8 classes.


AttributeError: '_PrefetchDataset' object has no attribute 'class_names'