In [1]:
# VGG16: train an ImageNet-pretrained VGG16 classifier on the images under `train/`

In [10]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.utils import shuffle

In [11]:
# Function to load and preprocess images
def load_images(folder_path):
    """Load every readable image below ``folder_path`` into memory.

    ``folder_path`` is expected to contain one sub-directory per class.
    Hidden entries (names starting with ``.``) and entries that are not
    directories are ignored. Each image that ``cv2.imread`` can decode is
    resized to 224x224 and scaled to the [0, 1] range.

    Args:
        folder_path: Directory whose sub-directories are the class folders.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the
        float32 pixel data in the channel order produced by ``cv2.imread``
        (BGR according to the OpenCV documentation). ``labels`` holds, for
        each image, the index of its class folder in alphabetical order.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # class-name -> index mapping identical on every run and machine.
    # Non-directory entries (e.g. a README) are dropped because listing
    # them as class folders would raise NotADirectoryError.
    class_names = sorted(
        name for name in os.listdir(folder_path)
        if not name.startswith('.')
        and os.path.isdir(os.path.join(folder_path, name))
    )
    class_dict = {class_name: i for i, class_name in enumerate(class_names)}

    images = []
    labels = []
    for class_name in class_names:
        class_path = os.path.join(folder_path, class_name)
        # Sorted so the sample order (and therefore any seeded shuffle or
        # split applied afterwards) is reproducible.
        for filename in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, filename)

            # cv2.imread returns None for files it cannot decode; skip them.
            img = cv2.imread(img_path)
            if img is None:
                continue

            img = cv2.resize(img, (224, 224))  # VGG16 input size
            # float32 halves the memory of the default float64 result and is
            # the precision Keras trains in anyway.
            images.append(img.astype(np.float32) / 255.0)
            labels.append(class_dict[class_name])

    return np.array(images, dtype=np.float32), np.array(labels)


In [12]:
# Remove duplicate images based on content
def remove_duplicates(images, labels):
    """Drop images whose pixel data exactly matches an earlier image.

    The first occurrence of each distinct image is kept, together with the
    label at the same position; later byte-identical copies are discarded.

    Args:
        images: Indexable collection of NumPy arrays (one per image).
        labels: Indexable collection of labels, aligned with ``images``.

    Returns:
        A tuple ``(unique_images, unique_labels)`` of NumPy arrays in the
        original order of first occurrence.
    """
    import hashlib  # function-scope import keeps the cell self-contained

    seen_digests = set()
    unique_images = []
    unique_labels = []

    for index, image in enumerate(images):
        # A SHA-256 digest is used instead of the built-in 64-bit hash():
        # with hash() as the only key, a collision between two different
        # images would silently throw one of them away.
        digest = hashlib.sha256(image.tobytes()).digest()
        if digest in seen_digests:
            continue
        seen_digests.add(digest)
        unique_images.append(image)
        unique_labels.append(labels[index])

    return np.array(unique_images), np.array(unique_labels)


In [13]:
# Load and preprocess data
# load_images walks the class sub-folders of `data_folder` and returns the
# resized, [0, 1]-scaled images together with their integer class labels.
# Everything is held in memory at once.
data_folder = 'train'
images, labels = load_images(data_folder)

In [14]:
# Remove duplicates
# Keeps the first occurrence of each image (identity is decided from a hash of
# its raw pixel bytes) along with the matching label.
# `images` / `labels` are rebound here, so the un-deduplicated arrays are no
# longer reachable after this cell runs.
images, labels = remove_duplicates(images, labels)


In [15]:
# Shuffle the data
# Images and labels are permuted together; random_state fixes the permutation.
# NOTE(review): train_test_split shuffles by default as well, so this step may
# be redundant — confirm before removing it.
images, labels = shuffle(images, labels, random_state=42)


In [16]:
# Split the data into training and testing sets
# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

# Create data generators for data augmentation
# Random rotations, shifts, shears, zooms and horizontal flips; pixels exposed
# by a transform are filled using the 'nearest' mode.
train_datagen = ImageDataGenerator(
    fill_mode='nearest',
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
)


In [17]:
# Create the VGG16 model
# ImageNet-pretrained convolutional base without its original classification
# head; the input shape matches the 224x224, 3-channel arrays built above.
# NOTE(review): base_model is left trainable, so every VGG16 weight is updated
# during training — confirm that full fine-tuning is intended.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Size the output layer from the largest label id, not from the number of
# distinct labels: if a class folder yielded no readable images the ids have a
# gap, and len(np.unique(labels)) would be smaller than the highest id the
# sparse cross-entropy loss is asked to index.
num_classes = int(np.max(labels)) + 1

model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))


In [18]:
# Use the legacy optimizer and specify learning_rate
# The legacy namespace only works with Keras 2; under Keras 3 constructing
# tf.keras.optimizers.legacy.Adam raises. Fall back to the standard Adam with
# the same learning rate so the notebook still runs on newer TensorFlow.
try:
    optimizer_legacy = tf.keras.optimizers.legacy.Adam(learning_rate=0.0001)
except (AttributeError, ImportError):
    optimizer_legacy = Adam(learning_rate=0.0001)


In [19]:
# Compile the model
# Labels are integer class ids, hence the sparse categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer_legacy,
    metrics=['accuracy'],
)
# Set up callbacks
# Neither callback passes `monitor`, so both use the Keras default metric.
checkpoint = ModelCheckpoint(filepath="best_model.h5", save_best_only=True)
early_stopping = EarlyStopping(restore_best_weights=True, patience=10)


In [None]:
# Train the model
# Training batches are augmented on the fly; the held-out split is passed as
# validation data and therefore drives both the checkpoint and early stopping.
history = model.fit(
    x=train_datagen.flow(x=X_train, y=y_train, batch_size=32),
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint, early_stopping],
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.utils import shuffle

# Function to load and preprocess images
def load_images(folder_path):
    """Load every readable image below ``folder_path`` into memory.

    ``folder_path`` is expected to contain one sub-directory per class.
    Hidden entries (names starting with ``.``) and entries that are not
    directories are ignored. Each image that ``cv2.imread`` can decode is
    resized to 224x224 and scaled to the [0, 1] range.

    Args:
        folder_path: Directory whose sub-directories are the class folders.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the
        float32 pixel data in the channel order produced by ``cv2.imread``
        (BGR according to the OpenCV documentation). ``labels`` holds, for
        each image, the index of its class folder in alphabetical order.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # class-name -> index mapping identical on every run and machine.
    # Non-directory entries (e.g. a README) are dropped because listing
    # them as class folders would raise NotADirectoryError.
    class_names = sorted(
        name for name in os.listdir(folder_path)
        if not name.startswith('.')
        and os.path.isdir(os.path.join(folder_path, name))
    )
    class_dict = {class_name: i for i, class_name in enumerate(class_names)}

    images = []
    labels = []
    for class_name in class_names:
        class_path = os.path.join(folder_path, class_name)
        # Sorted so the sample order (and therefore any seeded shuffle or
        # split applied afterwards) is reproducible.
        for filename in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, filename)

            # cv2.imread returns None for files it cannot decode; skip them.
            img = cv2.imread(img_path)
            if img is None:
                continue

            img = cv2.resize(img, (224, 224))  # VGG16 input size
            # float32 halves the memory of the default float64 result and is
            # the precision Keras trains in anyway.
            images.append(img.astype(np.float32) / 255.0)
            labels.append(class_dict[class_name])

    return np.array(images, dtype=np.float32), np.array(labels)

# Remove duplicate images based on content
def remove_duplicates(images, labels):
    """Drop images whose pixel data exactly matches an earlier image.

    The first occurrence of each distinct image is kept, together with the
    label at the same position; later byte-identical copies are discarded.

    Args:
        images: Indexable collection of NumPy arrays (one per image).
        labels: Indexable collection of labels, aligned with ``images``.

    Returns:
        A tuple ``(unique_images, unique_labels)`` of NumPy arrays in the
        original order of first occurrence.
    """
    import hashlib  # function-scope import keeps the cell self-contained

    seen_digests = set()
    unique_images = []
    unique_labels = []

    for index, image in enumerate(images):
        # A SHA-256 digest is used instead of the built-in 64-bit hash():
        # with hash() as the only key, a collision between two different
        # images would silently throw one of them away.
        digest = hashlib.sha256(image.tobytes()).digest()
        if digest in seen_digests:
            continue
        seen_digests.add(digest)
        unique_images.append(image)
        unique_labels.append(labels[index])

    return np.array(unique_images), np.array(unique_labels)

# Load and preprocess data
# load_images walks the class sub-folders of `data_folder` and returns the
# resized, [0, 1]-scaled images together with their integer class labels.
# Everything is held in memory at once.
data_folder = 'train'
images, labels = load_images(data_folder)

# Remove duplicates
# Keeps the first occurrence of each image (identity is decided from a hash of
# its raw pixel bytes) along with the matching label.
images, labels = remove_duplicates(images, labels)

# Shuffle the data
# Images and labels are permuted together; random_state fixes the permutation.
# NOTE(review): train_test_split shuffles by default as well, so this step may
# be redundant — confirm before removing it.
images, labels = shuffle(images, labels, random_state=42)

# Split the data into training and testing sets
# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

# Create data generators for data augmentation
# Random rotations, shifts, shears, zooms and horizontal flips; pixels exposed
# by a transform are filled using the 'nearest' mode.
train_datagen = ImageDataGenerator(
    fill_mode='nearest',
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
)

# Create the VGG16 model
# ImageNet-pretrained convolutional base without its original classification
# head; the input shape matches the 224x224, 3-channel arrays built above.
# NOTE(review): base_model is left trainable, so every VGG16 weight is updated
# during training — confirm that full fine-tuning is intended.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Size the output layer from the largest label id, not from the number of
# distinct labels: if a class folder yielded no readable images the ids have a
# gap, and len(np.unique(labels)) would be smaller than the highest id the
# sparse cross-entropy loss is asked to index.
num_classes = int(np.max(labels)) + 1

model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Use the legacy optimizer and specify learning_rate
# The legacy namespace only works with Keras 2; under Keras 3 constructing
# tf.keras.optimizers.legacy.Adam raises. Fall back to the standard Adam with
# the same learning rate so the notebook still runs on newer TensorFlow.
try:
    optimizer_legacy = tf.keras.optimizers.legacy.Adam(learning_rate=0.0001)
except (AttributeError, ImportError):
    optimizer_legacy = Adam(learning_rate=0.0001)

# Compile the model
# Labels are integer class ids, hence the sparse categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer_legacy,
    metrics=['accuracy'],
)
# Set up callbacks
# Neither callback passes `monitor`, so both use the Keras default metric.
checkpoint = ModelCheckpoint(filepath="best_model.h5", save_best_only=True)
early_stopping = EarlyStopping(restore_best_weights=True, patience=10)

# Train the model
# Training batches are augmented on the fly; the held-out split is passed as
# validation data and therefore drives both the checkpoint and early stopping.
history = model.fit(
    x=train_datagen.flow(x=X_train, y=y_train, batch_size=32),
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint, early_stopping],
)
