<h1 style="color:#e8710a">Importing Libraries</h1>

In [None]:
import pandas as pd
from PIL import Image
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Activation, Dropout, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt
import random

<h1 style="color:#e8710a">Reading the Image Paths and Labels</h1>

In [None]:
# Label file of the dataset and the folder that the image paths inside it are relative to
csv_path = "/kaggle/input/defective-solar-cells/elpv-dataset-master/labels.csv"
images_folder = "/kaggle/input/defective-solar-cells/elpv-dataset-master"

# Values at or above this cut-off become class 1, everything below becomes class 0.
# This gives the same result as the previous "<= 1/3 -> 0, >= 2/3 -> 1" rule for every
# value that rule could handle; values strictly in between used to hit int("0.x"),
# which always raised ValueError.
DEFECT_THRESHOLD = 0.5

with open(csv_path, "r") as file:
    lines = file.read().splitlines()

# Each usable row holds three whitespace-separated fields: filename, label, and type.
# NOTE(review): the label is presumably a defect probability in [0, 1] — confirm against
# the dataset documentation.
data = []
for line in lines:
    parts = line.split()
    # Rows without exactly three fields (e.g. blank lines) are ignored
    if len(parts) != 3:
        continue
    filename, label, _ = parts
    probability = float(label)  # parse once instead of on every comparison
    data.append(
        (os.path.join(images_folder, filename),
         1 if probability >= DEFECT_THRESHOLD else 0)
    )

<h1 style="color:#e8710a">Loading Images into Arrays</h1>

In [None]:
IMG_SIZE = 224

# Load every image as an IMG_SIZE x IMG_SIZE single-channel array; labels stay index-aligned
images = []
labels = []

for image_path, label in data:
    # The context manager closes the file handle as soon as the pixels have been read,
    # instead of leaving one open handle per image until garbage collection.
    with Image.open(image_path) as source_image:
        # Force 8-bit grayscale so the single-channel reshape below is always valid;
        # a multi-channel file would otherwise be silently split into several "images".
        image = source_image.convert("L").resize((IMG_SIZE, IMG_SIZE))
    images.append(np.array(image))
    labels.append(label)

# Stack into one array with an explicit trailing channel axis, plus a parallel label vector
images = np.array(images).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
labels = np.array(labels)

<h1 style="color:#e8710a">Some insights about the dataset</h1>

In [None]:
# Bar chart of the number of samples per class
class_counts = pd.DataFrame(labels).value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
class_counts.plot(kind='bar', ax=ax)
ax.set_title('Data Class Distribution')
ax.set_xlabel('Class')
ax.set_ylabel('Count')
plt.show()

<h1 style="color:#e8710a">Balance Data</h1>

In [None]:
from imblearn.over_sampling import SMOTE

# SMOTE expects a 2D (samples x features) matrix, so each image is flattened into one row
num_samples, img_height, img_width, num_channels = images.shape
flat_length = img_height * img_width * num_channels
images_2d = images.reshape(num_samples, flat_length)

# Oversample with a fixed random_state so the synthetic samples are repeatable.
# NOTE(review): this runs before the train/test split performed later in the notebook,
# so synthetic samples can be derived from images that end up in the test set —
# consider splitting first and resampling only the training portion.
smote = SMOTE(random_state=42, sampling_strategy='auto')
images_resampled_2d, labels_resampled = smote.fit_resample(images_2d, labels)

# Restore the original (height, width, channels) layout for every row
images_resampled = images_resampled_2d.reshape(-1, img_height, img_width, num_channels)

# From here on `images` / `labels` refer to the resampled dataset
images, labels = images_resampled, labels_resampled

In [None]:
# Same bar chart as before, now computed on the current (resampled) labels
balanced_counts = pd.DataFrame(labels).value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
balanced_counts.plot(kind='bar', ax=ax)
ax.set_title('Data Class Distribution')
ax.set_xlabel('Class')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Preview a few images with their labels.
# A dedicated, seeded generator makes the selection identical on every run without
# touching the global `random` state; the min() guard keeps the cell working when the
# dataset holds fewer images than the grid has slots.
NUM_PREVIEW_IMAGES = 9
PREVIEW_SEED = 42
sample_indices = random.Random(PREVIEW_SEED).sample(
    range(len(images)), min(NUM_PREVIEW_IMAGES, len(images))
)

# 3x3 grid, one image per cell
plt.figure(figsize=(10, 10))
plt.suptitle("Some Sample Images", fontsize=16)

for i, idx in enumerate(sample_indices):
    plt.subplot(3, 3, i + 1)
    plt.imshow(images[idx], cmap='gray')  # single-channel data rendered with a gray colormap
    plt.title(f"Label: {labels[idx]}")
    plt.axis('off')

plt.show()

<h1 style="color:#e8710a">Enhancing Images</h1>

In [None]:
# Define the image preprocessing function
def image_preprocessing(img):
    """Enhance the local contrast of one image with CLAHE.

    The input is cast to uint8, equalized by a CLAHE operator that uses an
    8x8 tile grid, and returned with a new axis inserted at position 2.

    Args:
        img: Image array. Presumably a single-channel image as produced by the
            loading step — TODO confirm for other callers.

    Returns:
        The equalized array with an extra axis at index 2.
    """
    as_uint8 = img.astype(np.uint8)
    equalizer = cv2.createCLAHE(tileGridSize=(8, 8))
    equalized = equalizer.apply(as_uint8)
    # Add the axis back so downstream code can keep indexing a channel dimension
    return np.expand_dims(equalized, axis=2)

In [None]:
# Run image_preprocessing over every image and collect the results in a single array
enhanced_images = np.array(list(map(image_preprocessing, images)))

<h1 style="color:#e8710a">Show Enhanced Images</h1>

In [None]:
# Show each sampled image next to its enhanced version.
# The grid is 3 rows x 6 columns: every row holds three (original, enhanced) pairs.
plt.figure(figsize=(15, 15))
plt.suptitle("Some Sample Images and Enhanced Versions", fontsize=16)

for pair_number, idx in enumerate(sample_indices):
    first_slot = 2 * pair_number + 1
    panels = (
        (first_slot, images[idx], f"Label: {labels[idx]}"),   # original on the left
        (first_slot + 1, enhanced_images[idx], "Enhanced"),   # enhanced version beside it
    )
    for slot, picture, caption in panels:
        plt.subplot(3, 6, slot)
        plt.imshow(picture, cmap='gray')
        plt.title(caption)
        plt.axis('off')

plt.show()

<h1 style="color:#e8710a">Split the Dataset into Train and Test Sets</h1>

In [None]:
# Hold out 20% of the enhanced images (with their labels) as the test set;
# the fixed random_state keeps the partition identical between runs.
split_parts = train_test_split(
    enhanced_images,
    labels,
    test_size=0.2,
    random_state=42,
)
train_images, test_images, train_labels, test_labels = split_parts

<h1 style="color:#e8710a">Data Augmentation</h1>

In [None]:
# batch_size = 32

# datagen = ImageDataGenerator(
#     rotation_range=40,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     brightness_range=[0.5, 1.5],
#     horizontal_flip=True,
#     vertical_flip=True,
#     fill_mode='nearest',
# )

# # Apply data augmentation to train data
# train_datagen = datagen.flow(train_images, train_labels, batch_size=batch_size)

<h1 style="color:#e8710a">Building The model</h1>

In [None]:
from tensorflow.keras.utils import plot_model  # same Keras namespace as the layers used below

model = Sequential()

# Convolutional block 1: 64 L2-regularized 3x3 filters on the single-channel input,
# followed by batch norm, ReLU, 2x2 max pooling and dropout
model.add(Conv2D(64, kernel_size=3, padding='same', kernel_regularizer=l2(0.02), input_shape=(IMG_SIZE, IMG_SIZE, 1)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool2D(2, padding='same'))
model.add(Dropout(0.3))

# Convolutional block 2: same layout with 128 filters
model.add(Conv2D(128, kernel_size=3, padding='same', kernel_regularizer=l2(0.02)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool2D(2, padding='same'))
model.add(Dropout(0.3))

# Classifier head
model.add(Flatten())
model.add(Dropout(0.5))
# ReLU makes this hidden layer non-linear. Without an activation a Dense layer is a
# purely linear map, which collapses into the following output unit and adds nothing.
# This also matches the Dense(512, activation='relu') head of the transfer-learning model.
model.add(Dense(512, activation='relu'))

# Output layer: a single sigmoid unit for the binary label
model.add(Dense(1, activation='sigmoid'))

# Render the architecture diagram (kept as the last expression so the notebook displays it)
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# Binary-classification setup: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Early stopping to limit overfitting: watches validation accuracy with a patience of
# 20 epochs and restores the best weights seen
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    restore_best_weights=True,
    verbose=2,
)

In [None]:
# Training budget: at most 200 epochs (the early-stopping callback may end sooner),
# mini-batches of 16 samples
num_epochs, batch_size = 200, 16

# NOTE(review): the test split doubles as validation data here, so early stopping and
# best-weight selection are tuned on the same data the model is later evaluated on.
fit_options = dict(
    epochs=num_epochs,
    batch_size=batch_size,
    validation_data=(test_images, test_labels),
    callbacks=[early_stopping],
)
history = model.fit(train_images, train_labels, **fit_options)

<h1 style="color:#e8710a">Training History</h1>

In [None]:
# Per-epoch curves recorded by model.fit
train_loss = history.history['loss']
val_loss = history.history['val_loss']
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Two side-by-side panels: loss on the left, accuracy on the right
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(train_loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('Training and Validation Loss')

acc_ax.plot(train_acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.set_title('Training and Validation Accuracy')

fig.tight_layout()
plt.show()

<h1 style="color:#e8710a">Model Evaluation</h1>

In [None]:
# Loss and accuracy of the trained model on the held-out test split
model.evaluate(x=test_images, y=test_labels)

<h1 style="color:#e8710a">Transfer Learning</h1>

In [None]:
from tensorflow.keras.applications import EfficientNetV2L

# ImageNet-pretrained EfficientNetV2L backbone without its classification top,
# configured for 3-channel IMG_SIZE x IMG_SIZE inputs
base_model = EfficientNetV2L(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Freeze every backbone layer so that only the new head below is trained
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Backbone followed by a small dense head ending in a single sigmoid unit
model = Sequential([
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
def grayscale_to_rgb(gray_images):
    """Replicate channel 0 of a batch of images into three identical channels.

    Args:
        gray_images: Array indexed as (sample, row, column, channel); only
            channel 0 is read. Rows and columns are expected to match IMG_SIZE.

    Returns:
        A uint8 array of shape (num_samples, IMG_SIZE, IMG_SIZE, 3) whose three
        channels all hold the values of the input's channel 0.
    """
    rgb_images = np.empty((gray_images.shape[0], IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
    # One broadcast assignment fills all three output channels from channel 0
    rgb_images[...] = gray_images[..., :1]
    return rgb_images


# The pretrained backbone takes 3-channel input, so both splits are converted the same way
train_images_rgb = grayscale_to_rgb(train_images)
test_images_rgb = grayscale_to_rgb(test_images)

In [None]:
# Upper bound on epochs for the transfer-learning run; batch_size and the
# early-stopping callback are the ones defined for the earlier training run
num_epochs = 100

history = model.fit(
    x=train_images_rgb,
    y=train_labels,
    batch_size=batch_size,
    epochs=num_epochs,
    callbacks=[early_stopping],
    validation_data=(test_images_rgb, test_labels),
)

In [None]:
# Per-epoch curves recorded by the most recent model.fit call
train_loss, val_loss = history.history['loss'], history.history['val_loss']
train_acc, val_acc = history.history['accuracy'], history.history['val_accuracy']

# One entry per panel: y-axis label, training curve + legend, validation curve + legend, title
panels = (
    ('Loss', train_loss, 'Training Loss', val_loss, 'Validation Loss',
     'Training and Validation Loss'),
    ('Accuracy', train_acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'Training and Validation Accuracy'),
)

plt.figure(figsize=(12, 4))
for position, (y_label, train_curve, train_legend, val_curve, val_legend, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(train_curve, label=train_legend)
    plt.plot(val_curve, label=val_legend)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.title(heading)

plt.tight_layout()
plt.show()

<h1 style="color:#e8710a">Model Evaluation</h1>

In [None]:
# Loss and accuracy of the transfer-learning model on the 3-channel test split
model.evaluate(x=test_images_rgb, y=test_labels)