In [1]:
#Generating a dataset of 200 images. You can use your own dataset.

import os
import numpy as np
from PIL import Image
import pandas as pd

# Set directories
os.makedirs("dataset/images", exist_ok=True)

labels = []
file_names = []

# Generate 100 black images (no defect)
for i in range(100):
    img = np.zeros((128, 128), dtype=np.uint8)  # black image
    file_name = f"dataset/images/no_defect_{i}.png"
    Image.fromarray(img).save(file_name)
    file_names.append(file_name)
    labels.append("no_defect")

# Generate 100 images with random white scratches (defect)
for i in range(100):
    img = np.zeros((128, 128), dtype=np.uint8)
    num_scratches = np.random.randint(1, 6)
    for _ in range(num_scratches):
        # Randomly choose orientation and position
        orientation = np.random.choice(["horizontal", "vertical"])
        if orientation == "horizontal":
            y = np.random.randint(0, 128)
            x_start = np.random.randint(0, 100)
            x_end = x_start + np.random.randint(5, 28)
            img[y, x_start:x_end] = 255
        else:
            x = np.random.randint(0, 128)
            y_start = np.random.randint(0, 100)
            y_end = y_start + np.random.randint(5, 28)
            img[y_start:y_end, x] = 255
    file_name = f"dataset/images/defect_{i}.png"
    Image.fromarray(img).save(file_name)
    file_names.append(file_name)
    labels.append("defect")

# Create dataframe
df = pd.DataFrame({"file_name": file_names, "label": labels})
df.to_csv("dataset/labels.csv", index=False)
print("Dataset created with labels.csv")


Dataset created with labels.csv


In [2]:
#Training images by baseline CNN

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import pickle

# Load dataframe
df = pd.read_csv("dataset/labels.csv")

# Encode labels
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['label_enc'] = le.fit_transform(df['label'])  # 0=no_defect, 1=defect

# Split dataset
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label_enc'])

# Image data generator
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_gen = train_datagen.flow_from_dataframe(
    train_df,
    x_col='file_name',
    y_col='label',
    target_size=(128,128),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=16,
    shuffle=True
)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    x_col='file_name',
    y_col='label',
    target_size=(128,128),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=16,
    shuffle=False
)

# Baseline CNN model
model = Sequential([
    Conv2D(16, (3,3), activation='relu', input_shape=(128,128,1)),
    MaxPooling2D(2,2),
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(train_gen, validation_data=val_gen, epochs=5)

# Save model
model.save("scratch_detector_cnn.h5")

# Save label encoder
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)

print("Model and label encoder saved!")


Found 160 validated image filenames belonging to 2 classes.
Found 40 validated image filenames belonging to 2 classes.
Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 251ms/step - accuracy: 0.4721 - loss: 0.6754 - val_accuracy: 0.5000 - val_loss: 0.5346
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 228ms/step - accuracy: 0.6974 - loss: 0.4823 - val_accuracy: 1.0000 - val_loss: 0.3757
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 257ms/step - accuracy: 1.0000 - loss: 0.3499 - val_accuracy: 1.0000 - val_loss: 0.3060
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 228ms/step - accuracy: 1.0000 - loss: 0.2558 - val_accuracy: 1.0000 - val_loss: 0.1944
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 194ms/step - accuracy: 1.0000 - loss: 0.1305 - val_accuracy: 0.9500 - val_loss: 0.0984




Model and label encoder saved!


In [None]:
#Now, you can save the .pkl file and .h5 file and run the app.py to run the UI and predict defect in an unseen test image.