In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense, Dropout
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans


2025-01-26 19:10:24.319044: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-26 19:10:24.328740: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737907824.339838   37120 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737907824.343455   37120 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-26 19:10:24.355413: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
# ---------------------------
# 1. IMAGE PROCESSING (EDGES + THRESH + STACK)
# ---------------------------

def preprocess_image(img):
    """
    Turn one image into a 3-plane feature image.

    Input handling:
      - a 3-dimensional array whose last axis has length 3 is converted with
        cv2.COLOR_BGR2GRAY;
      - any other input is used as the gray plane unchanged.

    The returned array is np.dstack of, in this order:
      1. the gray plane,
      2. its Canny edge map (hysteresis thresholds 100 and 200),
      3. its Otsu binary threshold (max value 255).
    For 2-D planes that yields shape (H, W, 3).
    """
    is_three_channel = img.ndim == 3 and img.shape[-1] == 3
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if is_three_channel else img

    edge_map = cv2.Canny(gray, 100, 200)

    # cv2.threshold returns (threshold_used, image); only the image is kept.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binary_mask = cv2.threshold(gray, 0, 255, otsu_flags)[1]

    return np.dstack((gray, edge_map, binary_mask))

def save_preprocessed_dataset(source_root, dest_root, splits=("train", "valid", "test"), classes=None):
    """
    Run preprocess_image over a <root>/<split>/<class>/*.bmp tree and mirror it.

    Args:
        source_root: directory holding one sub-directory per split.
        dest_root: directory that receives the same split/class layout;
            missing directories are created.
        splits: names of the split sub-directories to process.
        classes: class sub-directory names. When None, they are discovered as
            the sub-directories of ``source_root/splits[0]`` (sorted); plain
            files sitting next to the class folders are ignored.

    Only files whose name ends in ".bmp" (case-insensitive) are processed; each
    result is written to the destination under the same base name with a
    ".bmp" extension. Unreadable inputs and failed writes are reported with a
    printed warning and skipped.
    """
    if classes is None:
        first_split_dir = os.path.join(source_root, splits[0])
        # Keep directories only: stray files (README, .DS_Store, ...) are not
        # classes and would make the per-class os.listdir() below blow up.
        classes = sorted(
            entry for entry in os.listdir(first_split_dir)
            if os.path.isdir(os.path.join(first_split_dir, entry))
        )

    for split in splits:
        for cls in classes:
            src_dir = os.path.join(source_root, split, cls)
            dst_dir = os.path.join(dest_root, split, cls)
            os.makedirs(dst_dir, exist_ok=True)

            # Sorted for a deterministic processing/warning order.
            for fname in sorted(os.listdir(src_dir)):
                if not fname.lower().endswith('.bmp'):
                    continue
                src_path = os.path.join(src_dir, fname)
                img = cv2.imread(src_path)
                if img is None:
                    print(f"Warning: failed to read {src_path}")
                    continue
                processed = preprocess_image(img)
                outname = os.path.splitext(fname)[0] + ".bmp"
                outpath = os.path.join(dst_dir, outname)
                # cv2.imwrite signals failure through its return value rather
                # than an exception, so check it explicitly.
                if not cv2.imwrite(outpath, processed):
                    print(f"Warning: failed to write {outpath}")

In [3]:
# ---------------------------
# 2. BUILD + TRAIN A CNN
# ---------------------------

def build_cnn(input_shape=(200,200,3), num_classes=6):
    """
    Build and compile a small convolutional classifier.

    Architecture: three Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, then Flatten -> Dense(256, relu) -> Dropout(0.2)
    -> Dense(num_classes, softmax).

    Args:
        input_shape: (height, width, channels) of one input image.
        num_classes: size of the softmax output layer.

    Returns:
        A Sequential model compiled with the Adam optimizer, categorical
        cross-entropy loss and an accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to the
        # first Conv2D; the latter is the deprecated pattern that makes Keras
        # emit a UserWarning when the model is constructed.
        keras.Input(shape=input_shape),

        Conv2D(32, (3,3), activation='relu'),
        MaxPooling2D(pool_size=(2,2)),

        Conv2D(64, (3,3), activation='relu'),
        MaxPooling2D(pool_size=(2,2)),

        Conv2D(128, (3,3), activation='relu'),
        MaxPooling2D(pool_size=(2,2)),

        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def train_cnn(train_dir, valid_dir, input_shape=(200,200), batch_size=32, epochs=10):
    """
    Fit the CNN from build_cnn on images read from class-per-folder directories.

    Args:
        train_dir: directory of training images, one sub-directory per class.
        valid_dir: directory of validation images, same layout.
        input_shape: (height, width) every image is resized to.
        batch_size: batch size used by both directory iterators.
        epochs: number of training epochs.

    Training images are rescaled by 1/255 and augmented (rotation up to 20,
    width/height shifts of 0.1, horizontal flips); validation images are only
    rescaled. The number of output classes is taken from the training iterator.

    Returns:
        (model, history): the fitted model and the object returned by fit().
    """
    # Options shared by the training and validation directory iterators.
    flow_kwargs = dict(
        target_size=input_shape,
        batch_size=batch_size,
        class_mode='categorical',
    )

    augmenting_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
    )
    rescale_only_datagen = ImageDataGenerator(rescale=1./255)

    train_gen = augmenting_datagen.flow_from_directory(train_dir, shuffle=True, **flow_kwargs)
    valid_gen = rescale_only_datagen.flow_from_directory(valid_dir, shuffle=False, **flow_kwargs)

    # The model is always built for 3-channel input at the requested size.
    height, width = input_shape[0], input_shape[1]
    model = build_cnn(
        input_shape=(height, width, 3),
        num_classes=len(train_gen.class_indices),
    )

    history = model.fit(train_gen, epochs=epochs, validation_data=valid_gen)
    return model, history

In [4]:
# ---------------------------
# 3. FEATURE EXTRACTION
# ---------------------------

def extract_features(model, generator, layer_index=-3):
    """
    Run every batch of `generator` through a truncated copy of `model`.

    Args:
        model: a built Keras model; the first layer's input is used as the
            input of the truncated model.
        generator: a batch iterator supporting reset(), len() and next(),
            yielding (images, labels) pairs.
        layer_index: index into model.layers of the layer whose output is
            returned as the feature vector. The default, -3, is the
            Dense(256) layer of the network produced by build_cnn
            (Dense(256) -> Dropout -> Dense(num_classes)).

    Returns:
        (X, Y): features and labels, each concatenated along axis 0 in the
        order the generator produced them.

    Raises:
        ValueError: if the generator yields no batches.
    """
    feature_model = keras.Model(
        inputs=model.layers[0].input,
        outputs=model.layers[layer_index].output,
    )

    all_feats = []
    all_labels = []

    generator.reset()
    # Iterate exactly one pass over the data; the iterator may otherwise keep
    # cycling when next() is called again.
    for _ in range(len(generator)):
        imgs, labels = next(generator, (None, None))
        if imgs is None:
            break
        # predict_on_batch handles one in-memory batch without the per-call
        # progress bar and data-iterator setup that predict() performs.
        all_feats.append(feature_model.predict_on_batch(imgs))
        all_labels.append(labels)

    if not all_feats:
        raise ValueError("generator yielded no batches; cannot extract features")

    X = np.concatenate(all_feats, axis=0)
    Y = np.concatenate(all_labels, axis=0)
    return X, Y



In [5]:
# ---------------------------
# 4. PCA
# ---------------------------

def apply_pca(features, n_components=20, whiten=True, random_state=42):
    """
    Fit a PCA on `features` and return the projected data with the fitted model.

    Args:
        features: 2-D array-like of shape (n_samples, n_features).
        n_components: number of principal components to keep.
        whiten: forwarded to PCA(whiten=...).
        random_state: seed forwarded to PCA so repeated runs give the same
            projection when a randomized SVD solver is selected; mirrors the
            fixed seed used by run_kmeans.

    Returns:
        (reduced, pca): the transformed features and the fitted PCA object,
        which can be reused via pca.transform() on new data.
    """
    pca = PCA(n_components=n_components, whiten=whiten, random_state=random_state)
    reduced = pca.fit_transform(features)
    return reduced, pca

In [6]:
# ---------------------------
# 5. K-MEANS CLUSTERING + NOVELTY
# ---------------------------

def run_kmeans(features, n_clusters=6):
    """Fit K-Means (seed 42, 10 initialisations) on `features`; return the fitted estimator."""
    # KMeans.fit returns the estimator itself, so it can be returned directly.
    return KMeans(n_clusters=n_clusters, n_init=10, random_state=42).fit(features)

def agent_novelty_check(features, kmeans, dist_threshold=10.0):
    """
    Flag samples that lie far from the centre of their predicted cluster.

    Args:
        features: iterable of feature vectors accepted by kmeans.predict.
        kmeans: fitted clustering model exposing predict() and
            cluster_centers_.
        dist_threshold: a sample is "novel" when its Euclidean distance to
            its assigned centre is strictly greater than this value.

    Returns:
        (novelty_flags, distances): two lists with one entry per sample -
        a bool (True = novel) and the distance that produced it.
    """
    assigned = kmeans.predict(features)
    centroids = kmeans.cluster_centers_

    # Euclidean distance from each sample to the centroid of its own cluster.
    distances = [
        np.linalg.norm(sample - centroids[cluster_id])
        for sample, cluster_id in zip(features, assigned)
    ]
    # True => "something new", False => "old/familiar".
    novelty_flags = [bool(gap > dist_threshold) for gap in distances]
    return novelty_flags, distances

In [7]:
# ---------------------------
# 6. MAIN
# ---------------------------

def main():
    """
    Run the whole pipeline: preprocess -> train CNN -> evaluate -> extract
    features -> PCA -> K-Means -> distance-based novelty check on the test set.

    Returns:
        dict with the objects produced along the way ("model", "history",
        "test_accuracy", "pca", "kmeans", "novelty_flags", "distances",
        "test_filenames"), so that later cells can reuse them instead of
        relying on names that only exist inside this function.
    """
    # 0) Paths and settings shared by every generator below
    SOURCE_ROOT = "NEU_Metal_Surface_Defects_Data"         # original images
    DEST_ROOT   = "NEU_Metal_Surface_Defects_Data_Preproc" # preprocessed
    IMG_SIZE = (200, 200)
    BATCH_SIZE = 32
    train_dir = os.path.join(DEST_ROOT, "train")
    valid_dir = os.path.join(DEST_ROOT, "valid")
    test_dir  = os.path.join(DEST_ROOT, "test")

    # 1) Preprocess + Save
    #    (Do it once, then you can comment it out)
    save_preprocessed_dataset(SOURCE_ROOT, DEST_ROOT)

    # 2) Train CNN
    model, history = train_cnn(
        train_dir, valid_dir, input_shape=IMG_SIZE, batch_size=BATCH_SIZE, epochs=5
    )

    # 3) Evaluate CNN on test data
    #    shuffle=False keeps batches in directory order, so index i in the
    #    novelty report below refers to test_gen.filenames[i].
    test_datagen = ImageDataGenerator(rescale=1./255)
    test_gen = test_datagen.flow_from_directory(
        test_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE,
        class_mode='categorical', shuffle=False
    )
    test_loss, test_acc = model.evaluate(test_gen)
    print(f"Test accuracy: {test_acc:.3f}")

    # 4) Extract features from the training set
    #    A separate, non-augmenting and non-shuffling generator is used so the
    #    features describe the stored images themselves.
    train_datagen_2 = ImageDataGenerator(rescale=1./255)
    train_gen_2 = train_datagen_2.flow_from_directory(
        train_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE,
        class_mode='categorical', shuffle=False
    )
    train_feats, train_lbls = extract_features(model, train_gen_2)

    # 5) Apply PCA (20 components, whitened)
    pca_feats, pca_model = apply_pca(train_feats, n_components=20, whiten=True)

    # 6) K-Means: 6 clusters = 6 known defect classes
    kmeans = run_kmeans(pca_feats, n_clusters=6)

    # 7) Agent novelty check on test set:
    #    extract test features -> project with the PCA fitted on train ->
    #    measure distance to the assigned cluster centre
    test_feats, test_lbls = extract_features(model, test_gen)
    test_feats_pca = pca_model.transform(test_feats)
    novelty_flags, distances = agent_novelty_check(test_feats_pca, kmeans, dist_threshold=15.0)

    # 8) Simple "agent" logic: report only the images flagged as novel;
    #    familiar images produce no output.
    for i, is_novel in enumerate(novelty_flags):
        if is_novel:
            print(f"[Image {i}] Nim is confused, this might be new or interesting! distance={distances[i]:.2f}")

    return {
        "model": model,
        "history": history,
        "test_accuracy": test_acc,
        "pca": pca_model,
        "kmeans": kmeans,
        "novelty_flags": novelty_flags,
        "distances": distances,
        "test_filenames": list(test_gen.filenames),
    }

In [8]:
# Entry point: run the full pipeline when this cell/script is executed directly
# (not when the code is imported as a module).
if __name__ == "__main__":
    main()

Found 1656 images belonging to 6 classes.
Found 72 images belonging to 6 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1737907826.982676   37120 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4543 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
  self._warn_if_super_not_called()


Epoch 1/5


I0000 00:00:1737907828.609675   37253 service.cc:148] XLA service 0x7d25cc003f20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1737907828.609705   37253 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-01-26 19:10:28.627562: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1737907828.734856   37253 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 1/52[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:14[0m 6s/step - accuracy: 0.2500 - loss: 1.7901

I0000 00:00:1737907833.806008   37253 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m36/52[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m1s[0m 106ms/step - accuracy: 0.3866 - loss: 1.6384

KeyboardInterrupt: 

NameError: name 'model' is not defined