<a href="https://colab.research.google.com/github/andersod55123456789/Dales_code_box/blob/main/Crack_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Part Classification Model: Bright Crack Lines vs Dark Lap Lines
# Designed for Google Colab

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import cv2
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from google.colab import files, drive
import zipfile
import shutil
from pathlib import Path

# Mount Google Drive (optional - for saving models)
# drive.mount('/content/drive')

# Report the runtime environment up front: the installed TensorFlow version
# and the list of GPU devices TensorFlow can see (an empty list means training
# will run on CPU).
print("TensorFlow version:", tf.__version__)
print("GPU Available:", tf.config.list_physical_devices('GPU'))

# =============================================================================
# STEP 1: DATA UPLOAD AND PREPARATION
# =============================================================================

def upload_and_extract_data():
    """Prompt for a ZIP upload in Colab and extract it to ``/content/images``.

    Every uploaded file that is a valid ZIP archive is extracted into the same
    target directory. Uploads that are not ZIP archives are reported and
    skipped instead of aborting the whole step with ``zipfile.BadZipFile``.

    Returns:
        The extraction directory path (``'/content/images'``) if it exists
        after the upload, otherwise ``None``.
    """
    image_dir = '/content/images'

    print("Please upload your image folder as a ZIP file...")
    uploaded = files.upload()

    # Extract each uploaded archive; the original code opens uploads by
    # filename, so they are expected to be present in the working directory.
    for filename in uploaded:
        if not zipfile.is_zipfile(filename):
            print(f'Skipping {filename}: not a valid ZIP archive')
            continue
        print(f'Extracting {filename}...')
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall(image_dir)

    # The directory is missing when nothing was uploaded or nothing extracted.
    if not os.path.exists(image_dir):
        print("Please check the extracted folder structure")
        return None

    print(f"Images extracted to: {image_dir}")
    print("Contents:", os.listdir(image_dir))
    return image_dir

def organize_data_for_training(image_dir, output_dir='/content/organized_data'):
    """Create the training folder layout and preview the first images.

    Creates ``crack_lines`` and ``lap_lines`` sub-folders under ``output_dir``,
    recursively collects every image under ``image_dir`` and shows up to ten
    of them in a 2x5 grid so the user can get a feel for the two classes.
    No files are copied here.

    Args:
        image_dir: Root directory that is searched recursively for images.
        output_dir: Directory in which the class sub-folders are created.

    Returns:
        List of image file paths found under ``image_dir``, sorted so that the
        order (and therefore the "first N" images) is reproducible.
    """
    # makedirs creates the parent output_dir as needed.
    os.makedirs(f'{output_dir}/crack_lines', exist_ok=True)
    os.makedirs(f'{output_dir}/lap_lines', exist_ok=True)

    # str.endswith accepts a tuple, so one call covers every extension.
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

    # `filenames` (not `files`) avoids shadowing the google.colab `files`
    # module imported at the top of the notebook.
    image_files = sorted(
        os.path.join(root, name)
        for root, _dirs, filenames in os.walk(image_dir)
        for name in filenames
        if name.lower().endswith(image_extensions)
    )

    print(f"Found {len(image_files)} images")

    # Display first few images for manual classification
    print("\nDisplaying first 10 images for manual classification:")
    print("Please note which ones have CRACK LINES vs LAP LINES")

    _, axes = plt.subplots(2, 5, figsize=(20, 8))
    axes = axes.ravel()

    # Hide every frame up front so slots without an image (fewer than ten
    # images, or an unreadable file) stay blank instead of showing empty axes.
    for ax in axes:
        ax.axis('off')

    for i, img_path in enumerate(image_files[:10]):
        img = cv2.imread(img_path)
        if img is None:
            continue
        # OpenCV loads BGR; matplotlib expects RGB.
        axes[i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        axes[i].set_title(f"Image {i+1}: {os.path.basename(img_path)}")

    plt.tight_layout()
    plt.show()

    return image_files

def manual_sort_initial_data(image_files, num_examples=50,
                             output_dir='/content/organized_data'):
    """Interactively label the first images and copy them into class folders.

    Each readable image among the first ``num_examples`` entries of
    ``image_files`` is displayed, and the user is asked to label it as crack
    ('c'), lap ('l') or skip ('s'). Labelled images are copied into
    ``<output_dir>/crack_lines`` or ``<output_dir>/lap_lines``.

    Args:
        image_files: Sequence of image file paths.
        num_examples: Maximum number of images to present.
        output_dir: Root of the organized training data; the class
            sub-folders are created on demand.

    Returns:
        Tuple ``(crack_examples, lap_examples)`` with the source paths that
        were labelled as crack and lap respectively.
    """
    print("\nManual Classification Phase:")
    # This line needs the f-prefix, otherwise the placeholder is printed as-is.
    print(f"We'll show you {num_examples} images one by one.")
    print("Type 'c' for crack lines, 'l' for lap lines, 's' to skip")

    crack_examples = []
    lap_examples = []

    # Map each label to the list that records it and its destination folder.
    destinations = {
        'c': (crack_examples, os.path.join(output_dir, 'crack_lines')),
        'l': (lap_examples, os.path.join(output_dir, 'lap_lines')),
    }

    batch = image_files[:num_examples]
    total = len(batch)  # may be smaller than num_examples

    for i, img_path in enumerate(batch):
        # Display image
        img = cv2.imread(img_path)
        if img is None:
            continue

        plt.figure(figsize=(8, 6))
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.title(f"Image {i+1}/{total}: {os.path.basename(img_path)}")
        plt.axis('off')
        plt.show()

        # Get user input
        while True:
            label = input("Crack (c), Lap (l), or Skip (s)? ").lower().strip()
            if label in ['c', 'l', 's']:
                break
            print("Please enter 'c', 'l', or 's'")

        if label in destinations:
            examples, folder = destinations[label]
            # Create the folder here so this function also works when
            # organize_data_for_training() was not run with the same root.
            os.makedirs(folder, exist_ok=True)
            shutil.copy2(img_path, folder)
            examples.append(img_path)

        plt.close()

    print("\nManual classification complete!")
    print(f"Crack examples: {len(crack_examples)}")
    print(f"Lap examples: {len(lap_examples)}")

    return crack_examples, lap_examples

# =============================================================================
# STEP 2: FEATURE EXTRACTION AND ANALYSIS
# =============================================================================

def extract_line_features(image_path):
    """Compute hand-crafted line/brightness/texture features for one image.

    The image is read with OpenCV and converted to grayscale; all features are
    derived from that grayscale image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        Dict mapping feature name to value, or ``None`` if the image could not
        be read. Keys, in insertion order: ``num_lines``, ``mean_brightness``,
        ``std_brightness``, ``max_brightness``, ``min_brightness``,
        ``edge_intensity``, ``edge_count``, ``contrast``, ``rms_contrast``,
        ``lbp_uniformity``, ``gradient_mean``, ``gradient_std``.
    """
    # Imported lazily, as in the rest of the notebook, so scikit-image is only
    # required when feature extraction is actually used.
    from skimage.feature import local_binary_pattern

    bgr = cv2.imread(image_path)
    if bgr is None:
        return None

    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # Canny edge map, reused for Hough line detection and edge statistics.
    edge_map = cv2.Canny(gray, 50, 150, apertureSize=3)
    hough = cv2.HoughLines(edge_map, 1, np.pi/180, threshold=80)
    line_count = 0 if hough is None else len(hough)

    # Global brightness statistics.
    brightness_mean = np.mean(gray)
    brightness_max = np.max(gray)
    brightness_min = np.min(gray)

    # Local binary pattern texture codes (8 neighbours, radius 1).
    lbp_codes = local_binary_pattern(gray, P=8, R=1, method='uniform')

    # Sobel gradient magnitude.
    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    magnitude = np.sqrt(sobel_x**2 + sobel_y**2)

    # Key order is kept stable because callers build DataFrame columns from it.
    return {
        'num_lines': line_count,
        'mean_brightness': brightness_mean,
        'std_brightness': np.std(gray),
        'max_brightness': brightness_max,
        'min_brightness': brightness_min,
        'edge_intensity': np.mean(edge_map),
        'edge_count': np.sum(edge_map > 0),
        'contrast': brightness_max - brightness_min,
        'rms_contrast': np.sqrt(np.mean((gray - brightness_mean)**2)),
        'lbp_uniformity': len(np.unique(lbp_codes)),
        'gradient_mean': np.mean(magnitude),
        'gradient_std': np.std(magnitude),
    }

def analyze_feature_differences(crack_examples, lap_examples):
    """Compare hand-crafted features between crack and lap example images.

    Extracts features for every path via ``extract_line_features``, prints the
    per-class mean of each feature and their absolute difference, and draws
    one box plot per numeric feature grouped by label.

    Args:
        crack_examples: Iterable of image paths labelled as crack lines.
        lap_examples: Iterable of image paths labelled as lap lines.

    Returns:
        ``pandas.DataFrame`` with one row per successfully processed image and
        a ``label`` column (``'crack'`` or ``'lap'``). The frame is empty when
        no features could be extracted; nothing is printed or plotted then.
    """
    rows = []
    for label, paths in (('crack', crack_examples), ('lap', lap_examples)):
        print(f"Extracting features from {label} examples...")
        for img_path in paths:
            features = extract_line_features(img_path)
            if features:  # None for unreadable images
                features['label'] = label
                rows.append(features)

    # Create DataFrame for analysis
    df = pd.DataFrame(rows)
    if df.empty:
        # Without rows there is no 'label' column; bail out instead of
        # failing with a KeyError below.
        print("\nNo features could be extracted; nothing to analyze.")
        return df

    # Statistical analysis
    print("\nFeature Analysis:")
    is_crack = df['label'] == 'crack'
    is_lap = df['label'] == 'lap'
    for column in df.columns:
        if column == 'label':
            continue
        # A class without examples yields NaN here rather than an error.
        crack_mean = df.loc[is_crack, column].mean()
        lap_mean = df.loc[is_lap, column].mean()
        print(f"{column}:")
        print(f"  Crack mean: {crack_mean:.3f}")
        print(f"  Lap mean: {lap_mean:.3f}")
        print(f"  Difference: {abs(crack_mean - lap_mean):.3f}")
        print()

    # Visualization: size the grid to the number of features so none of them
    # is silently dropped (a fixed 3x3 grid only fits nine).
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    if len(numeric_columns) == 0:
        return df

    n_cols = 3
    n_rows = -(-len(numeric_columns) // n_cols)  # ceiling division
    _, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4 * n_rows),
                           squeeze=False)
    axes = axes.ravel()

    for ax, column in zip(axes, numeric_columns):
        df.boxplot(column=column, by='label', ax=ax)
        ax.set_title(f'{column}')

    # Hide grid cells left over when the feature count is not a multiple of 3.
    for ax in axes[len(numeric_columns):]:
        ax.set_visible(False)

    plt.tight_layout()
    plt.show()

    return df

# =============================================================================
# STEP 3: CNN MODEL BUILDING
# =============================================================================

def create_cnn_model(input_shape=(224, 224, 3)):
    """Build and compile the binary crack-vs-lap CNN classifier.

    Architecture: four Conv2D(3x3, ReLU) -> BatchNormalization ->
    MaxPooling2D(2x2) blocks with 32, 64, 128 and 256 filters, followed by
    global average pooling, two Dense+Dropout(0.5) layers (512 and 256 units)
    and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input image as ``(height, width, channels)``.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer,
        binary cross-entropy loss and accuracy/precision/recall metrics.
    """
    # An explicit Input layer replaces the `input_shape=` kwarg on the first
    # Conv2D.
    stack = [layers.Input(shape=input_shape)]

    # Convolutional feature extractor: the filter count doubles per block.
    for filters in (32, 64, 128, 256):
        stack += [
            layers.Conv2D(filters, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
        ]

    stack += [
        # Global average pooling instead of flatten to reduce overfitting
        layers.GlobalAveragePooling2D(),

        # Dense layers
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid'),  # Binary classification
    ]

    model = models.Sequential(stack)

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            # Metric objects with explicit names instead of the strings
            # 'precision'/'recall': the names pin the history keys
            # ('precision', 'val_precision', ...) that the plotting code reads.
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
        ]
    )

    return model

def prepare_data_generators(data_dir, img_size=(224, 224), batch_size=32):
    """Create training and validation generators from a class-folder layout.

    ``data_dir`` must contain the sub-folders ``crack_lines`` and
    ``lap_lines``. 20% of the images are held out for validation. Training
    images are augmented (rotation, shifts, shear, zoom, flips); validation
    images are only rescaled to ``[0, 1]``.

    Args:
        data_dir: Directory containing the two class sub-folders.
        img_size: Target ``(height, width)`` images are resized to.
        batch_size: Number of images per batch.

    Returns:
        Tuple ``(train_generator, validation_generator)``. Labels are binary
        with 0 = crack and 1 = lap. The validation generator yields images in
        a fixed, unshuffled order.
    """
    validation_split = 0.2  # 20% for validation; must match in both generators
    class_names = ['crack_lines', 'lap_lines']  # 0 = crack, 1 = lap

    # Data augmentation for training
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        validation_split=validation_split
    )

    # Only rescaling for validation
    val_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_split
    )

    # Create generators
    train_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='binary',
        subset='training',
        classes=class_names
    )

    validation_generator = val_datagen.flow_from_directory(
        data_dir,
        target_size=img_size,
        batch_size=batch_size,
        class_mode='binary',
        subset='validation',
        classes=class_names,
        # Keep validation order fixed: with the default shuffle=True the
        # output of model.predict(generator) would not line up with
        # generator.classes, making per-sample metrics meaningless.
        shuffle=False
    )

    return train_generator, validation_generator

def train_model(model, train_gen, val_gen, epochs=50):
    """Fit ``model`` with early stopping, LR decay and best-model checkpoints.

    Args:
        model: Compiled Keras model to train.
        train_gen: Training data passed to ``model.fit``.
        val_gen: Validation data passed to ``model.fit``.
        epochs: Maximum number of epochs.

    Returns:
        The history object returned by ``model.fit``.
    """
    # Stop after 10 epochs without val_loss improvement and roll back to the
    # best weights seen.
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    )

    # Halve the learning rate after 5 stagnant epochs, down to 1e-7.
    shrink_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
    )

    # Persist the model with the best validation accuracy so far.
    keep_best = tf.keras.callbacks.ModelCheckpoint(
        'best_model.h5',
        monitor='val_accuracy',
        save_best_only=True,
    )

    return model.fit(
        train_gen,
        epochs=epochs,
        validation_data=val_gen,
        callbacks=[stop_early, shrink_lr, keep_best],
    )

def plot_training_history(history):
    """Plot training vs. validation curves in a 2x2 grid.

    Panels, row by row: accuracy, loss, precision, recall. Each panel reads
    ``history.history[<key>]`` and ``history.history['val_<key>']``.

    Args:
        history: Object with a ``history`` dict, as returned by ``model.fit``.
    """
    _, grid = plt.subplots(2, 2, figsize=(15, 10))

    # (history key, display name) per panel, in row-major grid order.
    panels = (
        ('accuracy', 'Accuracy'),
        ('loss', 'Loss'),
        ('precision', 'Precision'),
        ('recall', 'Recall'),
    )

    for ax, (key, display) in zip(grid.ravel(), panels):
        ax.plot(history.history[key], label=f'Training {display}')
        ax.plot(history.history[f'val_{key}'], label=f'Validation {display}')
        ax.set_title(f'Model {display}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(display)
        ax.legend()

    plt.tight_layout()
    plt.show()

# =============================================================================
# STEP 4: MODEL EVALUATION AND SORTING
# =============================================================================

def evaluate_model(model, val_gen):
    """Print a classification report, plot a confusion matrix, return accuracy.

    Predictions and ground-truth labels are collected batch by batch from
    ``val_gen`` so they stay aligned even if the generator shuffles its
    samples. Comparing ``model.predict(val_gen)`` with ``val_gen.classes`` is
    only valid for an unshuffled generator.

    Args:
        model: Trained Keras model with a single sigmoid output.
        val_gen: Indexable batch generator (supports ``len()`` and
            ``val_gen[i] -> (images, labels)``), e.g. the validation generator
            from ``prepare_data_generators``.

    Returns:
        Validation accuracy as computed by ``accuracy_score``.

    Raises:
        ValueError: If ``val_gen`` yields no batches.
    """
    val_gen.reset()

    true_batches = []
    prob_batches = []
    for batch_idx in range(len(val_gen)):
        batch_x, batch_y = val_gen[batch_idx]
        batch_prob = model.predict(batch_x, verbose=0)
        prob_batches.append(np.asarray(batch_prob).reshape(-1))
        true_batches.append(np.asarray(batch_y).reshape(-1))

    if not prob_batches:
        raise ValueError("Validation generator is empty; nothing to evaluate")

    predictions = np.concatenate(prob_batches)
    y_pred = (predictions > 0.5).astype(int)
    y_true = np.concatenate(true_batches).astype(int)

    class_ids = [0, 1]  # 0 = crack, 1 = lap
    class_names = ['Crack Lines', 'Lap Lines']

    # Classification report. Passing `labels` keeps the report and the matrix
    # two-class even if the validation split contains only one class.
    print("Classification Report:")
    print(classification_report(y_true, y_pred, labels=class_ids,
                                target_names=class_names))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

    return accuracy_score(y_true, y_pred)

def _collect_image_paths(image_dir, exclude_dir, extensions):
    """Return sorted image paths under ``image_dir``, skipping ``exclude_dir``."""
    excluded = os.path.abspath(exclude_dir)
    found = []
    for root, dirs, filenames in os.walk(image_dir):
        # Prune the output tree in place so a run never re-sorts its own copies.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != excluded
        ]
        found.extend(
            os.path.join(root, name)
            for name in filenames
            if name.lower().endswith(extensions)
        )
    return sorted(found)


def sort_images(model, image_dir, output_dir='/content/sorted_images',
                img_size=(224, 224), crack_threshold=0.3, lap_threshold=0.7):
    """Copy every image under ``image_dir`` into a folder chosen by the model.

    Each image is resized, scaled to ``[0, 1]`` and scored by ``model``.
    Scores below ``crack_threshold`` go to ``crack_lines``, scores above
    ``lap_threshold`` go to ``lap_lines``, everything else goes to
    ``uncertain``. Source files are copied, never moved.

    Args:
        model: Trained model whose ``predict`` returns one sigmoid score per
            image (0 = crack, 1 = lap).
        image_dir: Root directory searched recursively for images.
        output_dir: Directory in which the three result folders are created.
        img_size: ``(height, width)`` the images are resized to; should match
            the size the model was trained with.
        crack_threshold: Scores strictly below this count as crack.
        lap_threshold: Scores strictly above this count as lap.

    Returns:
        Tuple ``(crack_count, lap_count, uncertain_count)``.

    Raises:
        ValueError: If ``crack_threshold`` is greater than ``lap_threshold``.
    """
    if crack_threshold > lap_threshold:
        raise ValueError(
            "crack_threshold must not be greater than lap_threshold"
        )

    # Create output directories
    folders = {
        bucket: os.path.join(output_dir, bucket)
        for bucket in ('crack_lines', 'lap_lines', 'uncertain')
    }
    for folder in folders.values():
        os.makedirs(folder, exist_ok=True)

    # Get all image files
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
    image_files = _collect_image_paths(image_dir, output_dir, image_extensions)

    print(f"Sorting {len(image_files)} images...")

    counts = dict.fromkeys(folders, 0)
    used_names = set()
    height, width = img_size

    for img_path in image_files:
        # Broad catch is deliberate: one bad file must not stop the batch.
        try:
            # Load and preprocess image
            img = cv2.imread(img_path)
            if img is None:
                print(f"Skipping unreadable image: {img_path}")
                continue

            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # cv2.resize expects (width, height).
            img_resized = cv2.resize(img_rgb, (width, height))
            img_batch = np.expand_dims(img_resized / 255.0, axis=0)

            # Predict
            prediction = model.predict(img_batch, verbose=0)[0][0]

            # Sort based on prediction confidence
            if prediction < crack_threshold:  # Strong crack prediction
                bucket = 'crack_lines'
            elif prediction > lap_threshold:  # Strong lap prediction
                bucket = 'lap_lines'
            else:  # Uncertain predictions
                bucket = 'uncertain'

            # Files from different sub-folders may share a basename; fall back
            # to the flattened relative path so an earlier copy is not
            # overwritten while the counters keep increasing.
            filename = os.path.basename(img_path)
            if filename in used_names:
                relative = os.path.relpath(img_path, image_dir)
                filename = relative.replace(os.sep, '__')
            used_names.add(filename)

            shutil.copy2(img_path, os.path.join(folders[bucket], filename))
            counts[bucket] += 1

        except Exception as e:
            print(f"Error processing {img_path}: {e}")

    print("\nSorting complete!")
    print(f"Crack lines: {counts['crack_lines']}")
    print(f"Lap lines: {counts['lap_lines']}")
    print(f"Uncertain: {counts['uncertain']}")

    return counts['crack_lines'], counts['lap_lines'], counts['uncertain']

# =============================================================================
# MAIN EXECUTION PIPELINE
# =============================================================================

def main_pipeline():
    """Walk through the end-to-end pipeline: upload, label, train, sort.

    As shipped, only the progress banners run and the (untrained) CNN is built
    and summarized; every data-dependent step is commented out and must be
    uncommented by the user. The commented steps depend on each other in the
    order shown (e.g. ``image_dir`` from step 1 is needed by step 5), so
    uncomment them top to bottom.
    """
    print("=== PART CLASSIFICATION MODEL ===")
    print("This model will classify parts with bright crack lines vs dark lap lines")

    # Step 1: Upload and prepare data
    print("\n1. UPLOADING AND PREPARING DATA")
    print("Please zip your image folder and upload it when prompted.")

    # Uncomment these lines when ready to run:
    # image_dir = upload_and_extract_data()
    # if not image_dir:
    #     return

    # image_files = organize_data_for_training(image_dir)
    # crack_examples, lap_examples = manual_sort_initial_data(image_files)

    # Step 2: Feature analysis (optional but recommended)
    print("\n2. FEATURE ANALYSIS")
    # df = analyze_feature_differences(crack_examples, lap_examples)

    # Step 3: Train CNN model
    print("\n3. TRAINING CNN MODEL")
    # The model is built even while the training steps are still commented
    # out, so the architecture summary is always printed.
    model = create_cnn_model()
    print("Model architecture:")
    model.summary()

    # Prepare data generators
    # train_gen, val_gen = prepare_data_generators('/content/organized_data')

    # Train model
    # history = train_model(model, train_gen, val_gen, epochs=50)
    # plot_training_history(history)

    # Step 4: Evaluate model
    print("\n4. MODEL EVALUATION")
    # accuracy = evaluate_model(model, val_gen)
    # print(f"Final validation accuracy: {accuracy:.3f}")

    # Step 5: Sort all images
    print("\n5. SORTING ALL IMAGES")
    # sort_images(model, image_dir)

    # NOTE: these closing messages are printed unconditionally, even when the
    # steps above are still commented out and nothing was sorted.
    print("\n=== PIPELINE COMPLETE ===")
    print("Check the /content/sorted_images folder for results!")

# Instructions for running.
# This banner is printed every time the cell is executed; it only describes
# the workflow and does not start any processing itself.
print("""
INSTRUCTIONS FOR USE:

1. Upload this notebook to Google Colab
2. Run the setup cells to install dependencies
3. Zip your image folder (C:\\Users\\Tenne\\Desktop\\OneDrive_1_8-8-2025)
4. Run main_pipeline() and follow the prompts
5. Manually classify ~50 initial examples when prompted
6. The model will train and then sort all your images

The model will create three folders:
- crack_lines: Images with bright crack lines (confident predictions)
- lap_lines: Images with dark lap lines (confident predictions)
- uncertain: Images the model is unsure about (manual review recommended)

Uncomment the lines in main_pipeline() when ready to run!
""")

# Entry point: intentionally a no-op until the user enables the pipeline.
if __name__ == "__main__":
    # Uncomment the next line when ready to run the full pipeline
    # main_pipeline()
    pass