In [80]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split
import glob

# Create the main directory structure
def create_dataset_structure(base_dir='car_damage_dataset'):
    # Create main directories
    os.makedirs(base_dir, exist_ok=True)
    
    # Create train, validation, and test directories
    for split in ['train', 'validation', 'test']:
        split_dir = os.path.join(base_dir, split)
        os.makedirs(split_dir, exist_ok=True)
        
        # Create class directories inside each split
        # You can modify this list based on your specific damage categories
        for class_name in ['damaged_car', 'whole']:
            class_dir = os.path.join(split_dir, class_name)
            os.makedirs(class_dir, exist_ok=True)
    
    print(f"Created directory structure in {base_dir}")

# Split and organize images from source folders into the dataset structure
def organize_dataset(source_damaged_dir, source_undamaged_dir, base_dir='car_damage_dataset', 
                    train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    """
    Organize car damage images into train/validation/test splits
    
    Parameters:
    - source_damaged_dir: Directory containing damaged car images
    - source_undamaged_dir: Directory containing undamaged car images
    - base_dir: Base directory for the organized dataset
    - train_ratio, val_ratio, test_ratio: Split ratios (should sum to 1)
    """
    # Create directory structure
    create_dataset_structure(base_dir)
    
    # Process each class
    for class_name, source_dir in [('damaged', source_damaged_dir), ('undamaged', source_undamaged_dir)]:
        # Get all image files
        image_extensions = ['*.jpg', '*.jpeg', '*.png']
        image_files = []
        for ext in image_extensions:
            image_files.extend(glob.glob(os.path.join(source_dir, ext)))
            image_files.extend(glob.glob(os.path.join(source_dir, ext.upper())))
        
        print(f"Found {len(image_files)} images in {source_dir}")
        
        # Shuffle the files
        random.shuffle(image_files)
        
        # Split into train, validation, and test sets
        train_end = int(len(image_files) * train_ratio)
        val_end = train_end + int(len(image_files) * val_ratio)
        
        train_files = image_files[:train_end]
        val_files = image_files[train_end:val_end]
        test_files = image_files[val_end:]
        
        # Copy files to respective directories
        for files, split in [(train_files, 'train'), (val_files, 'validation'), (test_files, 'test')]:
            target_dir = os.path.join(base_dir, split, class_name)
            print(f"Copying {len(files)} images to {target_dir}")
            
            for file_path in files:
                file_name = os.path.basename(file_path)
                target_path = os.path.join(target_dir, file_name)
                shutil.copy2(file_path, target_path)

# Alternative: Organize from a single directory with subdirectories for each class
def organize_from_single_source(source_dir, base_dir='car_damage_dataset',
                              train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    """
    Organize dataset from a single source directory with class subdirectories
    
    Parameters:
    - source_dir: Root directory containing class subdirectories
    - base_dir: Base directory for the organized dataset
    - train_ratio, val_ratio, test_ratio: Split ratios (should sum to 1)
    """
    # Create directory structure
    create_dataset_structure(base_dir)
    
    # Get all class directories
    class_dirs = [d for d in os.listdir(source_dir) 
                 if os.path.isdir(os.path.join(source_dir, d))]
    
    for class_name in class_dirs:
        class_dir = os.path.join(source_dir, class_name)
        
        # Get all image files
        image_extensions = ['*.jpg', '*.jpeg', '*.png']
        image_files = []
        for ext in image_extensions:
            image_files.extend(glob.glob(os.path.join(class_dir, ext)))
            image_files.extend(glob.glob(os.path.join(class_dir, ext.upper())))
        
        print(f"Found {len(image_files)} images in class {class_name}")
        
        # Shuffle the files
        random.shuffle(image_files)
        
        # Split into train, validation, and test sets
        train_end = int(len(image_files) * train_ratio)
        val_end = train_end + int(len(image_files) * val_ratio)
        
        train_files = image_files[:train_end]
        val_files = image_files[train_end:val_end]
        test_files = image_files[val_end:]
        
        # Copy files to respective directories
        for files, split in [(train_files, 'train'), (val_files, 'validation'), (test_files, 'test')]:
            # Create class directory if it doesn't exist in the split
            target_dir = os.path.join(base_dir, split, class_name)
            os.makedirs(target_dir, exist_ok=True)
            
            print(f"Copying {len(files)} images to {target_dir}")
            
            for file_path in files:
                file_name = os.path.basename(file_path)
                target_path = os.path.join(target_dir, file_name)
                shutil.copy2(file_path, target_path)

# Example usage:
if __name__ == "__main__":
    # Set random seed for reproducibility
    random.seed(42)
    
    # Option 1: If you have separate directories for damaged and undamaged cars
    # organize_dataset(
    #     source_damaged_dir="/path/to/damaged_cars",
    #     source_undamaged_dir="/path/to/undamaged_cars",
    #     base_dir="car_damage_dataset"
    # )
    
    # Option 2: If you have a single directory with class subdirectories
    organize_from_single_source(
        source_dir="whole",  # This should contain subdirectories like "damaged", "undamaged"
        base_dir="damaged_car"
    )
    
    print("Dataset organization complete!")

Created directory structure in damaged_car
Found 2 images in class .ipynb_checkpoints
Copying 1 images to damaged_car\train\.ipynb_checkpoints
Copying 0 images to damaged_car\validation\.ipynb_checkpoints
Copying 1 images to damaged_car\test\.ipynb_checkpoints
Dataset organization complete!


In [81]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Set random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Define paths - update these to your actual data locations
train_data_dir = '/damaged_car/'
validation_data_dir = '/damaged_car/'

# Image parameters
img_width, img_height = 224, 224
input_shape = (img_width, img_height, 3)  # RGB images

# Training parameters
batch_size = 32
epochs = 20

# Data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Only rescaling for validation
validation_datagen = ImageDataGenerator(rescale=1./255)

# Load training data
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary'  # 'binary' for damaged/not damaged, 'categorical' for multiple damage types
)

# Load validation data
validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary'
)

# Build the CNN model
model = Sequential([
    # First convolutional block
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    
    # Second convolutional block
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    
    # Third convolutional block
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    
    # Fourth convolutional block
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    
    # Flatten and fully connected layers
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),  # Dropout to prevent overfitting
    Dense(1, activation='sigmoid')  # Binary classification (damaged or not)
])

# Compile the model
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=['accuracy']
)

# Model summary
model.summary()

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size
)

# Save the model
model.save('car_damage_detection_model.h5')

# Plot training & validation accuracy and loss
def plot_training_history(history):
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    
    epochs_range = range(len(acc))
    
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')
    
    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.savefig('training_history.png')
    plt.show()

plot_training_history(history)

# Evaluate the model on validation data
validation_generator.reset()
y_pred = model.predict(validation_generator, steps=validation_generator.samples // batch_size + 1)
y_pred_classes = (y_pred > 0.5).astype(int)
y_true = validation_generator.classes

# Print classification report
print(classification_report(y_true, y_pred_classes))

# Plot confusion matrix
cm = confusion_matrix(y_true, y_pred_classes)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.savefig('confusion_matrix.png')
plt.show()

# Function to predict on new images
def predict_damage(image_path):
    from tensorflow.keras.preprocessing import image
    
    img = image.load_img(image_path, target_size=(img_width, img_height))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0) / 255.0
    
    prediction = model.predict(img_array)[0][0]
    result = "Damaged" if prediction > 0.5 else "Not Damaged"
    confidence = prediction if prediction > 0.5 else 1 - prediction
    
    print(f"Prediction: {result} (Confidence: {confidence:.2f})")
    
    plt.imshow(img)
    plt.title(f"{result} (Confidence: {confidence:.2f})")
    plt.axis('off')
    plt.show()
    
    return result, confidence

# Example usage:
# predict_damage('path/to/test/image.jpg')

Found 0 images belonging to 3 classes.
Found 0 images belonging to 3 classes.


ValueError: The PyDataset has length 0

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Define image dimensions
img_width, img_height = 224, 224
batch_size = 32

# Build the model (assuming you need to create one)
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification: damaged or not damaged
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# If you have a pre-trained model, load it instead
# model = tf.keras.models.load_model('car_damage_model.h5')

# Function to process webcam frames and detect car damage
def detect_car_damage_webcam():
    # Open webcam
    cap = cv2.VideoCapture(0)  # 0 is usually the default webcam
    
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return
    
    print("Webcam opened successfully. Press 'q' to quit.")
    
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        
        if not ret:
            print("Error: Failed to capture image")
            break
        
        # Resize frame for model input
        input_frame = cv2.resize(frame, (img_width, img_height))
        
        # Preprocess the frame for the model
        input_array = np.expand_dims(input_frame, axis=0) / 255.0
        
        # Make prediction
        prediction = model.predict(input_array)[0][0]
        result = "Damaged" if prediction > 0.5 else "Not Damaged"
        confidence = prediction if prediction > 0.5 else 1 - prediction
        
        # Display result on frame
        text = f"{result} (Conf: {confidence:.2f})"
        color = (0, 0, 255) if result == "Damaged" else (0, 255, 0)  # Red for damaged, green for not damaged
        cv2.putText(frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
        
        # Display the resulting frame
        cv2.imshow('Car Damage Detection', frame)
        
        # Break the loop when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    # Release the capture and close windows
    cap.release()
    cv2.destroyAllWindows()

# Function to plot training history (if you have trained the model)
def plot_training_history(history):
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs_range = range(len(acc))
    
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')
    
    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.savefig('training_history.png')
    plt.show()

# Function to predict on a single image
def predict_damage(image_path):
    from tensorflow.keras.preprocessing import image
    
    img = image.load_img(image_path, target_size=(img_width, img_height))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0) / 255.0
    
    prediction = model.predict(img_array)[0][0]
    result = "Damaged" if prediction > 0.5 else "Not Damaged"
    confidence = prediction if prediction > 0.5 else 1 - prediction
    
    print(f"Prediction: {result} (Confidence: {confidence:.2f})")
    
    plt.imshow(img)
    plt.title(f"{result} (Confidence: {confidence:.2f})")
    plt.axis('off')
    plt.show()
    
    return result, confidence

# Start the webcam detection
if __name__ == "__main__":
    # If you need to train the model first, do that here
    # Otherwise, if using a pre-trained model, just run the webcam detection
    detect_car_damage_webcam()

Webcam opened successfully. Press 'q' to quit.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m