# CRACKrete: A Deep Learning-Based Image Classification Tool for Concrete Crack Detection

## Step 1: Setting Up Your Environment

In [None]:
pip install tensorflow opencv-python matplotlib scikit-learn kaggle


## Step 2: Load and Explore the Dataset

In [None]:

# Download the dataset archive with the Kaggle CLI; the extraction cell below
# looks for the resulting ZIP in the current working directory.
# NOTE(review): the CLI presumably needs Kaggle API credentials configured — confirm before running.
!kaggle datasets download ahsanulislam/concrete-surface-image-filtered-with-match-filter


In [None]:
import zipfile
import os

# Archive produced by the Kaggle download step
zip_path = 'concrete-surface-image-filtered-with-match-filter.zip'

# Unpack every member of the archive below ./data/
with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall('./data/')


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import numpy as np

def preprocess_image(img):
    """Turn an RGB image into a 3-channel Canny edge map.

    Intended for use as ``ImageDataGenerator(preprocessing_function=...)``,
    which passes each image as a rank-3 float array in RGB channel order with
    pixel values in the 0-255 range and expects an array of the same shape back.

    Args:
        img: Array of shape (height, width, 3) in RGB order, values in [0, 255].

    Returns:
        float32 array of shape (height, width, 3) whose pixels are 0.0 (no edge)
        or 255.0 (edge). A float result is returned so the generator's in-place
        ``rescale`` multiplication can be applied afterwards.
    """
    # cv2.Canny only accepts 8-bit images, while Keras supplies floats.
    img_u8 = np.clip(img, 0, 255).astype(np.uint8)

    # Keras loads images as RGB (not OpenCV's BGR), so use the RGB conversion.
    gray = cv2.cvtColor(img_u8, cv2.COLOR_RGB2GRAY)

    # Smooth out noise before looking for edges
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Detect edges
    edges = cv2.Canny(blurred, threshold1=30, threshold2=100)

    # Replicate the single edge channel three times to keep the (H, W, 3) shape
    edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)

    return edges_rgb.astype(np.float32)

# Generator shared by the training and validation subsets.
# Keras runs preprocessing_function first and applies rescale to its result.
datagen = ImageDataGenerator(
    rescale=1./255,
    preprocessing_function=preprocess_image,  # Apply the preprocessing to every image
    validation_split=0.2  # Hold out 20% of the images for validation
)

# Training subset (shuffled, which is the flow_from_directory default)
train_data = datagen.flow_from_directory(
    './data/Matched Filter/',
    target_size=(227, 227),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

# Validation subset. Shuffling is disabled so that the order of
# model.predict(val_data) matches val_data.classes; with the default
# shuffle=True the confusion matrix / classification report would compare
# predictions against labels of different images.
val_data = datagen.flow_from_directory(
    './data/Matched Filter/',
    target_size=(227, 227),
    batch_size=32,
    class_mode='binary',
    subset='validation',
    shuffle=False
)


import numpy as np
from collections import Counter

# Tally how many training samples carry each integer class index
counter = Counter(train_data.classes)
print("Class Distribution: {}".format(counter))



In [None]:
import matplotlib.pyplot as plt

# Pull one batch of (images, labels) off the training iterator
x_batch, y_batch = next(train_data)

# Show the first nine samples of the batch on a 3x3 grid
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    ax.imshow(x_batch[i], cmap='gray')
    ax.set_title(f'Label: {"Cracked" if y_batch[i] == 1 else "Non-Cracked"}')
    ax.axis('off')
plt.show()


## Step 3: Build the CNN Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# CNN for binary classification, kept small to limit the parameter count.
# Four Conv2D + MaxPooling2D stages feed a compact dense head.
model = Sequential([
    # Stage 1: 32 filters, takes the 227x227 3-channel input
    Conv2D(32, (3, 3), activation='relu', input_shape=(227, 227, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 64 filters
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3: 128 filters
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 4: 256 filters
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Collapse the feature maps into a vector
    Flatten(),

    # Small fully connected layer followed by dropout
    Dense(64, activation='relu'),
    Dropout(0.5),

    # Single sigmoid unit for the binary output
    Dense(1, activation='sigmoid'),
])

# Configure optimizer, loss and reported metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer summary
model.summary()


## Step 4: Train the Model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Compiling again gives this cell a fresh Adam optimizer when it is re-run
# (the model weights themselves are not reset by compile).
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    train_data,
    epochs=10,  # Start with 10 epochs
    validation_data=val_data,
    callbacks=[early_stopping]  # previously created but never passed to fit()
)


In [None]:
# One figure per tracked metric: training curve vs. validation curve
for metric_key, metric_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric_key], label=f'Train {metric_label}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_label)
    plt.legend()
    plt.show()


## Step 5: Evaluate the Model

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# Collect predictions and labels batch by batch so that every prediction is
# compared with the label of the same image. Reading labels from
# val_data.classes is only valid when the iterator is not shuffled, and
# flow_from_directory shuffles by default.
batch_probs, batch_labels = [], []
for batch_index in range(len(val_data)):
    batch_images, batch_targets = val_data[batch_index]
    batch_probs.append(model.predict_on_batch(batch_images))
    batch_labels.append(batch_targets)

# Predicted probabilities, shape (num_samples, 1)
val_preds = np.concatenate(batch_probs)
# Threshold at 0.5 and flatten to the 1-D shape scikit-learn expects
val_preds_classes = np.where(val_preds > 0.5, 1, 0).ravel()

# True labels in the same order as the predictions
true_classes = np.concatenate(batch_labels).astype(int)

# Generate confusion matrix
conf_matrix = confusion_matrix(true_classes, val_preds_classes)

# Display confusion matrix
print("Confusion Matrix:")
print(conf_matrix)

# Classification report (precision, recall, f1-score)
# NOTE(review): target_names assumes class index 0 is the non-cracked folder —
# confirm against val_data.class_indices.
class_report = classification_report(true_classes, val_preds_classes, target_names=['Non-Cracked', 'Cracked'])
print("Classification Report:")
print(class_report)


In [None]:
# Evaluate on a held-out test iterator when one has been created (loaded the
# same way as train_data). No test_data is defined in the cells above, so fall
# back to the validation split instead of failing with a NameError, and label
# the printed metrics accordingly.
try:
    evaluation_data, split_name = test_data, "Test"
except NameError:
    evaluation_data, split_name = val_data, "Validation"
    print("test_data is not defined; evaluating on the validation split instead.")

test_loss, test_accuracy = model.evaluate(evaluation_data)

print(f"{split_name} Accuracy: {test_accuracy * 100:.2f}%")
print(f"{split_name} Loss: {test_loss:.4f}")


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Restart the iterator, then gather labels and predictions from the same
# batches. reset() alone does not disable shuffling, so val_data.classes may
# not be in the order in which model.predict(val_data) sees the images.
val_data.reset()
report_labels, report_probs = [], []
for batch_index in range(len(val_data)):
    batch_images, batch_targets = val_data[batch_index]
    report_probs.append(model.predict_on_batch(batch_images))
    report_labels.append(batch_targets)

# Threshold the sigmoid outputs at 0.5 and flatten to 1-D
predictions = (np.concatenate(report_probs) > 0.5).astype("int32").ravel()
report_truth = np.concatenate(report_labels).astype("int32")

print(classification_report(report_truth, predictions))
print(confusion_matrix(report_truth, predictions))


## Step 6: Optimize the Model