<a href="https://colab.research.google.com/github/anilkumarchebrolu/keras_image_classification/blob/master/Keras_image_classification_with_concrete_crack_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction

This is a simple notebook for understanding image classification with Keras: it trains a model to decide whether a concrete surface is cracked or not (Concrete Crack dataset).



*   Install necessary packages.
*   Download the data and visualize it.
*   Splitting the dataset
*   Creating and Training Model
*   Model Prediction
*   Using Metrics to evaluate model












## Packages to Install

In [None]:
!pip install rarfile
!pip install tensorflow-gpu==1.15.0
!pip install Keras==2.3.1

Collecting tensorflow-gpu==1.15.0
  Using cached https://files.pythonhosted.org/packages/a5/ad/933140e74973fb917a194ab814785e7c23680ca5dee6d663a509fe9579b6/tensorflow_gpu-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-1.15.0


In [None]:
# Restart the runtime so the packages installed above are the ones that get imported.
# exit() ends the current kernel process; presumably Colab then starts a fresh one — TODO confirm on other hosts.
exit()

## Fetching Dataset

Concrete crack image classification dataset consists of images of concrete with cracks and without cracks. 

Our goal is to classify them properly (https://data.mendeley.com/datasets/5y9wdsg2zt/2)




In [None]:
import requests, zipfile, io
import rarfile

# Download the dataset archive and unpack it locally.
dataset_url = "https://md-datasets-cache-zipfiles-prod.s3.eu-west-1.amazonaws.com/5y9wdsg2zt-2.zip"
# The whole body is read into memory via r.content below, so streaming is not requested.
r = requests.get(dataset_url)
# Fail loudly on an HTTP error instead of handing an error page to zipfile.
r.raise_for_status()
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    z.extractall("/root/cc_dataset")

# The downloaded zip contains a .rar archive; extract it as well.
with rarfile.RarFile("/root/cc_dataset/Concrete Crack Images for Classification.rar") as rar_file:
    rar_file.extractall("/root/cc_dataset/cc_dataset_extracted")

## Splitting the dataset

In [None]:
# Imports for this section
import os
from pathlib import Path
import cv2
import random
import keras
from keras.utils import np_utils

Using TensorFlow backend.


In [None]:
# Build the dataset by loading images from the 'Positive' and 'Negative' folders.
# Only the first 10,000 directory entries per folder are read (20K images in total)
# because of memory constraints.
dataset_path = Path("/root/cc_dataset/cc_dataset_extracted")

X = []
y = []
for folder in os.listdir(dataset_path):
    # Skip anything that is not a class directory; listing a plain file would raise.
    if not (dataset_path/folder).is_dir():
        continue
    image_names = os.listdir(dataset_path/folder)
    print(f"Number of {folder} images are {len(image_names)}")
    # Label 1 for the 'Positive' folder, 0 for every other folder.
    label = 1 if folder == "Positive" else 0
    for image in image_names[:10000]:
        image_array = cv2.imread(str(dataset_path/folder/image))
        # cv2.imread returns None when a file cannot be decoded; skipping it
        # keeps X free of None entries and keeps X and y aligned.
        if image_array is None:
            continue
        X.append(image_array)
        y.append(label)

Number of Negative images are 20000
Number of Positive images are 20000


In [None]:
# One-hot encode y into 2 classes (num_classes=2).
# NOTE: y is overwritten with its encoded form, so re-running this cell would
# feed the already-encoded labels back into to_categorical.
y = np_utils.to_categorical(y, num_classes=2)

### Visualize Image

In [None]:
# Visualize image
from google.colab.patches import cv2_imshow

def show_cv_image(image_numpy):
    """Display an image in the notebook output using Colab's cv2_imshow.

    image_numpy is passed straight through to cv2_imshow; presumably an image
    array as returned by cv2.imread — TODO confirm.
    """
    cv2_imshow(image_numpy)

In [None]:
# Pick one sample at random, print its ground-truth class, then display it.
# Re-run this cell to browse different samples.

random_num = random.randint(0, len(X) - 1)
# Column 1 of the one-hot label is set for images from the 'Positive' folder.
is_cracked = bool(y[random_num][1])
caption = "Cracked Concrete Image 'positive'\n" if is_cracked else "Non Cracked Concrete Image 'Negative'\n"
print(caption)
show_cv_image(X[random_num])


Split Train, Val and Test Dataset

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
# Seed Python's built-in `random` module, which this notebook uses to pick sample images.
# NOTE(review): this does not seed NumPy's generator — confirm whether the
# train/val/test split is meant to be reproducible.
random.seed(4)

In [None]:
# Convert X (the list of loaded images) and y (the labels) into NumPy arrays.
X = np.array(X)
y = np.array(y)

In [None]:
# First split: 80% train / 20% hold-out. random_state pins the shuffle so the
# split is the same on every run (random.seed does not affect scikit-learn).
X_train, X_tv, y_train, y_tv = train_test_split(X, y, shuffle=True, test_size = 0.2, random_state=4)
# Second split: the hold-out is cut 80/20 into validation and test.
# shuffle=False is enough here because the hold-out was already shuffled above.
X_val, X_test, y_val, y_test = train_test_split(X_tv, y_tv, shuffle=False, test_size = 0.2)

In [None]:
# Report how many samples ended up in each split.
for message in (
    f"length of training samples {len(X_train)}",
    f"length of validation samples {len(X_val)}",
    f"length of test samples {len(X_test)}",
):
    print(message)

# Model Training

In [None]:
import keras
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, AveragePooling1D, Activation, Flatten, Dense, Dropout
from keras.activations import relu, softmax
from keras.optimizers import Adam
from keras.losses import categorical_crossentropy, sparse_categorical_crossentropy
from keras.metrics import Accuracy
from keras.utils.np_utils import to_categorical

# Tell the Keras backend to use the "channels_last" image data format.
keras.backend.set_image_data_format("channels_last")

In [None]:
# Model Creation

In [None]:
# CNN classifier: four 3x3 convolutions with stride 2 (64, 64, 64, 128 filters),
# each followed by ReLU, then a dense head
# (256 -> dropout 0.5 -> ReLU -> 64 -> ReLU -> 2 -> softmax).
# The input shape is taken from the first training image.
model = Sequential([
    Conv2D(input_shape=X_train[0].shape, filters=64, kernel_size=3, strides=(2, 2)),
    Activation(relu),
    Conv2D(filters=64, kernel_size=3, strides=(2, 2)),
    Activation(relu),
    Conv2D(filters=64, kernel_size=3, strides=(2, 2)),
    Activation(relu),
    Conv2D(filters=128, kernel_size=3, strides=(2, 2)),
    Activation(relu),
    Flatten(),
    Dense(256),
    Dropout(0.5),
    Activation(relu),
    Dense(64),
    Activation(relu),
    Dense(2),
    Activation(softmax),
])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Model Compilation

In [None]:
# Compile with a default-configured Adam optimizer, categorical cross-entropy
# loss (the labels are one-hot encoded) and accuracy as the reported metric.
model.compile(Adam(), loss=categorical_crossentropy, metrics=['accuracy'])

In [None]:
# Model Fit

In [None]:
# Train for 10 epochs with batches of 100, using the validation split as validation_data.
# NOTE(review): the images are fed as loaded; no pixel scaling is applied anywhere in this notebook.
model.fit(X_train, y_train, batch_size=100, epochs = 10, validation_data=(X_val, y_val))

# Model Prediction and Manual Validation


In [None]:
# Predicted class for every validation image.
y_pred = model.predict_classes(X_val, batch_size=30)

In [None]:
# Display the predicted classes.
y_pred

In [None]:
# Turn the one-hot validation labels back into class indices so they can be compared with y_pred.
y_valid = np.argmax(y_val, axis=1)

In [None]:
# Count wrong (False) and correct (True) predictions on the validation split.
np.unique(y_pred == y_valid, return_counts=True)

In [None]:
# Pick a random test image, print its true class and the model's prediction, then display it.
# Re-run this cell to inspect different test samples.

random_num = random.randint(0, len(X_test) - 1)
sample = X_test[random_num]

print("Cracked Concrete Image 'positive'" if y_test[random_num][1] else "Non Cracked Concrete Image 'Negative'")

# The single image is wrapped in a one-element array so it is passed as a batch.
predicted_class = model.predict_classes(np.array([sample]), batch_size=30)[0]
text = "Cracked" if predicted_class else "Not Cracked"
print(f"Predicted that the concrete image is '{text}'\n")
show_cv_image(sample)


# Metrics

In [None]:
# Imports
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Confusion Matrix and Classification Report for Train
y_pred = model.predict_classes(X_train, batch_size=30)
# target_names follow the sorted class indices: 0 = 'Negative' folder (not cracked),
# 1 = 'Positive' folder (cracked). The previous order mislabeled the report rows.
target_names = ['Not Cracked', 'Cracked']
print(confusion_matrix(np.argmax(y_train, axis=1), y_pred))
print(classification_report(np.argmax(y_train, axis=1), y_pred, target_names=target_names))

In [None]:
# Confusion Matrix and Classification Report for Validation
y_pred = model.predict_classes(X_val, batch_size=30)
print(confusion_matrix(np.argmax(y_val, axis=1), y_pred))
# target_names follow the sorted class indices: 0 = 'Negative' folder (not cracked),
# 1 = 'Positive' folder (cracked). The previous order mislabeled the report rows.
target_names = ['Not Cracked', 'Cracked']
print(classification_report(np.argmax(y_val, axis=1), y_pred, target_names=target_names))

In [None]:
# Confusion Matrix and Classification Report for Test
y_pred = model.predict_classes(X_test, batch_size=30)
print(confusion_matrix(np.argmax(y_test, axis=1), y_pred))
# target_names follow the sorted class indices: 0 = 'Negative' folder (not cracked),
# 1 = 'Positive' folder (cracked). The previous order mislabeled the report rows.
target_names = ['Not Cracked', 'Cracked']
print(classification_report(np.argmax(y_test, axis=1), y_pred, target_names=target_names))