# **Image Processing for Detecting Cracks in Mine Pillars**


### Environment Setup and Library Imports

In [1]:
! pip install -q kaggle

In [2]:
# Mount Google Drive inside the Colab runtime so files stored there are reachable
# from the notebook. Running this cell triggers a prompt asking you to grant
# Colab access to your Drive.

from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Kaggle needs an API token (kaggle.json) before the CLI can download datasets.
# To get kaggle.json:
#  1. Go to https://www.kaggle.com/ -> click on your profile picture (top right) -> 'Account'.
#  2. Scroll down to the "API" section and click "Create New API Token".
#  3. A file called kaggle.json will be downloaded to your system.
#
# Upload kaggle.json from your local machine to Colab (this step is required).
# Running this cell opens a file picker; choose kaggle.json.

from google.colab import files
uploaded = files.upload()

# When a kaggle.json already exists in the working directory, Colab stores the new
# upload under a different name (e.g. "kaggle (3).json"), and the next cell would
# then copy the old file. Write the freshly uploaded bytes to the canonical name
# so the newest token is always the one that gets installed.
if uploaded:
    _token_bytes = next(iter(uploaded.values()))
    with open('kaggle.json', 'wb') as _token_file:
        _token_file.write(_token_bytes)

Saving kaggle.json to kaggle (3).json


In [4]:
# making a new directory with name kaggle
! mkdir ~/.kaggle
# /moving kaggle.json to root\kaggle
! cp kaggle.json ~/.kaggle/



mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [5]:
! kaggle competitions download -c 'arunrk7/surface-crack-detection'
! kaggle datasets download -d 'arunrk7/surface-crack-detection'


404 Client Error: Not Found for url: https://www.kaggle.com/api/v1/competitions/data/download-all/arunrk7/surface-crack-detection
Dataset URL: https://www.kaggle.com/datasets/arunrk7/surface-crack-detection
License(s): copyright-authors
surface-crack-detection.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
! unzip -q /content/surface-crack-detection.zip -d /content/surface-crack-detection


replace /content/surface-crack-detection/Negative/00001.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


import os
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, r2_score

import tensorflow as tf

### Get Positive & Negative Directories

In [None]:
# Root of the extracted dataset; each class lives in its own sub-folder.
dataset_root = Path('/content/surface-crack-detection')

positive_dir = dataset_root / 'Positive'
negative_dir = dataset_root / 'Negative'

In [None]:
# Report how many sub-folders and files each directory of the dataset contains.
for current_dir, subfolders, filenames in os.walk("/content/surface-crack-detection"):
    folder_count = len(subfolders)
    file_count = len(filenames)
    print(f"there are {folder_count} folders and {file_count} files in {current_dir}")

### Creating DataFrames

In [None]:
def generate_df(img_dir, label):
    """Build a DataFrame listing the ``.jpg`` files of one folder with a fixed label.

    Parameters
    ----------
    img_dir : pathlib.Path
        Folder searched (non-recursively) for ``*.jpg`` files.
    label : str
        Value written into the ``Label`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``Filepath`` (file path as string) and ``Label``.
        The frame is empty when the folder holds no ``.jpg`` files.
    """
    # glob() yields files in filesystem order, which can differ between machines
    # and runs; sorting makes the seeded shuffles/splits downstream reproducible.
    jpg_paths = sorted(img_dir.glob(r'*.jpg'))

    file_paths = pd.Series(jpg_paths, name='Filepath').astype(str)
    labels = pd.Series(label, name='Label', index=file_paths.index)
    df = pd.concat([file_paths, labels], axis=1)
    return df

In [None]:
# One labelled frame per class.
positive_df = generate_df(positive_dir, 'POSITIVE')
negative_df = generate_df(negative_dir, 'NEGATIVE')

# Stack both classes, then shuffle the rows with a fixed seed and renumber them.
stacked_df = pd.concat([positive_df, negative_df], axis=0)
shuffled_df = stacked_df.sample(frac=1, random_state=1)
all_df = shuffled_df.reset_index(drop=True)
print(all_df)

### Split the Dataset

In [None]:
# Work on a fixed-seed random subset of 6000 images, then split it 70/30
# into training and test frames.
subset_df = all_df.sample(6000, random_state=1)
train_df, test_df = train_test_split(
    subset_df,
    train_size=0.7,
    shuffle=True,
    random_state=1,
)

### Loading Image Data

In [None]:
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

# Both generators scale pixel values by 1/255; the training generator additionally
# reserves 20% of its data as a validation subset.
train_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_gen = ImageDataGenerator(rescale=1./255)

In [None]:
# Settings shared by all three data flows.
flow_options = dict(
    x_col='Filepath',
    y_col='Label',
    target_size=(120,120),
    color_mode='rgb',
    class_mode='binary',
    batch_size=32,
    seed=42,
)

# Training and validation batches come from the same frame, separated by the
# generator's validation_split.
train_data = train_gen.flow_from_dataframe(train_df, shuffle=True, subset='training', **flow_options)

val_data = train_gen.flow_from_dataframe(train_df, shuffle=True, subset='validation', **flow_options)

# The test flow is not shuffled, so predictions stay in the order of its labels.
test_data = test_gen.flow_from_dataframe(test_df, shuffle=False, **flow_options)

In [None]:
# Display the test iterator object as the cell output.
test_data

### Model building

In [None]:
keras_layers = tf.keras.layers

# Input: 120x120 RGB images.
inputs = tf.keras.Input(shape=(120,120,3))

# Two convolution + max-pooling stages (16 then 32 filters).
x = keras_layers.Conv2D(filters=16, kernel_size=(3,3), activation='relu')(inputs)
x = keras_layers.MaxPool2D(pool_size=(2,2))(x)
x = keras_layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu')(x)
x = keras_layers.MaxPool2D(pool_size=(2,2))(x)

# Average each feature map to one value, then map to a single sigmoid output.
x = keras_layers.GlobalAveragePooling2D()(x)
outputs = keras_layers.Dense(1, activation='sigmoid')(x)

In [None]:
# Wrap the layer graph built above (inputs -> outputs) into a Keras model.
model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Adam optimizer, binary cross-entropy loss, accuracy tracked as the metric.
compile_settings = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_settings)

In [None]:
# Print the model summary (layers, output shapes and parameter counts).
model.summary()

In [None]:
# Stop once val_loss has not improved for 30 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=30,
    restore_best_weights=True,
)

# Train for at most 100 epochs, validating on val_data after each one.
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=100,
    callbacks=[early_stopping],
)

### Plotting

In [None]:
# Plot the per-epoch training and validation loss recorded during fit().
loss_curves = ['loss', 'val_loss']
axis_labels = {'index':'Epoch'}

fig = px.line(
    history.history,
    y=loss_curves,
    labels=axis_labels,
    title='Training and Validation Loss over Time',
)

fig.show()

### Final Results

In [None]:
# Evaluate on the held-out test flow; results[0] is the loss and results[1] the
# accuracy metric requested in compile().
results = model.evaluate(test_data, verbose=0)
loss = results[0]
accuracy = results[1]

print(f'Test Loss {loss:.5f}')
print(f'Test Accuracy {accuracy * 100:.2f} %')


# Run inference once and derive both views from the same output.
# test_data is unshuffled, so predictions line up with test_data.labels.
y_certain = np.squeeze(model.predict(test_data))   # raw sigmoid outputs (not cast to int, which would truncate them to 0)
y_pred = (y_certain >= 0.5).astype(int)             # hard 0/1 decisions at the 0.5 threshold

conf_matr = confusion_matrix(test_data.labels, y_pred)

# NOTE(review): target_names assumes class index 0 is NEGATIVE and 1 is POSITIVE;
# confirm against test_data.class_indices.
class_report = classification_report(test_data.labels, y_pred,
                                         target_names=['NEGATIVE', 'POSITIVE'])

plt.figure(figsize=(6,6))

sns.heatmap(conf_matr, fmt='g', annot=True, cbar=False, vmin=0, cmap='Blues')

# Heatmap cells are centred at +0.5, hence the tick offset.
plt.xticks(ticks=np.arange(2) + 0.5, labels=['NEGATIVE', 'POSITIVE'])
plt.yticks(ticks=np.arange(2) + 0.5, labels=['NEGATIVE', 'POSITIVE'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# NOTE(review): R^2 is a regression metric; for this binary classifier the
# classification report below is the more meaningful summary. Kept so the
# notebook's printed output is unchanged.
print('r2 Score : ', r2_score(test_data.labels, y_pred))
print()
print('Classification Report :\n......................\n', class_report)

### Testing on a New Dataset

In [None]:
def test_new_data(dir_path):
    """Classify every ``.jpg`` image in a folder with the trained ``model``.

    Parameters
    ----------
    dir_path : str or path-like
        Folder containing the images to classify.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``'Result'`` (text verdict) and
        ``'Confidance of being Cracked'`` (model output rounded to 6 decimals).
        Both columns hold strings because they are assembled through one NumPy
        array. The frame is empty when the folder has no ``.jpg`` files.
    """
    # Column spelling is kept as-is so existing consumers of the frame keep working.
    result_columns = ['Result', 'Confidance of being Cracked']

    new_test_dir = Path(dir_path)

    # The label is a placeholder; these images have no ground truth.
    df_new = generate_df(new_test_dir, 'Testing')
    if df_new.empty:
        # Nothing to predict on: return an empty result instead of failing in predict().
        return pd.DataFrame(columns=result_columns)

    test_data_new = test_gen.flow_from_dataframe(df_new,
                                          x_col='Filepath',
                                          y_col='Label',
                                          target_size=(120,120),
                                          color_mode='rgb',
                                          batch_size=5,
                                          shuffle=False,
                                          seed=42)

    # Predict once; force a column shape so a single image does not collapse
    # to a 0-d array (which cannot be iterated).
    probabilities = model.predict(test_data_new).reshape(-1, 1)

    y_certain = probabilities.round(6)
    y_pred = (probabilities >= 0.5).astype(int).ravel()

    y_out = ['Negative (Not Crack)' if flag == 0 else 'Positive(Crack) '
             for flag in y_pred]

    result = pd.DataFrame(np.c_[y_out, y_certain], columns=result_columns)

    return result

In [None]:
#results = test_new_data(r'/content/drive/MyDrive/200_Check')

In [None]:
import matplotlib.pyplot as plt
import random
import cv2
import numpy as np

In [None]:
def func(path,model):
  """Pick a random image from a random class folder, classify it and show it.

  Parameters
  ----------
  path : str
      Directory whose sub-folders are the classes; each holds image files.
  model : object
      Trained model exposing ``predict``; it is fed a (1, 120, 120, 3) float32
      array scaled to [0, 1].

  Raises
  ------
  ValueError
      If the chosen file cannot be decoded as an image.
  """
  # Only sub-directories count as classes; stray files next to them are ignored.
  class_folders=[entry for entry in os.listdir(path)
                 if os.path.isdir(os.path.join(path,entry))]
  y_true=random.choice(class_folders)
  folder_path=os.path.join(path,y_true)
  file_path=os.path.join(folder_path,random.choice(os.listdir(folder_path)))
  img=cv2.imread(file_path)
  if img is None:
    # cv2.imread signals an unreadable file by returning None instead of raising.
    raise ValueError(f"could not read image file: {file_path}")
  # OpenCV loads BGR; convert to RGB to match training and matplotlib display.
  img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
  img=cv2.resize(img,(120,120))
  img_scalled=img.astype("float32")
  img_scalled=img_scalled/255.
  # Add the batch dimension expected by predict().
  img_scalled=np.expand_dims(img_scalled,axis=0)
  predicted=model.predict(img_scalled,verbose=0)
  print(predicted[0][0])
  if predicted[0][0]<0.5:
    y_pred="negative"
  else:
    y_pred="positive"
  print("actual: ",y_true,"predicted: ",y_pred)
  plt.imshow(img)
  plt.axis("off")
  # Must be called: a bare `plt.show` only references the function.
  plt.show()
# Spot-check the trained model on one randomly chosen image from the dataset folder.
func("/content/surface-crack-detection",model)