# Concrete Crack Detection with PyTorch

## Table of contents

## Data Installation and Information

**Download the data**

In case you can't find it on the internet, I will provide code here that can be run to download the dataset automatically into your local file system.

**About the Data**
- The dataset contains concrete images having cracks. The data is collected from various METU Campus Buildings.
- The dataset is divided into two classes, negative and positive crack images, for image classification.
- Each class has 20000 images, for a total of 40000 images of 227 x 227 pixels with RGB channels.
- The dataset is generated from 458 high-resolution images (4032x3024 pixel) with the method proposed by Zhang et al (2016). 
- High-resolution images have variance in terms of surface finish and illumination conditions. 
- No data augmentation in terms of random rotation or flipping is applied. 

Cite: Özgenel, Çağlar Fırat (2019), “Concrete Crack Images for Classification”, Mendeley Data, V2, doi: 10.17632/5y9wdsg2zt.2

For more details, see this [link](https://data.mendeley.com/datasets/5y9wdsg2zt/2)

## Import Libraries and Auxiliary Functions

In [3]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader

# Seed PyTorch's default random number generator with a fixed value (0).
torch.manual_seed(0)

<torch._C.Generator at 0x11fa25550>

## Build Dataset

In [10]:
class Concrete_Dataset(Dataset):
    """Concrete crack images served as ``(image, label)`` pairs.

    JPEG files are read from ``<directory>/Positive`` (label 1) and
    ``<directory>/Negative`` (label 0). Samples alternate positive, negative,
    positive, ... for as long as both classes have files left; any surplus
    files of the larger class follow at the end. The first ``train_size``
    samples form the training split, the remainder the validation split.

    Args:
        transform: Optional callable applied to the decoded RGB PIL image.
            When omitted, the image is converted to a tensor with
            ``transforms.ToTensor`` so samples can be batched directly.
        train: ``True`` selects the training split, ``False`` the
            validation split.
        directory: Root folder holding the ``Positive`` and ``Negative``
            sub-folders.
        train_size: Number of leading samples assigned to the training split.

    Attributes are kept as in the original implementation: ``all_files``
    (list of paths), ``Y`` (int64 label tensor), ``transform`` and ``len``.
    """

    def __init__(self, transform=None, train=True, *,
                 directory="./resources/data", train_size=30000):
        super().__init__()
        positive_files = self._list_images(os.path.join(directory, "Positive"))
        negative_files = self._list_images(os.path.join(directory, "Negative"))

        # Interleave the part both classes can cover, then append whatever is
        # left of the larger class. Assigning unequal lists to [::2] / [1::2]
        # directly would raise ValueError.
        paired = min(len(positive_files), len(negative_files))
        all_files = [None] * (2 * paired)
        all_files[::2] = positive_files[:paired]
        all_files[1::2] = negative_files[:paired]
        labels = [1, 0] * paired

        extra_positive = positive_files[paired:]
        extra_negative = negative_files[paired:]
        all_files += extra_positive + extra_negative
        labels += [1] * len(extra_positive) + [0] * len(extra_negative)

        split = slice(0, train_size) if train else slice(train_size, None)
        self.all_files = all_files[split]
        self.Y = torch.tensor(labels, dtype=torch.long)[split]
        self.transform = transform
        self.len = len(self.all_files)

    @staticmethod
    def _list_images(folder):
        """Return the ``.jpg`` paths in *folder*, sorted by name.

        ``os.listdir`` returns entries in arbitrary order; sorting keeps the
        train/validation split identical from run to run.
        """
        return sorted(
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.endswith(".jpg")
        )

    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the integer index *idx*."""
        # The files are JPEG images, not serialized torch objects, so they
        # are decoded with PIL; torch.load() cannot read them.
        with Image.open(self.all_files[idx]) as handle:
            image = handle.convert("RGB")
        y = self.Y[idx]

        if self.transform is not None:
            image = self.transform(image)
        else:
            # Default to a tensor so a DataLoader can collate the samples.
            image = transforms.ToTensor()(image)

        return image, y

In [None]:
# The pretrained torchvision ResNet weights were trained on inputs scaled to
# [0, 1] and normalized with these ImageNet channel statistics, so the same
# preprocessing is applied to both splits.
# NOTE(review): ToTensor needs the dataset to hand the transform a PIL image
# or ndarray -- confirm against Concrete_Dataset.__getitem__.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

train_dataset = Concrete_Dataset(transform=preprocess, train=True)
validation_dataset = Concrete_Dataset(transform=preprocess, train=False)

Load some samples

In [None]:
# Show the first few training samples with their labels.
# The dataset is indexed one sample at a time: a slice such as
# train_dataset[0:3] would pass a list of paths to the image loader, and
# int(sample) on an (image, label) tuple raises TypeError.
for index in range(min(3, len(train_dataset))):
    image, label = train_dataset[index]

    if torch.is_tensor(image) and image.dim() == 3:
        # imshow expects height x width x channels; assumes the tensor is
        # channel-first -- TODO confirm against the dataset's transform.
        image = image.detach().cpu().permute(1, 2, 0)
        low, high = image.min(), image.max()
        if (low < 0 or high > 1) and high > low:
            # Normalized tensors fall outside [0, 1]; rescale for display.
            image = (image - low) / (high - low)
        image = image.numpy()

    plt.imshow(image)
    plt.xlabel("y=" + str(label.item()))
    plt.title("training data, sample {}".format(index))
    plt.show()

## Build Model

In [None]:
# Build ResNet-18 initialised with its pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` -- confirm the installed version before changing the call.
model = models.resnet18(pretrained=True)

# Freeze all existing weights so they receive no gradient updates.
for weight in model.parameters():
    weight.requires_grad_(False)

<code>resnet18</code> is used to classify 1000 different objects; as a result, the last layer has 1000 outputs. Its 512 inputs come from the fact that the previous hidden layer has 512 outputs.

Then, we'll replace the output layer <code>model.fc</code> of the neural network with a <code>nn.Linear</code> object, to classify 2 different classes. For the parameter <code>in_features</code>, remember that the last hidden layer has 512 neurons.

In [None]:
number_of_classes = 2

# Take the input width from the layer being replaced instead of hard-coding
# 512, so the new head always matches the backbone's final feature size.
# The new layer's parameters are trainable (requires_grad defaults to True).
model.fc = nn.Linear(model.fc.in_features, number_of_classes)

print(model)

## Train Model

In [None]:
# Loss for multi-class classification on raw logits.
criterion = nn.CrossEntropyLoss()

# Optimize only the parameters that still require gradients.
trainable_parameters = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(trainable_parameters, lr=0.001)

# Mini-batch loaders. The training loader reshuffles the samples every epoch
# so batches are not presented in the same fixed order each time; validation
# order does not affect the accuracy, so it stays sequential.
train_loader = DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=100)

Now, we'll combine them all and start training the model; the `loss` and `accuracy` will be recorded for further insights.

In [None]:
# Train for n_epochs, recording the loss of every mini-batch and the
# validation accuracy after every epoch.
n_epochs = 1
loss_list = []       # one float per training mini-batch
accuracy_list = []   # one validation accuracy per epoch
N_test = len(validation_dataset)
N_train = len(train_dataset)

Loss = 0  # not used by this cell; kept so the cell defines the same names

for epoch in range(n_epochs):
    # Training mode is set once per epoch rather than on every batch.
    model.train()
    for x, y in train_loader:
        # clear gradient
        optimizer.zero_grad()
        # make a prediction
        z = model(x)
        # calculate loss
        loss = criterion(z, y)
        # calculate gradients of parameters
        loss.backward()
        # update parameters
        optimizer.step()
        # store a plain float so the history holds no tensors
        loss_list.append(loss.item())

    correct = 0
    model.eval()
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time.
    with torch.no_grad():
        for x_test, y_test in validation_loader:
            # make a prediction
            z = model(x_test)
            # index of the largest logit is the predicted class
            _, yhat = torch.max(z, 1)
            # count correctly classified samples in the mini-batch
            correct += (yhat == y_test).sum().item()

    accuracy = correct / N_test
    accuracy_list.append(accuracy)

## Evaluation

In [None]:
# Report the validation accuracy left in `accuracy` by the training cell.
print(f'Model accuracy: {accuracy}')

In [None]:
# Draw the recorded training loss, one point per entry of loss_list.
figure, axes = plt.subplots()
axes.plot(loss_list)
axes.set_xlabel("iteration")
axes.set_ylabel("loss")
plt.show()

We'll load some misclassified samples

In [None]:
# Walk the validation set one sample at a time and report the first four
# samples the model gets wrong.
misclassified_count = 0
validation_single_loader = DataLoader(dataset=validation_dataset, batch_size=1)
i = 0  # 1-based sample number; stays 0 if the loader is empty

model.eval()
# Inference only: disable autograd so no graph is built per sample.
with torch.no_grad():
    for i, (x_test, y_test) in enumerate(validation_single_loader, start=1):
        z = model(x_test)
        _, yhat = torch.max(z, 1)

        if (yhat != y_test).any():
            misclassified_count += 1
            print(f'sample {i} - predicted value: {yhat} - actual value: {y_test}')
            # Stop as soon as four have been found instead of loading one
            # more sample first.
            if misclassified_count == 4:
                break