In [99]:
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from PIL import Image
import random
import csv

In [43]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

In [8]:
import joblib

## Constantes (paths, dimensões, etc)

In [85]:
base_dir_train_crack = 'dataset/train/crack/'
base_dir_train_non_crack = 'dataset/train/non-crack_random_subset/'
base_dir_test_crack = 'dataset/test/crack/'
base_dir_test_non_crack = 'dataset/test/non-crack_random_subset/'
base_dir_processed_train_images = 'processed-dataset-train/'
base_dir_trained_models = 'trained-models/'

In [10]:
preprocessed_image_name_prefix = 'preprocessed_'

In [100]:
svm_model_filename = 'svm_model.pkl'
rf_model_filename = 'rf_model.pkl'

In [150]:
testing_case_dir = 'testing-case-example/Testing/'
testing_case_csv_file_name = 'Testing.csv'

In [11]:
default_image_width = 256
default_image_height = 256

In [12]:
median_blur_kernel_size = 3
clahe_clip_limit = 4.0
clahe_tile_grid_size = (30, 30)
first_erosion_iterations = 3
first_dilation_iterations = 2
size_for_erosion_and_dilation_element = 2

element_for_erosion_and_dilation = \
    cv2.getStructuringElement(cv2.MORPH_CROSS, \
                              (2 * size_for_erosion_and_dilation_element + 1, 2 * size_for_erosion_and_dilation_element + 1), \
                              (size_for_erosion_and_dilation_element, size_for_erosion_and_dilation_element))

adapt_thresh_neighbour_size = 11
adapt_thresh_subtraction_constant = 2

sobel_kernel_size = 3

In [13]:
svm_param_grid_for_search = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

In [83]:
rf_param_grid_for_search = {
    'n_estimators': [150, 250],
    'max_depth': [10, 20],
    'min_samples_split': [3, 7],
    'min_samples_leaf': [1, 2, 4]
}

## Funções de pré-processamento

In [14]:
def pre_process_with_grayscale_median_clahe_darkening_erosion_dilation( \
                                                                 images_list, \
                                                                 median_blur_kernel_size, \
                                                                 clahe_clip_limit, \
                                                                 clahe_tile_grid_size, \
                                                                 element_for_erosion_and_dilation, \
                                                                 first_erosion_iterations, \
                                                                 first_dilation_iterations):
    
    pre_processed_images = []
    clahe = cv2.createCLAHE(clipLimit = clahe_clip_limit, tileGridSize = clahe_tile_grid_size)
    
    for image in images_list:
        grayscale_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) # converte para escala de cinzas
        blured_image = cv2.medianBlur(grayscale_image, median_blur_kernel_size) # aplica filtro de mediana
        clahe_image = clahe.apply(blured_image) # aplica equalizacao de histograma por blocos, uniformiza iluminacao
        min_pixel_value = np.min(clahe_image) # obtem o pixel de menor valor na imagem
        darker_image = clahe_image - min_pixel_value # escurece toda a imagem, subtraindo o valor do menor pixel
        
        erosion_image = cv2.erode(darker_image, \
                                  element_for_erosion_and_dilation, \
                                  iterations = first_erosion_iterations) # aplica n erosoes
        
        dilated_image = cv2.dilate(erosion_image, \
                                   element_for_erosion_and_dilation, \
                                   iterations = first_dilation_iterations) # aplica n dilatacoes
        
        pre_processed_images.append(dilated_image)
        
    return pre_processed_images

In [48]:
def pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_adaptthreshold(
                                                                            images_list, \
                                                                            median_blur_kernel_size, \
                                                                            clahe_clip_limit, \
                                                                            clahe_tile_grid_size, \
                                                                            element_for_erosion_and_dilation, \
                                                                            first_erosion_iterations, \
                                                                            first_dilation_iterations, \
                                                                            adapt_thresh_neighbour_size, \
                                                                            adapt_thresh_subtraction_constant):
    
    pre_processed_images = []
    clahe = cv2.createCLAHE(clipLimit = clahe_clip_limit, tileGridSize = clahe_tile_grid_size)
    
    for image in images_list:
        grayscale_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) # converte para escala de cinzas
        blured_image = cv2.medianBlur(grayscale_image, median_blur_kernel_size) # aplica filtro de mediana
        clahe_image = clahe.apply(blured_image) # aplica equalizacao de histograma por blocos, uniformiza iluminacao
        min_pixel_value = np.min(clahe_image) # obtem o pixel de menor valor na imagem
        darker_image = clahe_image - min_pixel_value # escurece toda a imagem, subtraindo o valor do menor pixel
        
        erosion_image = cv2.erode(darker_image, \
                                  element_for_erosion_and_dilation, \
                                  iterations = first_erosion_iterations) # aplica n erosoes
        
        dilated_image = cv2.dilate(erosion_image, \
                                   element_for_erosion_and_dilation, \
                                   iterations = first_dilation_iterations) # aplica n dilatacoes
        
        image_threshold = cv2.adaptiveThreshold(dilated_image, 255, \
                                                cv2.ADAPTIVE_THRESH_MEAN_C, \
                                                cv2.THRESH_BINARY, \
                                                adapt_thresh_neighbour_size, \
                                                adapt_thresh_subtraction_constant) # binarização por threshold adaptativo
        
        pre_processed_images.append(image_threshold)
        
    return pre_processed_images

In [16]:
def pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_sobel(
                                                                   images_list, \
                                                                   median_blur_kernel_size, \
                                                                   clahe_clip_limit, \
                                                                   clahe_tile_grid_size, \
                                                                   element_for_erosion_and_dilation, \
                                                                   first_erosion_iterations, \
                                                                   first_dilation_iterations, \
                                                                   sobel_kernel_size):
    
    pre_processed_images = []
    clahe = cv2.createCLAHE(clipLimit = clahe_clip_limit, tileGridSize = clahe_tile_grid_size)
    
    for image in images_list:
        grayscale_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) # converte para escala de cinzas
        blured_image = cv2.medianBlur(grayscale_image, median_blur_kernel_size) # aplica filtro de mediana
        clahe_image = clahe.apply(blured_image) # aplica equalizacao de histograma por blocos, uniformiza iluminacao
        min_pixel_value = np.min(clahe_image) # obtem o pixel de menor valor na imagem
        darker_image = clahe_image - min_pixel_value # escurece toda a imagem, subtraindo o valor do menor pixel
        
        erosion_image = cv2.erode(darker_image, \
                                  element_for_erosion_and_dilation, \
                                  iterations = first_erosion_iterations) # aplica 2 erosoes
        
        dilated_image = cv2.dilate(erosion_image, \
                                   element_for_erosion_and_dilation, \
                                   iterations = first_dilation_iterations) # aplica 2 dilatacoes
        
        gradient_x = cv2.Sobel(dilated_image, cv2.CV_64F, 1, 0, ksize=sobel_kernel_size)
        gradient_y = cv2.Sobel(dilated_image, cv2.CV_64F, 0, 1, ksize=sobel_kernel_size)
        gradient_magnitude = np.sqrt(gradient_x**2 + gradient_y**2)
        gradient_magnitude = cv2.convertScaleAbs(gradient_magnitude) # obtém o filtro sobel horizontal e vertical
        
        pre_processed_images.append(gradient_magnitude)
        
    return pre_processed_images

In [130]:
def pre_process_with_median_clahe_darkening_erosion_dilation( \
                                                             images_list, \
                                                             median_blur_kernel_size, \
                                                             clahe_clip_limit, \
                                                             clahe_tile_grid_size, \
                                                             element_for_erosion_and_dilation, \
                                                             first_erosion_iterations, \
                                                             first_dilation_iterations):
    
    pre_processed_images = []
    clahe = cv2.createCLAHE(clipLimit = clahe_clip_limit, tileGridSize = clahe_tile_grid_size)
    
    for image in images_list:
        blured_image = cv2.medianBlur(image, median_blur_kernel_size) # aplica filtro de mediana
        img_lab = cv2.cvtColor(blured_image, cv2.COLOR_RGB2Lab)
        img_lab[:,:,0] = clahe.apply(img_lab[:,:,0])
        clahe_image = cv2.cvtColor(img_lab, cv2.COLOR_Lab2RGB)
        
        min_pixel_value = np.min(clahe_image) # obtem o pixel de menor valor na imagem
        darker_image = clahe_image - min_pixel_value # escurece toda a imagem, subtraindo o valor do menor pixel
        
        erosion_image = cv2.erode(darker_image, \
                                  element_for_erosion_and_dilation, \
                                  iterations = first_erosion_iterations) # aplica n erosoes
        
        dilated_image = cv2.dilate(erosion_image, \
                                   element_for_erosion_and_dilation, \
                                   iterations = first_dilation_iterations) # aplica n dilatacoes
        
        pre_processed_images.append(dilated_image)
        
    return pre_processed_images

## Demais funções

In [24]:
def load_images_from_name_list(base_dir, list_with_names, default_image_width, default_image_height):
    loaded_images = []
    for image_name in list_with_names:
        im = cv2.imread(base_dir + image_name)
        im_correct_colorscheme = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im_resized = cv2.resize(im_correct_colorscheme, (default_image_width, default_image_height))
        loaded_images.append(im_resized)
    return loaded_images

In [18]:
def save_images(base_dir, images_list, images_names_list, image_name_prefix, directory_to_save):
    for idx in range(len(images_names_list)):
        
        if not os.path.exists(base_dir + directory_to_save):
            os.makedirs(base_dir + directory_to_save)
            
        cv2.imwrite(base_dir + directory_to_save + '/' + image_name_prefix + images_names_list[idx], images_list[idx])

In [19]:
def get_data_and_labels(pre_processed_train_crack, pre_processed_train_non_crack):
    input_masks = []
    input_masks.extend(pre_processed_train_crack)
    input_masks.extend(pre_processed_train_non_crack) # adicionamos todas as imagens à lista de input do modelo
    input_labels = []
    input_labels.extend(np.ones((len(pre_processed_train_crack), ), np.uint8))
    input_labels.extend(np.zeros((len(pre_processed_train_non_crack), ), np.uint8)) # adicionamos as labels das imagens

    input_masks = list(map(lambda x:x.flatten(), input_masks)) # aqui, redimensionamos as imagens para terem 1 dimensao

    zipped_list_for_shuffle = list(zip(input_masks, input_labels))
    random.shuffle(zipped_list_for_shuffle)
    
    shuffled_input_masks, shuffled_input_labels = zip(*zipped_list_for_shuffle)
    
    return list(shuffled_input_masks), list(shuffled_input_labels)

## Carregando conjunto de dados

In [20]:
crack_images_for_train_name_list = os.listdir(base_dir_train_crack)
non_crack_images_for_train_name_list = os.listdir(base_dir_train_non_crack)

crack_images_for_test_name_list = os.listdir(base_dir_test_crack)
non_crack_images_for_test_name_list = os.listdir(base_dir_test_non_crack)

In [25]:
crack_images_for_train_list = load_images_from_name_list(base_dir_train_crack, \
                                                         crack_images_for_train_name_list, \
                                                         default_image_width, default_image_height)

In [26]:
non_crack_images_for_train_list = load_images_from_name_list(base_dir_train_non_crack, \
                                                             non_crack_images_for_train_name_list, \
                                                             default_image_width, default_image_height)

In [30]:
crack_images_for_test_list = load_images_from_name_list(base_dir_test_crack, \
                                                         crack_images_for_test_name_list, \
                                                         default_image_width, default_image_height)

In [31]:
non_crack_images_for_test_list = load_images_from_name_list(base_dir_test_non_crack, \
                                                             non_crack_images_for_test_name_list, \
                                                             default_image_width, default_image_height)

## Treinamento dos modelos

### Pré-processamento 1: Conversão escala de cinzas, filtro mediana, CLAHE, escurecimento, erosão e dilatação

In [32]:
pre_processed_train_crack_1 = pre_process_with_grayscale_median_clahe_darkening_erosion_dilation( \
                                                                                       crack_images_for_train_list, \
                                                                                       median_blur_kernel_size, \
                                                                                       clahe_clip_limit, \
                                                                                       clahe_tile_grid_size, \
                                                                                       element_for_erosion_and_dilation, \
                                                                                       first_erosion_iterations, \
                                                                                       first_dilation_iterations)

In [33]:
pre_processed_train_non_crack_1 = pre_process_with_grayscale_median_clahe_darkening_erosion_dilation( \
                                                                                           non_crack_images_for_train_list, \
                                                                                           median_blur_kernel_size, \
                                                                                           clahe_clip_limit, \
                                                                                           clahe_tile_grid_size, \
                                                                                           element_for_erosion_and_dilation, \
                                                                                           first_erosion_iterations, \
                                                                                           first_dilation_iterations)

In [34]:
pre_processed_test_crack_1 = pre_process_with_grayscale_median_clahe_darkening_erosion_dilation( \
                                                                                       crack_images_for_test_list, \
                                                                                       median_blur_kernel_size, \
                                                                                       clahe_clip_limit, \
                                                                                       clahe_tile_grid_size, \
                                                                                       element_for_erosion_and_dilation, \
                                                                                       first_erosion_iterations, \
                                                                                       first_dilation_iterations)

In [35]:
pre_processed_test_non_crack_1 = pre_process_with_grayscale_median_clahe_darkening_erosion_dilation( \
                                                                                           non_crack_images_for_test_list, \
                                                                                           median_blur_kernel_size, \
                                                                                           clahe_clip_limit, \
                                                                                           clahe_tile_grid_size, \
                                                                                           element_for_erosion_and_dilation, \
                                                                                           first_erosion_iterations, \
                                                                                           first_dilation_iterations)

In [36]:
save_images(base_dir_processed_train_images, \
            pre_processed_train_crack_1, \
            crack_images_for_train_name_list, \
            preprocessed_image_name_prefix, \
            'crack_1')

In [37]:
save_images(base_dir_processed_train_images, \
            pre_processed_train_non_crack_1, \
            non_crack_images_for_train_name_list, \
            preprocessed_image_name_prefix, \
            'non-crack_1')

#### SVM

In [80]:
input_data_1, input_labels_1 = get_data_and_labels(pre_processed_train_crack_1, pre_processed_train_non_crack_1)

In [39]:
svm_model_1 = SVC()
grid_search_svm_1 = GridSearchCV(svm_model_1, svm_param_grid_for_search, cv=5, scoring='accuracy')
grid_search_svm_1.fit(input_data_1, input_labels_1)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 1, 10], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']},
             scoring='accuracy')

In [81]:
test_data_1, test_labels_1 = get_data_and_labels(pre_processed_test_crack_1, pre_processed_test_non_crack_1)

In [41]:
print("Best Hyperparameters SVM 1:", grid_search_svm_1.best_params_)
print("Best Accuracy SVM 1:", grid_search_svm_1.best_score_)
best_svm_model_1 = grid_search_svm_1.best_estimator_
accuracy_1 = best_svm_model_1.score(test_data_1, test_labels_1)
print("Test Set Accuracy SVM 1:", accuracy_1)

Best Hyperparameters SVM 1: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best Accuracy SVM 1: 0.8333333333333333
Test Set Accuracy SVM 1: 0.8407643312101911


In [42]:
svm_test_prediction_1 = best_svm_model_1.predict(test_data_1)

In [45]:
accuracy_svm_1 = accuracy_score(svm_test_prediction_1, test_labels_1)
precision_svm_1 = precision_score(test_labels_1, svm_test_prediction_1)
recall_svm_1 = recall_score(test_labels_1, svm_test_prediction_1)
f1_svm_1 = f1_score(svm_test_prediction_1, test_labels_1)

In [58]:
print("Precision:", precision_svm_1)
print("Recall:", recall_svm_1)
print("F1-SCORE:", f1_svm_1)

Precision: 0.8513513513513513
Recall: 0.8181818181818182
F1-SCORE: 0.8344370860927152


In [76]:
conf_matrix_svm_1 = confusion_matrix(svm_test_prediction_1, test_labels_1)
conf_matrix_svm_1

array([[69, 14],
       [11, 63]], dtype=int64)

In [86]:
joblib.dump(best_svm_model_1, base_dir_trained_models + 'preprocess1_' + svm_model_filename)

['trained-models/preprocess1_svm_model.pkl']

#### Random Forest

In [82]:
rf_model_1 = RandomForestClassifier()
grid_search_rf_1 = GridSearchCV(rf_model_1, rf_param_grid_for_search, cv=5, scoring='accuracy')
grid_search_rf_1.fit(input_data_1, input_labels_1)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'max_depth': [None, 10, 20],
                         'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [100, 200, 300]},
             scoring='accuracy')

In [87]:
print("Best Hyperparameters RF 1:", grid_search_rf_1.best_params_)
print("Best Accuracy RF 1:", grid_search_rf_1.best_score_)
best_model_rf_1 = grid_search_rf_1.best_estimator_
accuracy_rf_1 = best_model_rf_1.score(test_data_1, test_labels_1)
print("Test Set Accuracy:", accuracy_rf_1)

Best Hyperparameters RF 1: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
Best Accuracy RF 1: 0.8300000000000001
Test Set Accuracy: 0.8407643312101911


In [88]:
rf_test_prediction_1 = best_model_rf_1.predict(test_data_1)

In [89]:
precision_rf_1 = precision_score(test_labels_1, rf_test_prediction_1)
recall_rf_1 = recall_score(test_labels_1, rf_test_prediction_1)
f1_rf_1 = f1_score(rf_test_prediction_1, test_labels_1)

In [90]:
print("Precision RF 1:", precision_rf_1)
print("Recall RF 1:", recall_rf_1)
print("F1-SCORE RF 1:", f1_rf_1)

Precision RF 1: 0.8095238095238095
Recall RF 1: 0.8831168831168831
F1-SCORE RF 1: 0.84472049689441


In [91]:
conf_matrix_rf_1 = confusion_matrix(rf_test_prediction_1, test_labels_1)
conf_matrix_rf_1

array([[64,  9],
       [16, 68]], dtype=int64)

In [106]:
joblib.dump(best_model_rf_1, base_dir_trained_models + 'preprocess1_' + rf_model_filename)

['trained-models/preprocess1_rf_model.pkl']

#### Modelo híbrido (StackingClassifier)

In [107]:
stacked_model_1 = StackingClassifier(estimators=[('rf', best_model_rf_1), ('svm', best_svm_model_1)], \
                                     final_estimator=RandomForestClassifier())
stacked_model_1.fit(input_data_1, input_labels_1)

StackingClassifier(estimators=[('rf',
                                RandomForestClassifier(max_depth=10,
                                                       min_samples_split=5)),
                               ('svm', SVC(C=10))],
                   final_estimator=RandomForestClassifier())

In [108]:
sc_predictions_1 = stacked_model_1.predict(test_data_1)

In [109]:
accuracy_s_1 = accuracy_score(sc_predictions_1, test_labels_1)
precision_s_1 = precision_score(test_labels_1, sc_predictions_1)
recall_s_1 = recall_score(test_labels_1, sc_predictions_1)
f1_s_1 = f1_score(sc_predictions_1, test_labels_1)

In [110]:
print("Acurracy SM 1:", accuracy_s_1)
print("Precision SM 1:", precision_s_1)
print("Recall SM 1:", recall_s_1)
print("F1-SCORE SM 1:", f1_s_1)

Acurracy SM 1: 0.7770700636942676
Precision SM 1: 0.7837837837837838
Recall SM 1: 0.7532467532467533
F1-SCORE SM 1: 0.7682119205298014


#### Modelo híbrido (VotationClassifier)

In [117]:
svm_model_votation = SVC(probability=True, C = 10, gamma = 'scale', kernel = 'rbf')

In [118]:
svm_model_votation.fit(input_data_1, input_labels_1)

SVC(C=10, probability=True)

In [119]:
votation_model_1 = VotingClassifier(estimators=[('rf', best_model_rf_1), ('svm', svm_model_votation)], voting='soft')
votation_model_1.fit(input_data_1, input_labels_1)

VotingClassifier(estimators=[('rf',
                              RandomForestClassifier(max_depth=10,
                                                     min_samples_split=5)),
                             ('svm', SVC(C=10, probability=True))],
                 voting='soft')

In [120]:
vt_predictions_1 = votation_model_1.predict(test_data_1)

In [122]:
accuracy_v_1 = accuracy_score(vt_predictions_1, test_labels_1)
precision_v_1 = precision_score(test_labels_1, vt_predictions_1)
recall_v_1 = recall_score(test_labels_1, vt_predictions_1)
f1_v_1 = f1_score(vt_predictions_1, test_labels_1)

In [123]:
print("Acurracy SM 1:", accuracy_v_1)
print("Precision SM 1:", precision_v_1)
print("Recall SM 1:", recall_v_1)
print("F1-SCORE SM 1:", f1_v_1)

Acurracy SM 1: 0.8535031847133758
Precision SM 1: 0.8214285714285714
Recall SM 1: 0.8961038961038961
F1-SCORE SM 1: 0.8571428571428571


### Pré-processamento 2: Conversão escala de cinzas, filtro mediana, CLAHE, escurecimento, erosão, dilatação e threshold adaptativo

In [49]:
pre_processed_train_crack_2 = \
    pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_adaptthreshold( \
                                                                            crack_images_for_train_list, \
                                                                            median_blur_kernel_size, \
                                                                            clahe_clip_limit, \
                                                                            clahe_tile_grid_size, \
                                                                            element_for_erosion_and_dilation, \
                                                                            first_erosion_iterations, \
                                                                            first_dilation_iterations, \
                                                                            adapt_thresh_neighbour_size, \
                                                                            adapt_thresh_subtraction_constant)

In [50]:
pre_processed_train_non_crack_2 = \
    pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_adaptthreshold( \
                                                                            non_crack_images_for_train_list, \
                                                                            median_blur_kernel_size, \
                                                                            clahe_clip_limit, \
                                                                            clahe_tile_grid_size, \
                                                                            element_for_erosion_and_dilation, \
                                                                            first_erosion_iterations, \
                                                                            first_dilation_iterations, \
                                                                            adapt_thresh_neighbour_size, \
                                                                            adapt_thresh_subtraction_constant)

In [51]:
pre_processed_test_crack_2 = pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_adaptthreshold( \
                                                                            crack_images_for_test_list, \
                                                                            median_blur_kernel_size, \
                                                                            clahe_clip_limit, \
                                                                            clahe_tile_grid_size, \
                                                                            element_for_erosion_and_dilation, \
                                                                            first_erosion_iterations, \
                                                                            first_dilation_iterations, \
                                                                            adapt_thresh_neighbour_size, \
                                                                            adapt_thresh_subtraction_constant)

In [52]:
pre_processed_test_non_crack_2 = \
    pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_adaptthreshold( \
                                                                            non_crack_images_for_test_list, \
                                                                            median_blur_kernel_size, \
                                                                            clahe_clip_limit, \
                                                                            clahe_tile_grid_size, \
                                                                            element_for_erosion_and_dilation, \
                                                                            first_erosion_iterations, \
                                                                            first_dilation_iterations, \
                                                                            adapt_thresh_neighbour_size, \
                                                                            adapt_thresh_subtraction_constant)

In [53]:
save_images(base_dir_processed_train_images, \
            pre_processed_train_crack_2, \
            crack_images_for_train_name_list, \
            preprocessed_image_name_prefix, \
            'crack_2')

In [54]:
save_images(base_dir_processed_train_images, \
            pre_processed_train_non_crack_2, \
            non_crack_images_for_train_name_list, \
            preprocessed_image_name_prefix, \
            'non-crack_2')

Obtendo dados e labels

In [55]:
input_data_2, input_labels_2 = get_data_and_labels(pre_processed_train_crack_2, pre_processed_train_non_crack_2)

In [56]:
test_data_2, test_labels_2 = get_data_and_labels(pre_processed_test_crack_2, pre_processed_test_non_crack_2)

#### SVM

In [57]:
svm_model_2 = SVC()
grid_search_svm_2 = GridSearchCV(svm_model_2, svm_param_grid_for_search, cv=5, scoring='accuracy')
grid_search_svm_2.fit(input_data_2, input_labels_2)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 1, 10], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']},
             scoring='accuracy')

In [59]:
print("Best Hyperparameters SVM 2:", grid_search_svm_2.best_params_)
print("Best Accuracy SVM 2:", grid_search_svm_2.best_score_)
best_svm_model_2 = grid_search_svm_2.best_estimator_
accuracy_2 = best_svm_model_2.score(test_data_2, test_labels_2)
print("Test Set Accuracy SVM 2:", accuracy_2)

Best Hyperparameters SVM 2: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Best Accuracy SVM 2: 0.7150000000000001
Test Set Accuracy SVM 2: 0.6687898089171974


In [60]:
svm_test_prediction_2 = best_svm_model_2.predict(test_data_2)

In [61]:
precision_svm_2 = precision_score(test_labels_2, svm_test_prediction_2)
recall_svm_2 = recall_score(test_labels_2, svm_test_prediction_2)
f1_svm_2 = f1_score(svm_test_prediction_2, test_labels_2)

In [62]:
print("Precision:", precision_svm_2)
print("Recall:", recall_svm_2)
print("F1-SCORE:", f1_svm_2)

Precision: 0.6436781609195402
Recall: 0.7272727272727273
F1-SCORE: 0.6829268292682927


In [77]:
conf_matrix_svm_2 = confusion_matrix(svm_test_prediction_2, test_labels_2)
conf_matrix_svm_2

array([[49, 21],
       [31, 56]], dtype=int64)

#### Random Forest

In [92]:
rf_model_2 = RandomForestClassifier()
grid_search_rf_2 = GridSearchCV(rf_model_2, rf_param_grid_for_search, cv=5, scoring='accuracy')
grid_search_rf_2.fit(input_data_2, input_labels_2)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'max_depth': [10, 20], 'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [3, 7],
                         'n_estimators': [150, 250]},
             scoring='accuracy')

In [93]:
print("Best Hyperparameters RF 2:", grid_search_rf_2.best_params_)
print("Best Accuracy RF 2:", grid_search_rf_2.best_score_)
best_model_rf_2 = grid_search_rf_2.best_estimator_
accuracy_rf_2 = best_model_rf_2.score(test_data_2, test_labels_2)
print("Test Set Accuracy RF 2:", accuracy_rf_2)

Best Hyperparameters RF 2: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 7, 'n_estimators': 250}
Best Accuracy RF 2: 0.72
Test Set Accuracy RF 2: 0.6496815286624203


In [94]:
rf_test_prediction_2 = best_model_rf_2.predict(test_data_2)

In [95]:
precision_rf_2 = precision_score(test_labels_2, rf_test_prediction_2)
recall_rf_2 = recall_score(test_labels_2, rf_test_prediction_2)
f1_rf_2 = f1_score(rf_test_prediction_2, test_labels_2)

In [96]:
print("Precision RF 2:", precision_rf_2)
print("Recall RF 2:", recall_rf_2)
print("F1-SCORE RF 2:", f1_rf_2)

Precision RF 2: 0.627906976744186
Recall RF 2: 0.7012987012987013
F1-SCORE RF 2: 0.6625766871165645


In [97]:
conf_matrix_rf_2 = confusion_matrix(rf_test_prediction_2, test_labels_2)
conf_matrix_rf_2

array([[48, 23],
       [32, 54]], dtype=int64)

#### Modelo híbrido (StackingClassifier)

In [124]:
stacked_model_2 = StackingClassifier(estimators=[('rf', best_model_rf_2), ('svm', best_svm_model_2)], \
                                     final_estimator=RandomForestClassifier())
stacked_model_2.fit(input_data_2, input_labels_2)

StackingClassifier(estimators=[('rf',
                                RandomForestClassifier(max_depth=20,
                                                       min_samples_split=7,
                                                       n_estimators=250)),
                               ('svm', SVC(C=1))],
                   final_estimator=RandomForestClassifier())

### Pré-processamento 3: Conversão escala de cinzas, filtro mediana, CLAHE, escurecimento, erosão, dilatação e filtro Sobel

In [63]:
pre_processed_train_crack_3 = \
    pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_sobel( \
                                                                   crack_images_for_train_list, \
                                                                   median_blur_kernel_size, \
                                                                   clahe_clip_limit, \
                                                                   clahe_tile_grid_size, \
                                                                   element_for_erosion_and_dilation, \
                                                                   first_erosion_iterations, \
                                                                   first_dilation_iterations, \
                                                                   sobel_kernel_size)

In [64]:
pre_processed_train_non_crack_3 = \
    pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_sobel( \
                                                                   non_crack_images_for_train_list, \
                                                                   median_blur_kernel_size, \
                                                                   clahe_clip_limit, \
                                                                   clahe_tile_grid_size, \
                                                                   element_for_erosion_and_dilation, \
                                                                   first_erosion_iterations, \
                                                                   first_dilation_iterations, \
                                                                   sobel_kernel_size)

In [65]:
pre_processed_test_crack_3 = \
    pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_sobel( \
                                                                   crack_images_for_test_list, \
                                                                   median_blur_kernel_size, \
                                                                   clahe_clip_limit, \
                                                                   clahe_tile_grid_size, \
                                                                   element_for_erosion_and_dilation, \
                                                                   first_erosion_iterations, \
                                                                   first_dilation_iterations, \
                                                                   sobel_kernel_size)

In [66]:
pre_processed_test_non_crack_3 = \
    pre_process_with_grayscale_median_clahe_darkening_erosion_dilation_sobel( \
                                                                   non_crack_images_for_test_list, \
                                                                   median_blur_kernel_size, \
                                                                   clahe_clip_limit, \
                                                                   clahe_tile_grid_size, \
                                                                   element_for_erosion_and_dilation, \
                                                                   first_erosion_iterations, \
                                                                   first_dilation_iterations, \
                                                                   sobel_kernel_size)

In [67]:
save_images(base_dir_processed_train_images, \
            pre_processed_train_crack_3, \
            crack_images_for_train_name_list, \
            preprocessed_image_name_prefix, \
            'crack_3')

In [68]:
save_images(base_dir_processed_train_images, \
            pre_processed_train_non_crack_3, \
            non_crack_images_for_train_name_list, \
            preprocessed_image_name_prefix, \
            'non-crack_3')

Obtendo dados e labels

In [69]:
input_data_3, input_labels_3 = get_data_and_labels(pre_processed_train_crack_3, pre_processed_train_non_crack_3)

In [70]:
test_data_3, test_labels_3 = get_data_and_labels(pre_processed_test_crack_3, pre_processed_test_non_crack_3)

#### SVM

In [71]:
svm_model_3 = SVC()
grid_search_svm_3 = GridSearchCV(svm_model_3, svm_param_grid_for_search, cv=5, scoring='accuracy')
grid_search_svm_3.fit(input_data_3, input_labels_3)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 1, 10], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']},
             scoring='accuracy')

In [72]:
print("Best Hyperparameters SVM 3:", grid_search_svm_3.best_params_)
print("Best Accuracy SVM 3:", grid_search_svm_3.best_score_)
best_svm_model_3 = grid_search_svm_3.best_estimator_
accuracy_3 = best_svm_model_3.score(test_data_3, test_labels_3)
print("Test Set Accuracy SVM 3:", accuracy_3)

Best Hyperparameters SVM 3: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best Accuracy SVM 3: 0.8666666666666666
Test Set Accuracy SVM 3: 0.821656050955414


In [73]:
svm_test_prediction_3 = best_svm_model_3.predict(test_data_3)

In [74]:
precision_svm_3 = precision_score(test_labels_3, svm_test_prediction_3)
recall_svm_3 = recall_score(test_labels_3, svm_test_prediction_3)
f1_svm_3 = f1_score(svm_test_prediction_3, test_labels_3)

In [75]:
print("Precision:", precision_svm_3)
print("Recall:", recall_svm_3)
print("F1-SCORE:", f1_svm_3)

Precision: 0.8024691358024691
Recall: 0.8441558441558441
F1-SCORE: 0.8227848101265822


In [78]:
conf_matrix_svm_3 = confusion_matrix(svm_test_prediction_3, test_labels_3)
conf_matrix_svm_3

array([[64, 12],
       [16, 65]], dtype=int64)

#### Random Forest

In [98]:
rf_model_3 = RandomForestClassifier()
grid_search_rf_3 = GridSearchCV(rf_model_3, rf_param_grid_for_search, cv=5, scoring='accuracy')
grid_search_rf_3.fit(input_data_3, input_labels_3)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'max_depth': [10, 20], 'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [3, 7],
                         'n_estimators': [150, 250]},
             scoring='accuracy')

In [101]:
print("Best Hyperparameters RF 3:", grid_search_rf_3.best_params_)
print("Best Accuracy RF 3:", grid_search_rf_3.best_score_)
best_model_rf_3 = grid_search_rf_3.best_estimator_
accuracy_rf_3 = best_model_rf_3.score(test_data_3, test_labels_3)
print("Test Set Accuracy RF 3:", accuracy_rf_3)

Best Hyperparameters RF 3: {'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 7, 'n_estimators': 250}
Best Accuracy RF 3: 0.8116666666666668
Test Set Accuracy RF 3: 0.7197452229299363


In [102]:
rf_test_prediction_3 = best_model_rf_3.predict(test_data_3)

In [103]:
precision_rf_3 = precision_score(test_labels_3, rf_test_prediction_3)
recall_rf_3 = recall_score(test_labels_3, rf_test_prediction_3)
f1_rf_3 = f1_score(rf_test_prediction_3, test_labels_3)

In [104]:
print("Precision RF 3:", precision_rf_3)
print("Recall RF 3:", recall_rf_3)
print("F1-SCORE RF 3:", f1_rf_3)

Precision RF 3: 0.7037037037037037
Recall RF 3: 0.7402597402597403
F1-SCORE RF 3: 0.7215189873417721


In [105]:
conf_matrix_rf_3 = confusion_matrix(rf_test_prediction_3, test_labels_3)
conf_matrix_rf_3

array([[56, 20],
       [24, 57]], dtype=int64)

### Pré-processamento 3: Filtro mediana, CLAHE, escurecimento, erosão, dilatação e filtro Sobel

In [131]:
pre_processed_train_crack_4 = \
    pre_process_with_median_clahe_darkening_erosion_dilation( \
                                                       crack_images_for_train_list, \
                                                       median_blur_kernel_size, \
                                                       clahe_clip_limit, \
                                                       clahe_tile_grid_size, \
                                                       element_for_erosion_and_dilation, \
                                                       first_erosion_iterations, \
                                                       first_dilation_iterations)

In [132]:
pre_processed_train_non_crack_4 = \
    pre_process_with_median_clahe_darkening_erosion_dilation( \
                                                       non_crack_images_for_train_list, \
                                                       median_blur_kernel_size, \
                                                       clahe_clip_limit, \
                                                       clahe_tile_grid_size, \
                                                       element_for_erosion_and_dilation, \
                                                       first_erosion_iterations, \
                                                       first_dilation_iterations)

In [133]:
pre_processed_test_crack_4 = \
    pre_process_with_median_clahe_darkening_erosion_dilation( \
                                                           crack_images_for_test_list, \
                                                           median_blur_kernel_size, \
                                                           clahe_clip_limit, \
                                                           clahe_tile_grid_size, \
                                                           element_for_erosion_and_dilation, \
                                                           first_erosion_iterations, \
                                                           first_dilation_iterations)

In [134]:
pre_processed_test_non_crack_4 = \
    pre_process_with_median_clahe_darkening_erosion_dilation( \
                                                           non_crack_images_for_test_list, \
                                                           median_blur_kernel_size, \
                                                           clahe_clip_limit, \
                                                           clahe_tile_grid_size, \
                                                           element_for_erosion_and_dilation, \
                                                           first_erosion_iterations, \
                                                           first_dilation_iterations)

In [135]:
save_images(base_dir_processed_train_images, \
            pre_processed_train_crack_4, \
            crack_images_for_train_name_list, \
            preprocessed_image_name_prefix, \
            'crack_4')

In [136]:
save_images(base_dir_processed_train_images, \
            pre_processed_train_non_crack_3, \
            non_crack_images_for_train_name_list, \
            preprocessed_image_name_prefix, \
            'non-crack_4')

Obtendo dados e labels

In [137]:
input_data_4, input_labels_4 = get_data_and_labels(pre_processed_train_crack_4, pre_processed_train_non_crack_4)

In [138]:
test_data_4, test_labels_4 = get_data_and_labels(pre_processed_test_crack_4, pre_processed_test_non_crack_4)

#### SVM

In [139]:
svm_model_4 = SVC()
grid_search_svm_4 = GridSearchCV(svm_model_4, svm_param_grid_for_search, cv=5, scoring='accuracy')
grid_search_svm_4.fit(input_data_4, input_labels_4)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 1, 10], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']},
             scoring='accuracy')

In [140]:
print("Best Hyperparameters SVM 4:", grid_search_svm_4.best_params_)
print("Best Accuracy SVM 4:", grid_search_svm_4.best_score_)
best_svm_model_4 = grid_search_svm_4.best_estimator_
accuracy_4 = best_svm_model_4.score(test_data_4, test_labels_4)
print("Test Set Accuracy SVM 4:", accuracy_4)

Best Hyperparameters SVM 4: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best Accuracy SVM 4: 0.805
Test Set Accuracy SVM 4: 0.8152866242038217


In [141]:
svm_test_prediction_4 = best_svm_model_4.predict(test_data_4)

In [142]:
precision_svm_4 = precision_score(test_labels_4, svm_test_prediction_4)
recall_svm_4 = recall_score(test_labels_4, svm_test_prediction_4)
f1_svm_4 = f1_score(svm_test_prediction_4, test_labels_4)

In [143]:
print("Precision SVM 4:", precision_svm_4)
print("Recall SVM 4:", recall_svm_4)
print("F1-SCORE SVM 4:", f1_svm_4)

Precision SVM 4: 0.875
Recall SVM 4: 0.7272727272727273
F1-SCORE SVM 4: 0.7943262411347517


In [144]:
conf_matrix_svm_4 = confusion_matrix(svm_test_prediction_4, test_labels_4)
conf_matrix_svm_4

array([[72, 21],
       [ 8, 56]], dtype=int64)

In [145]:
joblib.dump(best_svm_model_4, base_dir_trained_models + 'preprocess4_' + svm_model_filename)

['trained-models/preprocess4_svm_model.pkl']

## Melhor modelo obtido

In [146]:
best_model_file_name = 'preprocess1_svm_model.pkl'

## Casos de teste

In [181]:
#best_model = joblib.load(trained_models_dir + best_model_file_name)
#best_model = best_model_rf_1
best_model = best_svm_model_1

In [182]:
testcase_image_names = []
testcase_image_classes = []

In [183]:
with open(testing_case_dir + testing_case_csv_file_name, 'r') as csv_file:
    csv_reader = csv.reader(csv_file)
    
    next(csv_reader)
    
    for row in csv_reader:
        testcase_image_names.append(row[0])
        testcase_image_classes.append(int(row[1]))

In [166]:
testcase_image_names

['testing_images/t_01.jpg',
 'testing_images/t_02.jpg',
 'testing_images/t_03.jpg',
 'testing_images/t_04.jpg',
 'testing_images/t_05.jpg',
 'testing_images/t_06.jpg',
 'testing_images/t_07.jpg',
 'testing_images/t_08.jpg',
 'testing_images/t_09.jpg',
 'testing_images/t_10.jpg',
 'testing_images/t_11.jpg',
 'testing_images/t_12.jpg',
 'testing_images/t_13.jpg',
 'testing_images/t_14.jpg',
 'testing_images/t_15.jpg',
 'testing_images/t_16.jpg',
 'testing_images/t_17.jpg',
 'testing_images/t_18.jpg',
 'testing_images/t_19.jpg',
 'testing_images/t_20.jpg',
 'testing_images/t_21.jpg',
 'testing_images/t_22.jpg',
 'testing_images/t_23.jpg',
 'testing_images/t_24.jpg',
 'testing_images/t_25.jpg',
 'testing_images/t_26.jpg',
 'testing_images/t_27.jpg',
 'testing_images/t_28.jpg',
 'testing_images/t_29.jpg',
 'testing_images/t_30.jpg',
 'testing_images/t_31.jpg',
 'testing_images/t_32.jpg',
 'testing_images/t_33.jpg',
 'testing_images/t_34.jpg',
 'testing_images/t_35.jpg',
 'testing_images/t_3

In [167]:
testcase_images = load_images_from_name_list( \
                                         testing_case_dir, \
                                         testcase_image_names, \
                                         default_image_width, default_image_height)

In [174]:
pre_processed_testcase_images = pre_process_with_grayscale_median_clahe_darkening_erosion_dilation( \
                                                                                           testcase_images, \
                                                                                           median_blur_kernel_size, \
                                                                                           clahe_clip_limit, \
                                                                                           clahe_tile_grid_size, \
                                                                                           element_for_erosion_and_dilation, \
                                                                                           first_erosion_iterations, \
                                                                                           first_dilation_iterations)

In [175]:
input_testcase_data = list(map(lambda x:x.flatten(), pre_processed_testcase_images))

In [184]:
testcase_predictions = best_model.predict(input_testcase_data)

In [185]:
testcase_accuracy = accuracy_score(testcase_predictions, testcase_image_classes)
testcase_accuracy

0.58

In [186]:
testcase_conf_matrix = confusion_matrix(testcase_predictions, testcase_image_classes)
testcase_conf_matrix

array([[11,  1],
       [20, 18]], dtype=int64)

In [187]:
precision_final = precision_score(testcase_image_classes, testcase_predictions)
recall_final = recall_score(testcase_image_classes, testcase_predictions)
f1_final = f1_score(testcase_predictions, testcase_image_classes)

In [188]:
precision_final

0.47368421052631576

In [189]:
recall_final

0.9473684210526315

In [190]:
f1_final

0.631578947368421