# Cargando las librerías que utilizaremos

In [None]:
import splitfolders
import mlflow
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from matplotlib import pyplot as plt
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Set the active MLflow experiment to 'Transfer-Learning' for this notebook.
mlflow.set_experiment('Transfer-Learning')

# Analizando nuestros datos (después de aplicar Data Augmentation)

In [None]:
# Root folder of the dataset; it contains one sub-folder per class (OK / NG).
data_path = "C:/Users/abrah/Documents/Repos/RPatrones/ClasificaImagenes/data/Limpieza02/Final"
# Despite the "_path" suffix, these hold the file names found in each class folder.
OK_images_path = os.listdir(f"{data_path}/OK/")
NG_images_path = os.listdir(f"{data_path}/NG/")

In [None]:
# Show a sample of the OK images on a 3x3 grid and print each image's array shape.
plt.figure(figsize=(10, 10))
# Slicing caps the sample at 9 images and avoids an IndexError when the folder
# contains fewer files than grid cells.
for i, file_name in enumerate(OK_images_path[:9]):
    plt.subplot(3, 3, i + 1)
    img = image.load_img(data_path + '/OK/' + file_name)
    print(image.img_to_array(img).shape)
    plt.imshow(img)
    plt.title("OK Image")
    plt.axis("off")

In [None]:
# Show a sample of the NG images on a 3x3 grid and print each image's array shape.
plt.figure(figsize=(10, 10))
# Slicing caps the sample at 9 images and avoids an IndexError when the folder
# contains fewer files than grid cells.
for i, file_name in enumerate(NG_images_path[:9]):
    plt.subplot(3, 3, i + 1)
    img = image.load_img(data_path + '/NG/' + file_name)
    print(image.img_to_array(img).shape)
    plt.imshow(img)
    plt.title("NG Image")
    plt.axis("off")

# Separamos nuestros datos en entrenamiento, validación y test.

In [None]:
# Split the class folders under `data_path` into training, validation and test
# sets, written to `output_folder`. The seed is fixed so the split is repeatable.
output_folder = 'C:/Users/abrah/Documents/Repos/RPatrones/ClasificaImagenes/data/Limpieza02/splitted/'
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1
split_ratios = (train_ratio, val_ratio, test_ratio)
splitfolders.ratio(data_path, output=output_folder, seed=1237, ratio=split_ratios)

In [None]:
# NOTE(review): this points at "Limpieza01/splitted" while the split cell writes
# to "Limpieza02/splitted" -- confirm which dataset is meant to be used.
base_dir = 'C:/Users/abrah/Documents/Repos/RPatrones/ClasificaImagenes/data/Limpieza01/splitted/'

# One folder per split ...
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# ... and, inside each split, one folder per class.
train_ok_dir = os.path.join(train_dir, 'ok')
train_ng_dir = os.path.join(train_dir, 'ng')
val_ok_dir = os.path.join(val_dir, 'ok')
val_ng_dir = os.path.join(val_dir, 'ng')
test_ok_dir = os.path.join(test_dir, 'ok')
test_ng_dir = os.path.join(test_dir, 'ng')

# Total number of files in the training and validation splits; used later to
# derive steps_per_epoch / validation_steps for model.fit().
num_total_train = sum(len(os.listdir(d)) for d in (train_ok_dir, train_ng_dir))
num_total_val = sum(len(os.listdir(d)) for d in (val_ok_dir, val_ng_dir))

In [None]:
# Count the files of each class in every split, then report them.
train_ok_count = len(os.listdir(train_ok_dir))
train_ng_count = len(os.listdir(train_ng_dir))
val_ok_count = len(os.listdir(val_ok_dir))
val_ng_count = len(os.listdir(val_ng_dir))
test_ok_count = len(os.listdir(test_ok_dir))
test_ng_count = len(os.listdir(test_ng_dir))

# Training split
print(f'Training OK images: {train_ok_count}')
print(f'Training NG images: {train_ng_count}')

# Validation split
print(f'Validation OK images: {val_ok_count}')
print(f'Validation NG images: {val_ng_count}')

# Test split
print(f'Test OK images: {test_ok_count}')
print(f'Test NG images: {test_ng_count}')

# Formateamos nuestro data-set para el modelo

In [None]:
# Shared input configuration for the data generators and the model.
# NOTE(review): the original comment stated the images are grayscale, yet 3 channels
# are used and color_mode='grayscale' is commented out in the generators -- confirm.
IMG_SHAPE = 256     # Images are resized to 256 x 256 when loaded (target_size)
num_channels = 3    # Channel count used for the VGG19 input shape
batch_size = 8      # Number of images per batch yielded by the data generators

In [None]:
# Build one directory-backed generator per split. Each one only rescales pixel
# values by 1/255 and resizes images to IMG_SHAPE x IMG_SHAPE; labels are binary.
# Images are loaded with the default color mode (not 'grayscale') so they match
# the 3-channel input the model is built with.

# Training data: shuffled so batches differ from epoch to epoch.
normalize_img_train = image.ImageDataGenerator(rescale=1.0/255)
norm_train_data = normalize_img_train.flow_from_directory(
    directory=train_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,
)

# Validation data: not shuffled, so the monitored val_loss is computed over the
# same samples every epoch.
normalize_img_val = image.ImageDataGenerator(rescale=1.0/255)
norm_val_data = normalize_img_val.flow_from_directory(
    directory=val_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,
)

# Test data: shuffle MUST be disabled explicitly (flow_from_directory shuffles by
# default). Later cells compare model.predict() output against
# norm_test_data.classes and slice predictions by class, which is only valid
# when batches are yielded in directory order.
normalize_img_test = image.ImageDataGenerator(rescale=1.0/255)
norm_test_data = normalize_img_test.flow_from_directory(
    directory=test_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,
)

In [None]:
# Display the class-name -> label-index mapping of the training generator.
norm_train_data.class_indices

# Creando el modelo

In [None]:
mlflow.keras.autolog() # Use MLflow autologging to record the parameters used and the metrics obtained

In [None]:
# VGG19 base loaded with ImageNet weights and without its top (classifier) layers.
pre_trained_model = tf.keras.applications.VGG19(
    weights="imagenet",
    include_top=False,
    input_shape=(IMG_SHAPE, IMG_SHAPE, num_channels),
)

In [None]:
# Freeze every layer of the pre-trained base, listing the layer names as we go.
for layer in pre_trained_model.layers:
    layer.trainable = False
    print(layer.name)

In [None]:
# Attach a small dense classifier to the output of VGG19's 'block5_pool' layer.
last_layer = pre_trained_model.get_layer('block5_pool')
last_output = last_layer.output
x = layers.Flatten()(last_output)
# Two hidden ReLU layers followed by a single sigmoid unit for the binary output.
for units in (512, 200):
    x = layers.Dense(units, activation="relu")(x)
x = layers.Dense(1, activation='sigmoid')(x)

In [None]:
# Full network: VGG19 input through to the new sigmoid output.
model = tf.keras.Model(inputs=pre_trained_model.input, outputs=x)

# Compile the model

In [None]:
# Binary classification setup. The metric is named "acc" on purpose: the plotting
# cell reads the 'acc' / 'val_acc' keys from the training history.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=["acc"],
)

In [None]:
model_path = 'C:/Users/abrah/Documents/Repos/RPatrones/ClasificaImagenes/notebooks/models/brake_vgg_2.h5'

# Write the model to `model_path` only when the validation loss improves.
checkpoint = ModelCheckpoint(
    filepath=model_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Stop training once the validation loss has not improved for 3 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

callbacks = [checkpoint, early_stopping]

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

# Training the model

In [None]:
import time

epochs = 5

# Wall-clock duration of the training run is kept in `total_time` (seconds).
start_time = time.time()
# The generators already yield batches of `batch_size` images (and the training
# generator shuffles them), so `batch_size` and `shuffle` are not passed here:
# Keras documents that batch_size must not be specified for generator/Sequence
# inputs and that `shuffle` is ignored for them.
classifier = model.fit(
    norm_train_data,
    steps_per_epoch=num_total_train // batch_size,
    epochs=epochs,
    validation_data=norm_val_data,
    validation_steps=num_total_val // batch_size,
    verbose=1,
    callbacks=callbacks,
)
total_time = time.time() - start_time

In [None]:
# Save the model as it is after the last training epoch. This file (brake_vgg_1.h5)
# is distinct from the best-val_loss checkpoint written to `model_path`.
model.save('C:/Users/abrah/Documents/Repos/RPatrones/ClasificaImagenes/notebooks/models/brake_vgg_1.h5')

In [None]:
# Plot the per-epoch training curves recorded by model.fit().
print(classifier.history['val_loss'])

# Accuracy: training vs. validation.
# The figure is created BEFORE plotting so that figsize applies to the curves;
# creating it after plotting would only add an empty extra figure.
plt.figure(figsize=(20, 10))
plt.plot(classifier.history['acc'])
plt.plot(classifier.history['val_acc'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('epoch')
# The second curve comes from the validation split, not from the test split.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# Loss: training vs. validation.
plt.figure(figsize=(20, 10))
plt.plot(classifier.history['loss'])
plt.plot(classifier.history['val_loss'])
plt.title('Model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# Testing the model

In [None]:
#model = keras.models.load_model(model_path)

In [None]:
# Evaluate on the test generator. The generator already batches the data, so
# batch_size is not passed (Keras documents it must not be specified for
# generator/Sequence inputs).
result = model.evaluate(norm_test_data)
print('test_loss, test_accuracy', result)

# Confusion Matrix

In [None]:
import numpy as np

threshold = 0.50
# Predict over the whole test generator. No extra positional argument is passed:
# the second positional parameter of predict() is batch_size (not steps), and the
# generator already defines its own batching and length.
# NOTE: the result is later compared row-by-row with norm_test_data.classes, which
# requires the test generator to yield samples unshuffled.
Y_pred_proba = model.predict(norm_test_data)
# Assign the class: probabilities above the threshold become 1, the rest 0.
Y_pred_labels = np.where(Y_pred_proba > threshold, 1, 0)

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Ground-truth labels as stored by the test generator.
y_true = norm_test_data.classes

conf_matrix = confusion_matrix(y_true, Y_pred_labels)
print('Confussion Matrix')
print(conf_matrix)

# Display names for label 0 and label 1 respectively.
target_names = ['Bad', 'Good']
report = classification_report(y_true, Y_pred_labels, target_names=target_names)
print(report)

# NG Images

In [None]:
# Load every NG test image into memory.
folder_path = test_ng_dir
# os.listdir() returns entries in arbitrary order. The images are indexed in
# parallel with the generator's predictions, so the listing is sorted to get a
# deterministic, name-ordered sequence (assumed to match the generator's
# per-class file order -- confirm).
images_list = sorted(os.listdir(folder_path))
images = [plt.imread(os.path.join(folder_path, file_name)) for file_name in images_list]

In [None]:
from matplotlib.patches import Rectangle

# Visual check of the first NG test predictions: each image is drawn with its
# predicted probability and a green (correct) or red (wrong) frame.
color = 'none'
true_negative = 0
false_positive = 0

nrows = 2
ncols = 4
to_show = nrows * ncols

# Assumes prediction rows follow norm_test_data.classes order with all class-0 (NG)
# samples first, i.e. the test generator is not shuffled.
ng_items = np.count_nonzero(norm_test_data.classes == 0)
ng_ypred = np.copy(Y_pred_proba[0:ng_items])

# Never index past the available predictions or loaded images.
to_show = min(to_show, len(ng_ypred), len(images))

i = 0
for _ in range(nrows):
    if i >= to_show:
        break
    plt.figure(figsize=(20, 10))
    for col in range(1, ncols + 1):
        if i >= to_show:
            break
        score = ng_ypred[i, 0]
        pred = f"{score:.4f}"

        plt.subplot(1, ncols, col)
        plt.text(IMG_SHAPE/2, IMG_SHAPE-5, "Pred =" + pred, color="orange",
                 fontdict={"fontsize": 13, "fontweight": 'bold', "ha": "center", "va": "baseline"})

        # An NG part is classified correctly when its score does not exceed the threshold.
        if score <= threshold:
            color = 'g'
            true_negative += 1
        else:
            color = 'r'
            false_positive += 1
        plt.gca().add_patch(Rectangle((0, 0), IMG_SHAPE, IMG_SHAPE, linewidth=5,
                                      edgecolor=color, facecolor='none'))

        plt.imshow(images[i])
        i += 1

    plt.show()

print(f'True negative = {true_negative}')
# NG parts predicted as OK are false positives (previously mislabelled "False negative").
print(f'False positive = {false_positive}')
print(f'Total evaluated parts = {true_negative + false_positive}')

# OK Images

In [None]:
# Load every OK test image into memory.
folder_path = test_ok_dir
# os.listdir() returns entries in arbitrary order. The images are indexed in
# parallel with the generator's predictions, so the listing is sorted to get a
# deterministic, name-ordered sequence (assumed to match the generator's
# per-class file order -- confirm).
images_list = sorted(os.listdir(folder_path))
images = [plt.imread(os.path.join(folder_path, file_name)) for file_name in images_list]

In [None]:
# Visual check of the first OK test predictions: each image is drawn with its
# predicted probability and a green (correct) or red (wrong) frame.
# Relies on nrows, ncols, ng_items and Rectangle defined by the NG display cell.
color = 'none'
true_positive = 0
false_negative = 0

# Assumes prediction rows follow norm_test_data.classes order, so the OK (class 1)
# predictions are the rows that come after the NG ones.
ok_items = np.count_nonzero(norm_test_data.classes)
ok_ypred = np.copy(Y_pred_proba[ng_items:])

print ('Evaluation on Good Parts')

# Never index past the available predictions or loaded images.
ok_to_show = min(nrows * ncols, len(ok_ypred), len(images))

i = 0
for _ in range(nrows):
    if i >= ok_to_show:
        break
    plt.figure(figsize=(20, 10))
    for col in range(1, ncols + 1):
        if i >= ok_to_show:
            break
        score = ok_ypred[i, 0]
        pred = f"{score:.4f}"

        plt.subplot(1, ncols, col)
        plt.text(IMG_SHAPE/2, IMG_SHAPE-5, "Pred =" + pred, color="orange",
                 fontdict={"fontsize": 13, "fontweight": 'bold', "ha": "center", "va": "baseline"})

        # Strict '>' matches the rule used to build Y_pred_labels.
        if score > threshold:
            color = 'g'
            true_positive += 1
        else:
            color = 'r'
            false_negative += 1
        plt.gca().add_patch(Rectangle((0, 0), IMG_SHAPE, IMG_SHAPE, linewidth=5,
                                      edgecolor=color, facecolor='none'))

        plt.imshow(images[i])
        i += 1

    plt.show()

print(f'True positive = {true_positive}')
print(f'False negative = {false_negative}')
# The total counts the OK parts shown here: true positives plus false negatives.
print(f'Total evaluated parts = {true_positive + false_negative}')