<a href="https://colab.research.google.com/github/jenieto/computer-vision/blob/preprocesado-interactivo/computer-vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Trabajo M0

In [0]:
# Mount Google Drive and extract the dataset archive into /content/
# (unzip -o overwrites files that already exist there)
from google.colab import drive
drive.mount('/content/drive/')
!unzip -o "/content/drive/My Drive/Datasets/computer-vision-M2.zip" -d /content/

In [0]:
# Importamos librerias
import tensorflow as tf
import pandas as pd
import numpy as np
import cv2
import os
import re
from google.colab.patches import cv2_imshow
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [0]:
# Configuration: dataset locations and preprocessing / training parameters
items_path = '/content/computer-vision-M2/anotaciones_itemsEvaluables_v3.csv'  # annotations CSV (dir; image; figure; coords)
quality_path = '/content/computer-vision-M2/anotacionCalidadCopia.csv'  # quality CSV (image; quality)
images_path = '/content/computer-vision-M2/dataset2'  # root folder of the PNG images
min_quality = 4  # rows with a lower quality score are dropped in read_csv_data
pattern = 'a4'  # figure id used to filter rows and to pick the point-count rule
IMAGE_CHANNELS = 3  # number of sub-images stacked as channels in read_sample
IMAGE_SIZE = (128, 128, IMAGE_CHANNELS)  # model input shape; the first two entries are the resize target
test_dataset_size = 0.25  # passed as test_size to train_test_split

In [0]:
# Functions for reading the data

def remove_spaces(data, keys=None):
    """Remove every space character from the given text columns, in place.

    Args:
        data: DataFrame whose columns are overwritten with the cleaned values.
        keys: Iterable of column names to clean. Defaults to all columns.

    Missing values (NaN/None) are left as missing instead of raising.
    """
    # Snapshot the column names so we never iterate over a live index while
    # assigning columns.
    columns = list(data.keys()) if keys is None else keys
    for key in columns:
        # The vectorised .str accessor skips missing values, whereas
        # apply(str.replace, ...) raises TypeError on the first NaN.
        # regex=False makes the replacement a literal one.
        data[key] = data[key].str.replace(' ', '', regex=False)

def read_csv_data():
  """Load the item annotations joined with their copy-quality score.

  Reads the two ';'-separated, header-less CSV files named by the module-level
  `quality_path` and `items_path`, strips spaces from their text columns,
  left-joins the quality score onto every annotation by `image`, and keeps
  only the rows whose quality is at least `min_quality`.

  Returns:
    DataFrame with the columns dir, image, figure, coords and quality.
  """
  shared_options = dict(sep=';', skipinitialspace=True)
  scores = pd.read_csv(quality_path, names=['image', 'quality'], **shared_options)
  items = pd.read_csv(items_path, names=['dir', 'image', 'figure', 'coords'], index_col=False, **shared_options)

  remove_spaces(items)
  remove_spaces(scores, keys=['image'])

  joined = items.merge(scores, how='left', left_on='image', right_on='image')
  # Rows without a quality score compare as False here and are dropped too.
  good_enough = joined['quality'] >= min_quality
  return joined[good_enough]

In [0]:
# Read the annotations and display the resulting DataFrame
data = read_csv_data()
data

In [0]:
# Create the StandardScaler instance used by read_sample to normalise the images
scaler = preprocessing.StandardScaler()

# Point-count rules per pattern (figure id).
# Patterns that must have exactly this many annotated points; rows with any
# other count are discarded. For 'a13' the CSV was edited by hand so that
# there are always 3 points.
_EXACT_POINT_COUNTS = {
  'a1': 1, 'a3': 1, 'a11': 1, 'a14': 1, 'a17': 1,
  'a4': 2, 'a5': 2, 'a7': 2, 'a8': 2, 'a10': 2,
  'a13': 3,
}
# Patterns that need at least 2 points; only the first and the last are kept.
_FIRST_AND_LAST_PATTERNS = frozenset(('a15', 'a16', 'a18'))
# Patterns ignored for now: too much variation ('a2', 'a6', 'a9') or not
# enough data ('a12').
_IGNORED_PATTERNS = frozenset(('a2', 'a6', 'a9', 'a12'))

# Generate coordinates
def generate_coords(row, pattern):
  """Parse the points annotated in row['coords'] and validate their count.

  Args:
    row: mapping (e.g. a DataFrame row) with a 'coords' entry holding the
      annotated points as text; every run of digits is read as one integer
      and consecutive integers are paired into points.
    pattern: figure id ('a1' ... 'a18') selecting the point-count rule.

  Returns:
    (mat, valid): `mat` is an (n_points, 2) integer array and `valid` tells
    whether the row satisfies the rule of `pattern`. `mat` is only meaningful
    when `valid` is True. Patterns without a rule are accepted with every
    parsed point.
  """
  coords_text = row['coords']
  if not isinstance(coords_text, str):
    # Missing annotation (e.g. NaN): nothing to parse.
    return np.empty((0, 2), dtype=int), False

  # Raw string: '\d' in a normal string literal is an invalid escape sequence.
  flat = np.array([int(s) for s in re.findall(r'\d+', coords_text)], dtype=int)
  if flat.size % 2 != 0:
    # A dangling coordinate cannot be paired; reject the row instead of
    # letting reshape raise ValueError.
    return np.empty((0, 2), dtype=int), False
  mat = flat.reshape((-1, 2))  # Group the numbers into points, two per row

  if pattern in _IGNORED_PATTERNS:
    return mat, False
  if pattern in _EXACT_POINT_COUNTS:
    return mat, mat.shape[0] == _EXACT_POINT_COUNTS[pattern]
  if pattern in _FIRST_AND_LAST_PATTERNS:
    if mat.shape[0] >= 2:
      return np.array([mat[0], mat[-1]]), True
    return mat, False
  # No rule for this pattern: keep every parsed point.
  return mat, True

# Read one image and its coordinates
def read_sample(row, pattern='a1'):
  """Load the composite image of `row` and its point labels.

  The PNG at images_path/grafos_<dir>_limpiezaManual/grafo_<image>.png is read
  in grayscale and split into four quadrants: top-left is the original image,
  top-right the inverted original, bottom-left the high-detail graph and
  bottom-right the low-detail graph. Depending on IMAGE_CHANNELS, the inverted
  original (1), plus the high-detail graph (2), plus the low-detail graph (3)
  are resized to the target size and stacked as channels.

  Args:
    row: mapping with 'dir', 'image' and 'coords' entries.
    pattern: figure id forwarded to generate_coords.

  Returns:
    (raw_X, raw_y, proc_X, proc_y)
      raw_X:  resized pixels, shape (1, rows, cols, IMAGE_CHANNELS).
      proc_X: same layout, each channel passed through scaler.fit_transform.
      raw_y:  flattened point coordinates in resized-image pixels, shape
              (1, 2 * n_points).
      proc_y: raw_y divided by the image size.
    raw_X and proc_X are None when the image file cannot be read; raw_y and
    proc_y are None in that case and when the coordinates are not valid.

  Raises:
    ValueError: if IMAGE_CHANNELS is not 1, 2 or 3.
  """
  if IMAGE_CHANNELS not in (1, 2, 3):
    # Any other value used to fall through every branch and return the
    # unsplit image together with a meaningless proc_X.
    raise ValueError('IMAGE_CHANNELS must be 1, 2 or 3, got %r' % (IMAGE_CHANNELS,))

  path = os.path.join(images_path, 'grafos_' + row['dir'] + '_limpiezaManual', 'grafo_' + row['image'] + '.png')
  composite = cv2.imread(path, 0)
  if composite is None:  # TODO: some images do not exist
    return None, None, None, None

  half_rows, half_cols = composite.shape[0] // 2, composite.shape[1] // 2
  subimage_shape = (half_rows, half_cols)  # shape of the top-left quadrant
  # Quadrants in the order they are used as channels.
  quadrants = (
    composite[:half_rows, half_cols:],  # inverted original
    composite[half_rows:, :half_cols],  # high-detail graph
    composite[half_rows:, half_cols:],  # low-detail graph
  )
  target_size = (IMAGE_SIZE[0], IMAGE_SIZE[1])
  raw_channels = [cv2.resize(quadrant, dsize=target_size) for quadrant in quadrants[:IMAGE_CHANNELS]]
  # StandardScaler treats every image column as a feature, so each column is
  # standardised on its own. Simpler alternative: channel / 255
  proc_channels = [scaler.fit_transform(channel) for channel in raw_channels]

  # Stacking always creates the trailing channel axis, also for a single
  # channel, so the arrays match the model's (rows, cols, channels) input and
  # show_image's image[:, :, 0] indexing.
  raw_X = np.expand_dims(np.stack(raw_channels, axis=-1), axis=0)
  proc_X = np.expand_dims(np.stack(proc_channels, axis=-1), axis=0)

  # Read the coordinates
  raw_y = None
  proc_y = None
  mat, valid = generate_coords(row, pattern)
  if valid:
    # NOTE(review): the first coordinate is scaled with the quadrant's row
    # count and the second with its column count, while show_image draws them
    # as (x, y); the two only agree for square quadrants -- confirm the
    # coordinate order used in the CSV. The factor 2 is kept as found;
    # presumably the annotations are at half the quadrant resolution -- confirm.
    scale = np.array([2 * IMAGE_SIZE[0] / subimage_shape[0], 2 * IMAGE_SIZE[1] / subimage_shape[1]])
    raw_y = mat * scale
    proc_y = raw_y / np.array([IMAGE_SIZE[0], IMAGE_SIZE[1]])
    raw_y = np.expand_dims(raw_y.flatten(), axis=0)
    proc_y = np.expand_dims(proc_y.flatten(), axis=0)
  return raw_X, raw_y, proc_X, proc_y

# Return the images and the coordinates of one pattern
def read_data(data, pattern):
  """Load every usable sample of `pattern` into stacked arrays.

  Args:
    data: DataFrame with a 'figure' column plus the columns read_sample needs.
    pattern: figure id; only rows whose 'figure' equals it are loaded.

  Returns:
    (images, labels, X, y): the raw images, raw labels, processed images and
    processed labels returned by read_sample, concatenated along axis 0.
    Rows whose image is missing or whose coordinates are invalid are skipped.

  Raises:
    ValueError: if no row of the pattern yields a usable sample.
  """
  rows = data[data['figure'] == pattern]
  images, labels, X, y = [], [], [], []
  for _, row in rows.iterrows():
    raw_X, raw_y, proc_X, proc_y = read_sample(row, pattern)
    if raw_X is None or raw_y is None or proc_y is None:
      continue
    images.append(raw_X)
    labels.append(raw_y)
    X.append(proc_X)
    y.append(proc_y)

  if not images:
    # Previously this surfaced as an UnboundLocalError on the return line.
    raise ValueError('No valid samples found for pattern %r' % (pattern,))

  # Concatenate once at the end instead of re-copying the arrays on every row.
  return (np.concatenate(images, axis=0), np.concatenate(labels, axis=0),
          np.concatenate(X, axis=0), np.concatenate(y, axis=0))

In [0]:
# Build the image arrays and the target outputs for the configured pattern,
# then print their shapes
images, labels, X, y = read_data(data, pattern)
print(images.shape)
print(labels.shape)
print(X.shape)
print(y.shape)

In [0]:
# Show an image, optionally with the points of the figure marked
def show_image(image, point=None):
  """Display the first channel of `image`, marking the given points in red.

  Args:
    image: array indexed as image[:, :, channel]; only channel 0 is shown.
    point: optional flat sequence [x0, y0, x1, y1, ...] in pixel coordinates.
      When None the image is shown without markers (previously nothing was
      displayed at all in that case).
  """
  im_color = cv2.cvtColor(image[:, :, 0], cv2.COLOR_GRAY2BGR)
  if point is not None:
    # len() instead of .shape so plain lists/tuples are accepted as well.
    for i in range(len(point) // 2):
      center = (int(point[2*i]), int(point[2*i+1]))
      # Filled circle (thickness -1) of radius 3; (0, 0, 255) is red in BGR.
      im_color = cv2.circle(im_color, center, 3, (0, 0, 255), -1)
  cv2_imshow(im_color)

# Visual check: sample 25 with its labelled points (requires at least 26 loaded samples)
show_image(images[25], labels[25])

In [0]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

# Build the model
def create_model(input_shape=None, n_outputs=None):
  """Build the convolutional network that regresses the point coordinates.

  Three Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D(2x2) ->
  Dropout(0.25) stages with 24, 48 and 96 filters, followed by
  Flatten -> Dense(256, relu) -> BatchNormalization -> Dropout(0.5) and a
  Dense output layer.

  Args:
    input_shape: shape of one input image. Defaults to the module-level
      IMAGE_SIZE.
    n_outputs: number of output units. Defaults to y.shape[1] of the
      module-level `y`, as before; pass it explicitly to build the model
      without relying on that global.

  Returns:
    An uncompiled tf.keras Sequential model.
  """
  if input_shape is None:
    input_shape = IMAGE_SIZE
  if n_outputs is None:
    n_outputs = y.shape[1]

  model = tf.keras.models.Sequential()

  for index, filters in enumerate((24, 48, 96)):
    # Only the first layer declares the input shape.
    conv_kwargs = {'input_shape': input_shape} if index == 0 else {}
    model.add(Conv2D(filters, (3, 3), activation='relu', **conv_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

  model.add(Flatten())
  model.add(Dense(256, activation='relu'))
  model.add(BatchNormalization())
  model.add(Dropout(0.5))
  # NOTE(review): relu on a regression output passes no gradient for units
  # whose pre-activation is negative; consider 'sigmoid' or 'linear' -- kept
  # as found.
  model.add(Dense(n_outputs, activation='relu'))

  return model

In [0]:
# Build the model and split the processed data into train and test sets.
# NOTE(review): no random_state is passed, so the split changes on every run.
model = create_model()
train_images, test_images, train_labels, test_labels = train_test_split(X, y, test_size=test_dataset_size) # Create the train and test datasets

In [0]:
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping

def initializeCallbacks():
    """Create the callbacks used while training.

    Returns:
        A list with, in order:
          - ModelCheckpoint saving the model to 'model.h5' whenever val_loss
            improves,
          - TensorBoard logging to './logs' (graph and images included),
          - EarlyStopping with a patience of 10 epochs.
    """
    filepath_mdl = 'model.h5'
    checkpoint = ModelCheckpoint(filepath_mdl, monitor='val_loss', verbose=1, save_best_only=True)
    # `batch_size` is not an argument of the TF2 TensorBoard callback: it is
    # ignored with a warning in early TF2 and rejected by newer Keras, so it
    # is no longer passed.
    tensorboard = TensorBoard(log_dir='./logs', write_graph=True, write_images=True)
    earlystopping = EarlyStopping(patience=10, verbose=1)
    return [checkpoint, tensorboard, earlystopping]

# Compile and train the model.
# The targets are continuous coordinates, so 'accuracy' says little about the
# fit; mean absolute error is tracked as well ('accuracy' is kept so existing
# history keys do not change).
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy', 'mae'])
model.summary()
output = model.fit(train_images, train_labels, validation_data=(test_images, test_labels), epochs=50, verbose=1, callbacks=initializeCallbacks())

In [0]:
# Load the TensorBoard notebook extension (provides the %tensorboard magic)
%load_ext tensorboard

In [0]:
# Start TensorBoard within the notebook using magics, reading the 'logs'
# directory written by the TensorBoard callback
%tensorboard --logdir logs

In [0]:
# Evaluate the result: predict the points of the first sample and compare
# them visually with its labels.
# NOTE(review): X[0] is taken from the full dataset, so it may have been part
# of the training split.
prediction = model.predict(np.expand_dims(X[0], axis=0)) # Predict first image
prediction = (prediction.reshape((-1, 2)) * np.array([IMAGE_SIZE[0], IMAGE_SIZE[1]])).reshape((1, -1)) # Invert normalization
show_image(images[0], prediction[0])  # predicted points
show_image(images[0], labels[0])  # labelled points