<a href="https://colab.research.google.com/github/jenieto/computer-vision/blob/normalize-scikit-learn/computer-vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Trabajo M0

In [0]:
# Mount Google Drive under /content/drive/ and extract the dataset archive into /content/
# (unzip -o overwrites already-extracted files without prompting, so the cell can be re-run)
from google.colab import drive
drive.mount('/content/drive/')
!unzip -o "/content/drive/My Drive/Datasets/computer-vision-M2.zip" -d /content/

In [0]:
# Importamos librerias
import tensorflow as tf
import pandas as pd
import numpy as np
import cv2
import os
import re
from google.colab.patches import cv2_imshow
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [0]:
# Notebook configuration
csv_path = '/content/computer-vision-M2/anotaciones_itemsEvaluables_v3.csv'  # annotations file parsed by read_csv_data
images_path = '/content/computer-vision-M2/dataset1'  # root folder that read_sample joins the per-row image path onto
figure = 'a1'  # figure label of interest (a value of the CSV 'figure' column)
# First two entries are used as the cv2.resize dsize and as the label normalisation factors in read_sample.
# NOTE(review): the third entry (1) is never read by the visible code - confirm whether it is still meaningful.
target_shape = (128, 128, 1)
test_dataset_size = 0.25  # fraction of samples passed as test_size to train_test_split

In [0]:
# Functions to read the data
def read_csv_data(path=None):
  """Load the annotations CSV into a DataFrame of whitespace-free strings.

  The file is parsed as semicolon-separated, header-less text with the columns
  'dir', 'image', 'figure' and 'coords'. Every column is read as `str` so that
  numeric-looking fields (e.g. '01') keep their exact text and empty fields
  become '' instead of NaN; all spaces are then removed from every value.

  Args:
    path: CSV file to read. Defaults to the module-level `csv_path`.

  Returns:
    pandas.DataFrame with the four string columns listed above.
  """
  if path is None:
    path = csv_path
  data = pd.read_csv(
    path,
    sep=';',
    names=['dir', 'image', 'figure', 'coords'],
    index_col=False,
    skipinitialspace=True,
    dtype=str,              # never let pandas infer ints/floats: downstream code concatenates these as text
    keep_default_na=False,  # keep empty fields as '' rather than NaN
  )
  for key in data.columns:
    # fillna('') covers rows that have fewer fields than columns
    data[key] = data[key].fillna('').str.replace(' ', '', regex=False)
  return data

In [0]:
# Load the annotations into a DataFrame and display it as the cell output
data = read_csv_data()
data

In [0]:
# Create the shared StandardScaler instance; read_sample calls fit_transform on it for the sub-images it normalises
scaler = preprocessing.StandardScaler()

# Quadrants of the composite image, as (row_half, col_half) indices, stacked as channels for each `type`.
# (0, 1) = top-right: inverted original, (1, 0) = bottom-left: high-detail graph, (1, 1) = bottom-right: low-detail graph.
_QUADRANTS_BY_TYPE = {
  0: ((0, 1),),
  1: ((0, 1), (1, 0)),
  2: ((0, 1), (1, 0), (1, 1)),
}

# Read one image and its coordinates
def read_sample(row, type=0, n_points=1):
  """Read the image referenced by a CSV row together with its first `n_points` points.

  The PNG is loaded in grayscale and split into four equal quadrants; the
  quadrants selected by `type` are resized to (target_shape[0], target_shape[1])
  and normalised with the module-level `scaler`.

  Args:
    row: mapping with the keys 'dir', 'image' and 'coords'.
    type: 0 -> inverted original only (no channel axis), 1 -> inverted original
      + high-detail graph (2 channels), 2 -> those two + low-detail graph (3 channels).
    n_points: number of coordinate pairs to keep from 'coords'.

  Returns:
    Tuple (raw_X, raw_y, proc_X, proc_y), each with a leading axis of size 1:
    resized image(s), points in resized-pixel units, scaled image(s), and points
    divided by the target size. All four are None when the image cannot be read;
    raw_y and proc_y are None when 'coords' holds fewer than `n_points` pairs.

  Raises:
    ValueError: if `type` is not 0, 1 or 2.
  """
  if type not in _QUADRANTS_BY_TYPE:
    raise ValueError('type must be 0, 1 or 2, got %r' % (type,))

  # Read the image
  path = os.path.join(images_path, 'grafos_' + row['dir'], 'grafo_' + row['image'] + '.png')
  full_image = cv2.imread(path, 0)
  if full_image is None:  # TODO: some images do not exist
    return None, None, None, None

  half_rows, half_cols = full_image.shape[0] // 2, full_image.shape[1] // 2
  subimage_shape = (half_rows, half_cols)
  row_halves = (slice(None, half_rows), slice(half_rows, None))
  col_halves = (slice(None, half_cols), slice(half_cols, None))

  # Only the quadrants actually needed for this `type` are resized and scaled.
  raw_channels = []
  proc_channels = []
  for row_half, col_half in _QUADRANTS_BY_TYPE[type]:
    quadrant = full_image[row_halves[row_half], col_halves[col_half]]
    resized = cv2.resize(quadrant, dsize=(target_shape[0], target_shape[1]))
    raw_channels.append(resized)
    # NOTE(review): StandardScaler standardises each column of the 2-D image independently;
    # confirm this is intended rather than a global scaling such as resized / 255.
    proc_channels.append(scaler.fit_transform(resized))

  if len(raw_channels) == 1:
    raw_X, proc_X = raw_channels[0], proc_channels[0]
  else:
    raw_X = np.stack(raw_channels, axis=-1)
    proc_X = np.stack(proc_channels, axis=-1)
  raw_X = np.expand_dims(raw_X, axis=0)
  proc_X = np.expand_dims(proc_X, axis=0)

  # Read the coordinates: every integer in the text, regrouped in pairs.
  # str() keeps a non-string value (e.g. NaN for an empty field) from crashing the regex.
  flat = np.array([int(s) for s in re.findall(r'\d+', str(row['coords']))], dtype=int)
  mat = flat.reshape((-1, 2))
  raw_y = None
  proc_y = None
  if mat.shape[0] >= n_points:
    # NOTE(review): the first coordinate is scaled with the row count and the second with the
    # column count of the sub-image; this only matters for non-square sub-images - confirm the axis order.
    scale = np.array([target_shape[0] / subimage_shape[0], target_shape[1] / subimage_shape[1]])
    points = (mat[:n_points, :] * 2) * scale  # stored coordinates come divided by 2
    # Normalise per (n_points, 2) row before flattening so that n_points > 1 broadcasts correctly.
    normalised = points / np.array([target_shape[0], target_shape[1]])
    raw_y = np.expand_dims(points.flatten(), axis=0)
    proc_y = np.expand_dims(normalised.flatten(), axis=0)
  return raw_X, raw_y, proc_X, proc_y

# Return the images and the coordinates for every usable row
def read_data(data, sample_type=2, n_points=1):
  """Build the image and label arrays for all rows of `data`.

  Each row is read with `read_sample`; rows whose image or coordinates are
  missing (read_sample returned None for them) are skipped, including the
  first row.

  Args:
    data: DataFrame of annotation rows (iterated with `iterrows`).
    sample_type: forwarded to `read_sample` as its `type` argument.
    n_points: forwarded to `read_sample`.

  Returns:
    Tuple (images, labels, X, y) of arrays concatenated along axis 0:
    raw images, raw labels, processed images and processed labels.

  Raises:
    ValueError: if no row yields both an image and its labels.
  """
  images, labels, X, y = [], [], [], []
  for _, row in data.iterrows():
    raw_X, raw_y, proc_X, proc_y = read_sample(row, sample_type, n_points)
    if raw_X is None or raw_y is None:
      continue  # image file missing or not enough annotated points
    images.append(raw_X)
    labels.append(raw_y)
    X.append(proc_X)
    y.append(proc_y)
  if not images:
    raise ValueError('read_data: no row produced both an image and its coordinates')
  # Concatenate once at the end instead of re-copying the arrays on every iteration.
  return (
    np.concatenate(images, axis=0),
    np.concatenate(labels, axis=0),
    np.concatenate(X, axis=0),
    np.concatenate(y, axis=0),
  )

In [0]:
# Build the images and the outputs for the configured figure (see `figure` in the configuration cell)
images, labels, X, y = read_data(data[data['figure'] == figure])
print(images.shape)
print(labels.shape)
print(X.shape)
print(y.shape)

In [0]:
# Show an image, optionally with the figure point marked
def show_image(image, point=None):
  """Display the first channel of `image`, marking `point` when one is given.

  Args:
    image: array indexed as image[:, :, 0]; that channel is converted from
      grayscale to BGR for display.
    point: optional pair whose first two entries are passed to cv2.circle as
      the centre. When None the image is shown without a marker.
  """
  canvas = cv2.cvtColor(image[:, :, 0], cv2.COLOR_GRAY2BGR)
  if point is not None:
    # Filled red dot (BGR colour) of radius 3; cv2.circle draws on `canvas` in place.
    cv2.circle(canvas, (int(point[0]), int(point[1])), 3, (0, 0, 255), -1)
  cv2_imshow(canvas)

# Preview sample 196 with its annotated point (assumes at least 197 samples were loaded)
show_image(images[196], labels[196])

In [0]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

# Build the model
def create_model(input_shape=(128, 128, 3), n_outputs=2):
  """Create the (uncompiled) convolutional regression model.

  Three Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D(2x2) -> Dropout(0.25)
  stages with 6, 12 and 24 filters, followed by Flatten, Dense(128, relu),
  BatchNormalization, Dropout(0.5) and a linear Dense output layer.

  Args:
    input_shape: shape of one input sample, passed to the first Conv2D layer.
    n_outputs: number of units of the final Dense layer (2 = one point's pair of coordinates).

  Returns:
    tf.keras Sequential model.
  """
  model = tf.keras.models.Sequential()

  for stage, filters in enumerate((6, 12, 24)):
    # Only the first layer of a Sequential model declares the input shape.
    extra = {'input_shape': input_shape} if stage == 0 else {}
    model.add(Conv2D(filters, (3, 3), activation='relu', **extra))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

  model.add(Flatten())
  model.add(Dense(128, activation='relu'))
  model.add(BatchNormalization())
  model.add(Dropout(0.5))
  model.add(Dense(n_outputs))

  return model

In [0]:
model = create_model()
# Create the train and test datasets; a fixed random_state makes the split reproducible across runs
train_images, test_images, train_labels, test_labels = train_test_split(X, y, test_size=test_dataset_size, random_state=42)

In [0]:
# Compile and train the model.
# The targets are continuous coordinates, so mean absolute error is the metric to read;
# 'accuracy' is kept only so that any code reading that history key keeps working.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'accuracy'])
history = model.fit(train_images, train_labels, validation_data=(test_images, test_labels), epochs=10)