In questo notebook le reti neurali vengono utilizzate per trovare e analizzare i pattern più complessi. Per eseguirlo basta selezionare Runtime -> Esegui tutto.

In [1]:
# from google.colab import drive
# drive.mount('/content/gdrive')

IMPORTANTE: le versioni indicate nella cella seguente non vanno cambiate: il codice non funziona con versioni successive di TensorFlow.

In [2]:
# !pip install -q keras==2.3.1
# !pip install -q tensorflow==2.2.0

In [3]:
from PIL import Image
import numpy as np

def background_thumbnail(template, modality, thumbnail_size=(200,200)):
    """Fit an image array onto a white canvas of a fixed size.

    The array is turned into a PIL image in mode ``modality``, shrunk with
    ``Image.thumbnail`` so that it fits inside ``thumbnail_size``, and pasted
    centred on a white canvas of exactly ``thumbnail_size``.

    Args:
        template: image data accepted by ``Image.fromarray``.
        modality: PIL mode string used for both the image and the canvas.
        thumbnail_size: ``(width, height)`` of the canvas.

    Returns:
        The composed canvas as a NumPy array.
    """
    canvas = Image.new(modality, thumbnail_size, "white")
    picture = Image.fromarray(template).convert(modality)
    picture.thumbnail(thumbnail_size)

    # Offsets that leave equal margins on both sides of the shrunk picture.
    pic_w, pic_h = picture.size
    offset_x = (thumbnail_size[0] - pic_w) // 2
    offset_y = (thumbnail_size[1] - pic_h) // 2

    canvas.paste(picture, (offset_x, offset_y))
    return np.array(canvas)

In [4]:
import numpy as np
import cv2
from matplotlib import pyplot as plt


def maxDeviationThresh(hist):
    """Pick a threshold bin with the maximum-deviation (triangle) method.

    A straight line is drawn from a start bin to the histogram peak, and the
    bin strictly between the two whose count lies farthest from that line is
    returned.  The start bin is the first empty bin that is immediately
    followed by a non-empty one before the peak, or bin 0 if there is none.

    Args:
        hist: 1-D sequence of bin counts.

    Returns:
        int: index of the selected bin, or 0 when no bin lies strictly
        between the start bin and the peak.

    Raises:
        ValueError: if ``hist`` is empty.
    """
    counts = np.asarray(hist, dtype=float)
    if counts.size == 0:
        raise ValueError("hist must contain at least one bin")

    # np.argmax returns the first occurrence, like list.index(max(...)).
    index_max = int(np.argmax(counts))

    # First empty bin directly followed by a populated one, left of the peak.
    starts = np.flatnonzero((counts[:index_max] == 0) & (counts[1:index_max + 1] != 0))
    index_min = int(starts[0]) if starts.size else 0

    # No candidate bin strictly between the two end points of the line.
    if index_max - index_min < 2:
        return 0

    x1, y1 = index_min, counts[index_min]
    x2, y2 = index_max, counts[index_max]
    candidate_bins = np.arange(index_min + 1, index_max)
    candidate_counts = counts[index_min + 1:index_max]

    # Point-to-line distance of every candidate bin from the start-peak line.
    distances = np.abs(
        (y2 - y1) * candidate_bins - (x2 - x1) * candidate_counts + x2 * y1 - y2 * x1
    ) / np.sqrt((y2 - y1) ** 2 + (x2 - x1) ** 2)

    # The candidates start at index_min + 1, so map the arg-max back through
    # candidate_bins; adding it to index_min directly would be one bin short.
    return int(candidate_bins[np.argmax(distances)])


def extract_drawing(img):
    """Binarise an image into drawing (0) and background (255).

    The image is smoothed with a bilateral filter, a histogram of all
    non-white (< 255) pixels is built, and ``maxDeviationThresh`` chooses the
    grey level below which pixels are marked as drawing.

    Args:
        img: image array accepted by ``cv2.bilateralFilter``.

    Returns:
        A ``uint8`` array with the shape of the filtered image, holding 0
        where the filtered value is below the threshold and 255 elsewhere.
        An image without any non-white pixel yields an all-255 result.
    """
    smoothed = cv2.bilateralFilter(img, 10, sigmaColor=15, sigmaSpace=15)
    binary = np.full(smoothed.shape, 255, dtype=np.uint8)

    non_white = smoothed < 255
    if non_white.any():
        # One bin per grey level 0..255.
        level_counts, _ = np.histogram(smoothed[non_white], np.arange(257))
        cutoff = maxDeviationThresh(level_counts)
        binary[smoothed < cutoff] = 0
    return binary

In [5]:
import cv2
# Destination points for the four reference points (every input point except
# the one at index 2) used to estimate the homography.
p_dst = [(382, 219),(852, 219), (852, 537), (382, 537)]
def computeHomographyRhomb(image, points):
    """Project the rhombus point into the coordinate frame defined by ``p_dst``.

    ``points`` holds five ``(x, y)`` pairs.  The pair at index 2 is the point
    to project; the remaining four, in order, are matched against ``p_dst``
    to estimate a homography, which is then applied to the point at index 2.

    Args:
        image: not used by the computation; kept so existing callers keep
            working.
        points: sequence of five ``(x, y)`` pairs.

    Returns:
        Tuple ``(x, y)`` of the projected point, rounded to integers.

    Raises:
        ValueError: if ``points`` does not hold exactly five pairs or the
            homography cannot be estimated.
    """
    if len(points) != 5:
        raise ValueError('expected 5 points, got {}'.format(len(points)))

    # Homogeneous coordinates of the point to project.
    point_rhomb = tuple(points[2]) + (1,)

    # The four reference points: everything except index 2.
    right_points = np.delete(np.array(points), 2, axis=0)

    hm, status = cv2.findHomography(np.array(right_points), np.array(p_dst))
    if hm is None:
        raise ValueError('homography could not be estimated from the given points')

    new_point = np.dot(hm, point_rhomb)
    # Back from homogeneous to pixel coordinates.
    new_point = tuple(np.round(new_point / new_point[2]).astype(int))
    return new_point[0:2]


In [6]:
import tensorflow as tf
def _pairwise_distances(embeddings, squared=False):
    """Distance from every image embedding to the anchor embedding of row 0.

    Each row of ``embeddings`` is treated as two concatenated halves along
    axis 1: the first half is the image embedding, the second half the anchor
    embedding.  Only the anchor half of the first row is used.

    Args:
        embeddings: 2-D tensor, split in the middle of axis 1.
        squared: if true the squared Euclidean distance is returned,
            otherwise the Euclidean distance.

    Returns:
        Tensor with one row per input row and a single column.
    """
    half = int(embeddings.shape[1] / 2)
    image_part = embeddings[:, :half]
    anchor_part = embeddings[:, half:]
    anchor = tf.expand_dims(anchor_part[0], axis=0)

    # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2
    cross_term = tf.matmul(image_part, tf.transpose(anchor))
    image_sq_norm = tf.reduce_sum(tf.square(image_part), axis=1, keepdims=True)
    anchor_sq_norm = tf.reduce_sum(tf.square(anchor), axis=1, keepdims=True)
    sq_distances = tf.add(image_sq_norm, anchor_sq_norm - 2.0 * cross_term)

    # Rounding errors can push a value slightly below zero.
    sq_distances = tf.maximum(sq_distances, 0.0)
    if squared:
        return sq_distances

    # sqrt has an infinite gradient at 0: nudge exact zeros by a tiny epsilon
    # before the root and force them back to exactly 0 afterwards.
    zero_mask = tf.cast(tf.equal(sq_distances, 0.0), float)
    rooted = tf.sqrt(sq_distances + zero_mask * 1e-16)
    return rooted * (1.0 - zero_mask)

def batch_hard_triplet_loss(y_true, y_pred):
    """Triplet-style loss built from the hardest positive and hardest negative.

    Distances come from ``_pairwise_distances(y_pred)``.  ``y_true`` is used
    as a multiplicative mask on those distances: ``labels`` keeps the
    positive distances and ``1 - labels`` keeps the negative ones.

    Args:
        y_true: labels; cast to int32 and then to float before masking.
            Presumably binary (1 = positive, 0 = negative) and broadcastable
            against the distance tensor -- TODO confirm against the code
            that builds the training batches.
        y_pred: embeddings tensor, passed unchanged to
            ``_pairwise_distances``.

    Returns:
        ``max(D + log(1 + exp(D)), 0)`` where
        ``D = hardest_positive_dist - hardest_negative_dist``.  The value is
        not averaged (the mean reduction below is commented out).
    """
    # Get the distances between the image embeddings and the anchor
    
    #margin = 1.
    labels = y_true
    squared=False
    labels = tf.cast(labels, dtype='int32')
    #label_np = labels.numpy()
    embeddings = y_pred

    pairwise_dist = _pairwise_distances(embeddings, squared=squared)
    #pairwise_dist_np = pairwise_dist.numpy()
    # Hardest positive: the largest distance among the entries kept by the
    # label mask.
    #mask_anchor_positive = _get_anchor_positive_triplet_mask(labels)
    #mask_anchor_positive = tf.cast(mask_anchor_positive, float)

    # Multiplying by the labels sets every non-positive entry to 0
    anchor_positive_dist = tf.multiply(tf.cast(labels, float), pairwise_dist)
    #anchor_positive_dist_np=anchor_positive_dist.numpy()
    # Maximum over axis 0 (the batch axis of the distance tensor)
    hardest_positive_dist = tf.reduce_max(anchor_positive_dist, axis=0)

    tf.summary.scalar("hardest_positive_dist", tf.reduce_mean(hardest_positive_dist))

    # Hardest negative: the smallest non-zero distance among the entries kept
    # by the inverted label mask.
    #mask_anchor_negative = _get_anchor_negative_triplet_mask(labels)
    #mask_anchor_negative = tf.cast(mask_anchor_negative, float)

    # Multiplying by (1 - labels) sets every positive entry to 0
    #max_anchor_negative_dist = tf.reduce_max(pairwise_dist, axis=1, keepdims=True)
    anchor_negative_dist = tf.multiply(1-(tf.cast(labels, float)), pairwise_dist)
    #anchor_negative_dist_np = anchor_negative_dist.numpy()
    # Boolean-mask indexing drops every entry equal to 0: the positives zeroed
    # by the mask and any negative whose distance is exactly 0.
    zero = tf.constant(0, dtype=tf.float32)
    where = tf.not_equal(anchor_negative_dist, zero)
    hardest_negative_dist = tf.reduce_min(anchor_negative_dist[where], axis=0)
    D = hardest_positive_dist - hardest_negative_dist
    # The margin is not a constant: it is log(1 + exp(D)), computed from D.
    margin = tf.math.log(1 + tf.math.exp(D))
    tf.summary.scalar("hardest_negative_dist", tf.reduce_mean(hardest_negative_dist))

    # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
    triplet_loss = tf.maximum(hardest_positive_dist - hardest_negative_dist + margin, 0.0)

    # Mean reduction intentionally left disabled
    #triplet_loss = tf.reduce_mean(triplet_loss)

    return triplet_loss


def compute_accuracy(y_true, y_pred):
    """Share of positives that beat the hardest negative by a fixed margin.

    Distances come from ``_pairwise_distances(y_pred)``.  A positive counts
    as correct when its distance is at most the smallest non-zero negative
    distance minus a margin of 1.

    Args:
        y_true: labels; cast to int32 and then to float and used as a
            multiplicative mask on the distances.
        y_pred: embeddings tensor, passed unchanged to
            ``_pairwise_distances``.

    Returns:
        Scalar tensor: mean of the 0/1 correctness flags over all positives
        with a non-zero distance.
    """
    margin = 1.
    label_mask = tf.cast(tf.cast(y_true, dtype='int32'), float)
    pairwise_dist = _pairwise_distances(y_pred, squared=False)
    zero = tf.constant(0, dtype=tf.float32)

    # Positive distances; entries zeroed by the mask are dropped.
    positive_dist = tf.multiply(label_mask, pairwise_dist)
    positive_non_zero = positive_dist[tf.not_equal(positive_dist, zero)]

    # Smallest non-zero distance among the negatives.
    negative_dist = tf.multiply(1 - label_mask, pairwise_dist)
    negative_non_zero = negative_dist[tf.not_equal(negative_dist, zero)]
    hardest_negative_dist = tf.reduce_min(negative_non_zero, axis=0)

    is_correct = tf.less_equal(positive_non_zero, hardest_negative_dist - margin)
    return tf.reduce_mean(tf.cast(is_correct, float))



In [7]:
import os

# Show the active conda environment. .get() keeps "Run all" working when the
# variable is unset (kernel not started from a conda environment), where a
# plain lookup would stop the notebook with a KeyError.
os.environ.get('CONDA_DEFAULT_ENV', 'CONDA_DEFAULT_ENV is not set')

'rocf2_env'

In [8]:

from tensorflow.keras.models import load_model
from tensorflow.keras.utils import plot_model
from tensorflow.keras import backend as T
import math

In [9]:


# Project root, relative to the notebook; edit it if the folder was renamed.
root = '../'

# Folder with the trained models and folder that receives the results.
model_folder = root + 'first_model'
result_folder = root + 'results'

# Template index -> file name of the model trained for that template.
template_dic = dict(enumerate([
    'best_model_triplet_cross_transfer.hdf5',
    'best_model_triplet_face_transfer.hdf5',
    'best_model_triplet_rail_transfer.hdf5',
    'best_model_triplet_rombo_transfer.hdf5',
    'best_model_triplet_rett_diag_transfer.hdf5',
    'best_model_triplet_rect_transfer.hdf5',
    'best_model_triplet_cross_vert_transfer.hdf5',
]))

model_folder
 

'../first_model'

In [10]:

 
class Grid():
    """Search region around the expected position of one pattern.

    The expected bounding box is given by two corner points.  ``grid`` holds
    candidate top-left corners on a lattice of +/-50 px (step 10) around the
    expected corner, and ``actions`` are the moves used to refine the best
    candidate by shifting or growing the box in 10 px steps.
    """

    def __init__(self, coords):
        """Build the candidate lattice and the refinement moves.

        Args:
            coords: ``[x1, y1, x2, y2]``; ``(x1, y1)`` is used as the box
                origin and the absolute differences as width and height.
        """
        self.x = coords[0]
        self.y = coords[1]
        self.w = np.abs(coords[0] - coords[2])
        self.h = np.abs(coords[1] - coords[3])
        pad = 50
        step = 10
        row = np.arange(self.x - pad, self.x + pad + 1, step=step, dtype=int)
        column = np.arange(self.y - pad, self.y + pad + 1, step=step, dtype=int)
        # Every (x, y) combination of the two axes, x varying fastest.
        self.grid = np.transpose([np.tile(row, len(column)), np.repeat(column, len(row))])
        pad_a = 10
        # Each move maps a box (x, y, w, h) to a neighbouring box.
        self.actions = [
            lambda x, y, w, h: (x, y, w + pad_a, h),
            lambda x, y, w, h: (x, y, w, h + pad_a),
            lambda x, y, w, h: (x, y - pad_a, w, h),
            lambda x, y, w, h: (x - pad_a, y, w, h),
            lambda x, y, w, h: (x - pad_a, y - pad_a, w + pad_a, h + pad_a),
            lambda x, y, w, h: (x - pad_a, y, w + pad_a, h + pad_a),
            lambda x, y, w, h: (x, y - pad_a, w + pad_a, h + pad_a),
            lambda x, y, w, h: (x, y, w + pad_a, h + pad_a),
        ]

    @staticmethod
    def _crop(image, x, y, w, h):
        """Crop ``image`` at ``(x, y, w, h)`` with the origin clamped to >= 0.

        A negative origin would otherwise be read by NumPy as an index
        counted from the far edge of the image.

        Returns:
            ``(roi, x, y)`` with the clamped origin, or ``None`` when the
            crop contains no pixels.
        """
        x = max(0, x)
        y = max(0, y)
        roi = image[y:y + h, x:x + w]
        if roi.size == 0:
            return None
        return roi, x, y

    @staticmethod
    def _score(roi, model, input_shape, template):
        """Return ``(distance, network_input)`` for one crop.

        The crop is fitted onto a white canvas of the network input size,
        scaled to [0, 1], replicated to three channels and passed to the
        model together with the template.
        """
        input_img = background_thumbnail(roi, 'L', (input_shape[0], input_shape[1]))
        input_img = input_img.astype('float32')
        input_img /= 255
        input_img = np.repeat(input_img[..., np.newaxis], 3, -1)
        # A leading batch axis of size 1 instead of a hard-coded 100x100 reshape.
        embeddings = model.predict([[input_img[np.newaxis]], [template[np.newaxis]]])
        distance = _pairwise_distances(embeddings, squared=False).numpy()[0, 0]
        return distance, input_img

    def visualize(self, image, model, input_shape, template, idx, name):
        """Locate the box that best matches ``template`` and save diagnostics.

        First every lattice position is scored with the expected box size;
        then the best box is refined for at most five passes over
        ``self.actions``.  Two PNG files are written to
        ``result_folder/name``: the image with the found and the expected
        box, and the best network input next to the template.

        Args:
            image: grey-scale image to search in.
            model: model whose ``predict`` returns the embeddings consumed
                by ``_pairwise_distances``.
            input_shape: network input shape; the first two entries are used.
            template: three-channel template image.
            idx: template index, used in the output file names.
            name: sub-folder of ``result_folder`` for the output files.

        Returns:
            ``(min_val, distance, (x, y, w, h))``: the smallest distance
            found, the offset between found and expected box, and the found
            box.

        Raises:
            ValueError: if no candidate position could be scored.
        """
        min_val = np.inf
        min_x = min_y = min_input = None

        # Pass 1: exhaustive search over the lattice with the expected size.
        for x, y in self.grid:
            cropped = self._crop(image, x, y, self.w, self.h)
            if cropped is None:
                # The candidate lies outside the image: nothing to score.
                continue
            roi, x, y = cropped
            result, input_img = self._score(roi, model, input_shape, template)
            if result < min_val:
                min_val = result
                min_x = x
                min_y = y
                min_input = input_img

        if min_input is None:
            raise ValueError('no candidate region could be scored for template {}'.format(idx))

        # Pass 2: local refinement of the best box.  The origin the moves
        # start from (min_x, min_y) is only updated after a full pass, while
        # the best size (min_w, min_h) is updated as soon as a move improves
        # the score, exactly as in the original search.
        min_w = self.w
        min_h = self.h
        min_x2 = min_x
        min_y2 = min_y
        for iteraction in range(1, 6):
            found_min = False
            for action in self.actions:
                (x, y, w, h) = action(min_x, min_y, min_w, min_h)
                cropped = self._crop(image, x, y, w, h)
                if cropped is None:
                    continue
                roi, x, y = cropped
                result, input_img = self._score(roi, model, input_shape, template)
                if result < min_val:
                    min_val = result
                    min_x2 = x
                    min_y2 = y
                    min_w = w
                    min_h = h
                    min_input = input_img
                    found_min = True
            if found_min:
                min_x = min_x2
                min_y = min_y2
            print('done iteration {}'.format(iteraction))
            if not found_min:
                break

        # Diagnostics: found box and its score, plus the expected box.
        clone2 = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        cv2.rectangle(clone2, (min_x2, min_y2), (min_x2 + min_w, min_y2 + min_h), color=(255, 0, 0))
        # The score is anchored to the best box, not to the last box tried.
        cv2.putText(clone2, str(min_val), (min_x2 + 20, min_y2 + 80), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                    color=(0, 0, 255))
        cv2.rectangle(clone2, (self.x, self.y), (self.x + self.w, self.y + self.h), color=(0, 0, 255))
        cv2.imwrite(os.path.join(result_folder, name, 'minimum_' + str(idx) + '.png'), clone2)

        fig, ax = plt.subplots(nrows=1, ncols=2)
        ax.ravel()[0].imshow(min_input[:, :, 0], cmap='gray')
        ax.ravel()[1].imshow(template[:, :, 0], cmap='gray')
        plt.savefig(os.path.join(result_folder, name, 'input_' + str(idx) + '.png'))
        plt.close('all')
        print('done template {}'.format(idx))

        # NOTE(review): both reference points are computed as `origin - size/2`
        # rather than the box centre `origin + size/2`; kept unchanged so the
        # reported distances stay comparable -- confirm this is intended.
        return min_val, math.hypot(int(min_x2-min_w/2-(self.x-self.w/2)), int(min_y2-min_h/2-(self.y-self.h/2))), (min_x2, min_y2, min_w, min_h)
      
class Visualization():
    """Runs every template model over one image and logs the outcome.

    One ``Grid`` is created per entry of ``points`` (in iteration order), so
    the i-th point set is searched with the i-th template and the model
    named by ``template_dic[i]``.
    """

    def __init__(self, name, image, points, templates, shape, writer):
        """Store the inputs and build one search grid per point set.

        Args:
            name: sample name; also the result sub-folder and CSV entry.
            image: image handed to ``Grid.visualize``.
            points: mapping whose values are the coordinates given to ``Grid``.
            templates: indexable collection of single-channel templates.
            shape: network input shape forwarded to ``Grid.visualize``.
            writer: object with a ``writerow`` method taking a dict.
        """
        self.name = name
        self.img = image
        self.templates = templates
        self.input_shape = shape
        self.writer = writer
        self.grids = [Grid(point) for point in points.values()]
        self.scores = []
        self.distances = []
        self.rects = []

    def run(self):
        """Search each template in the image and append one CSV row.

        For every template the matching model is loaded, the search is run,
        and the Keras session is cleared before the next model is loaded.
        """
        os.makedirs(os.path.join(result_folder, self.name), exist_ok=True)

        # Names the saved models refer to for their loss and metric.
        custom_objects = {
            'batch_hard_triplet_loss': batch_hard_triplet_loss,
            'compute_accuracy_hard': compute_accuracy,
        }

        for i, grey_template in enumerate(self.templates):
            # Replicate the single channel three times for the network.
            template = np.repeat(grey_template[..., np.newaxis], 3, -1)
            self.model = load_model(os.path.join(model_folder, template_dic[i]),
                                    custom_objects=custom_objects)
            score, distance, rect = self.grids[i].visualize(
                self.img, self.model, self.input_shape, template, i, self.name)
            self.scores.append(score)
            self.distances.append(distance)
            self.rects.append(rect)
            # Release the model before loading the next one.
            T.clear_session()
            del self.model

        self.writer.writerow({
            'names': self.name,
            'scores': self.scores,
            'distances': self.distances,
            'rect': self.rects,
        })

In [11]:
import pandas as pd
import csv
import copy
import gc
import time
 
# root='./gdrive/My Drive/thesis' #modificare se cartella rinominata

# Template file name -> template index.
label_dict = {
    file_name: label
    for label, file_name in enumerate([
        'cross.png',
        'face.png',
        'rail.png',
        'rombo.png',
        'rett_diag.png',
        'rect.png',
        'cross_vert.png',
    ])
}

# Expected coordinates of each pattern.  'rhomb' lists only three values
# here; the processing loop inserts the value at index 2 for every image.
or_points = dict(
    cross=[324, 119, 378, 373],
    face=[742, 287, 829, 373],
    rail=[617, 383, 847, 534],
    rhomb=[852, 229, 531],
    rett_diag=[379, 300, 502, 456],
    rect=[360, 525, 510, 680],
    cross_vert=[502, 540, 810, 661],
)

# Resize factor in percent, and margin subtracted from the first two and
# added to the remaining coordinates of every point set.
scale_percent, pad = 100, 0
 
def unique_color(img):
    """Fill every non-positive pixel with the most frequent positive value.

    The array is modified in place and also returned.  An image without any
    pixel greater than 0 has no fill value to choose from and is returned
    unchanged.

    Args:
        img: NumPy array of pixel values.

    Returns:
        The same array object, with the pixels that were not > 0 replaced.
    """
    mask = img > 0
    if not mask.any():
        # Nothing to count: argmax on an empty array would raise.
        return img
    colors, count = np.unique(img[mask], return_counts=True)
    img[~mask] = colors[count.argmax()]
    return img
 
input_shape = (100,100,1)
template_folder=os.path.join(root, 'templates')

# One grey-scale template per label, scaled to [0, 1].  A label whose file is
# missing from the folder keeps an all-zero template.
templates = np.zeros((len(label_dict), input_shape[0], input_shape[1]))
for img in os.listdir(template_folder):
    # Only files named in label_dict are templates; this skips 'template.png'
    # as well as stray files that would otherwise raise a KeyError.
    if img not in label_dict:
        continue
    template_path = os.path.join(template_folder, img)
    template_img = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
    if template_img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise IOError('cannot read template image: {}'.format(template_path))
    template = background_thumbnail(template_img, 'L', (input_shape[0], input_shape[1]))
    template = template.astype('float32')
    template /= 255
    templates[label_dict[img]] = template

# Annotated points of every sample, parsed once and indexed by sample name.
hom_folder = os.path.join(root, 'new_sample')
points_table = pd.read_json(os.path.join(hom_folder, 'points.txt'), lines=True)
file_j = points_table.set_index('name')
img_list = points_table.loc[:, 'name'].values

# Make sure the results folder and the CSV file (with its header) exist.
os.makedirs(os.path.join(root, 'results'), exist_ok=True)
fieldnames = ['names', 'scores', 'distances', 'rect']
if not os.path.isfile(os.path.join(root, 'results', 'scores.csv')):
    with open(os.path.join(root, 'results', 'scores.csv'), "w") as f:
        f.write(','.join(fieldnames)+'\n')

# Names already present in the results file (shown for information only).
folders = pd.read_csv(os.path.join(root, 'results', 'scores.csv'), header=0, usecols=['names']).values.squeeze()
print(folders)
# Process every PNG in the sample folder and append one CSV row per image.
count = 1
total_time = 0
# Listed once and sorted so that the processing order, and therefore the row
# order in the CSV file, is the same on every run.
sample_files = sorted(os.listdir(hom_folder))
for image in sample_files:
    if image.endswith('.png'):
        print("image{} of {}".format(count, len(sample_files)))
        start_time = time.time()
        print(image)
        sample_name = image[:-4]
        homography = cv2.imread(os.path.join(hom_folder, image), cv2.IMREAD_GRAYSCALE)

        # Annotated points of this sample as a list of (x, y) tuples.
        or_points2 = copy.deepcopy(or_points)
        points = np.array(file_j.loc[sample_name].to_numpy()[0])
        if points.shape ==(1,):
            points = np.array(points[0])
        points = [tuple(x) for x in points]

        # Complete the 'rhomb' entry with the projected x coordinate.
        r_points = computeHomographyRhomb(homography, points)
        or_points2['rhomb'].insert(2, r_points[0])

        homography = cv2.medianBlur(homography, 3)
        width = int(homography.shape[1] * scale_percent / 100)
        height = int(homography.shape[0] * scale_percent / 100)
        homography = cv2.resize(homography, (width, height), interpolation=cv2.INTER_AREA)

        # Scale the expected coordinates like the image and apply the margin.
        for key, coords in or_points2.items():
            scaled = np.array([int(p*(scale_percent/100)) for p in coords])
            scaled[0:2] -= pad
            scaled[2:] += pad
            or_points2[key] = scaled.tolist()

        # The context manager closes the file even if the run fails;
        # newline='' is what the csv module expects from its file objects.
        with open(os.path.join(root, 'results', 'scores.csv'), mode='a', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            app = Visualization(sample_name, homography, or_points2, templates, input_shape, writer)
            app.run()
        del app
        print(gc.collect())

        end_time = time.time()
        loop_time = (end_time - start_time) / 60
        total_time += loop_time
        print('loop time: {}m'.format(loop_time))
        print('total_time: {}m'.format(total_time))
    count +=1

['RC001' 'RC002' 'RC003' 'RC004' 'RC009' 'RC011' 'RC012' 'RC013' 'RC014'
 'RC015' 'RC016' 'RC018' 'RC021' 'RC023' 'RC024' 'RC025' 'RC026' 'RC028'
 'RC029' 'RC030' 'RC031' 'RC034' 'RC036' 'RC037' 'RC038' 'RC039' 'RC041'
 'RC042' 'RC043' 'RC045' 'RC046' 'RC047' 'RC048' 'RC049' 'RC050' 'RC051'
 'RC052' 'RC054' 'RC055' 'RC056' 'RC057' 'RC059' 'RC060' 'RC061' 'RC062'
 'RC063' 'RC065' 'RC066' 'RC069' 'RC070' 'RC071' 'RC073' 'RC074' 'RC075'
 'RC076' 'RC077' 'RC078' 'RC079' 'RC082' 'RC083' 'RC084' 'RC085' 'RC086'
 'RC087' 'RC088' 'RC089' 'RC090' 'RC091' 'RC092' 'RC093' 'RC094' 'RC095'
 'RC096' 'RC097' 'RC099' 'RC100' 'RC101' 'RC102' 'RC103' 'RC104' 'RC105'
 'RC106' 'RC107' 'RC108' 'RC109' 'RC110' 'RC111' 'RC112' 'RC114' 'RC115'
 'RC116' 'RC117' 'RC120' 'RC121' 'RC122' 'RC123' 'RC124' 'RC125' 'RC126'
 'RC127' 'RC128' 'RC129' 'RC130' 'RC131' 'RC134' 'RC135' 'RC136' 'RC137'
 'RC138' 'RC139' 'RC141' 'RC143' 'RC144' 'RC145' 'RC147' 'RC149' 'RC150'
 'RC151' 'RC152' 'RC153' 'RC154' 'RC155' 'RC156' 'R

In [12]:
import tensorflow
from tensorflow.python.client import device_lib
# Print the devices TensorFlow reports as locally available; the captured
# output below lists CPU, XLA_CPU, GPU and XLA_GPU entries.
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 15415716682736870489
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 5722355660218851485
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3178115892
locality {
  bus_id: 1
  links {
  }
}
incarnation: 3521164588587575507
physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 950M, pci bus id: 0000:01:00.0, compute capability: 5.0"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 12768017840245505512
physical_device_desc: "device: XLA_GPU device"
]
