In [108]:
import cv2
import numpy as np

def define_range(path, first_band_end=0.4, second_band_end=0.7):
    """
    Compute two horizontal pixel bands from the width of an image.

    Band 0 spans [0, first_band_end * width] and band 1 spans
    [first_band_end * width, second_band_end * width]. The callers in this
    notebook pass these bounds to `change_x` to bucket OCR boxes by their
    x coordinate.

    Arguments:
        path: path to the locally saved image (read in grayscale with OpenCV).
        first_band_end: fraction of the image width where band 0 ends and
            band 1 starts (default 0.4).
        second_band_end: fraction of the image width where band 1 ends
            (default 0.7).

    Returns:
        Tuple (x_0_lower, x_0_upper, x_1_lower, x_1_upper), in pixels.

    Raises:
        OSError: if the image cannot be read from `path`.
    """
    # cv2.imread does not raise on a missing or unreadable file: it returns
    # None, which would otherwise surface as an opaque AttributeError below.
    image = cv2.imread(path, 0)
    if image is None:
        raise OSError("Could not read image: {!r}".format(path))

    # shape is (height, width) for a grayscale image.
    width = image.shape[1]

    # Range of x_0
    x_0_lower = 0
    x_0_upper = width * first_band_end
    # Range of x_1 (starts exactly where band 0 ends)
    x_1_lower = width * first_band_end
    x_1_upper = width * second_band_end

    return x_0_lower, x_0_upper, x_1_lower, x_1_upper

In [109]:
def change_x(coord, x_0_lower, x_0_upper, x_1_lower, x_1_upper):
    """
    Map an x coordinate to the index of the band that contains it.

    Both bounds of each band are inclusive. Band 0 is checked first, so a
    coordinate lying in both ranges (e.g. on a shared boundary) maps to 0.

    Arguments:
        coord: x coordinate to classify.
        x_0_lower, x_0_upper: inclusive bounds of band 0.
        x_1_lower, x_1_upper: inclusive bounds of band 1.

    Returns:
        0 or 1 for the matching band, -1 when the coordinate is in neither.
    """
    bands = ((x_0_lower, x_0_upper), (x_1_lower, x_1_upper))
    for band_index, (lower, upper) in enumerate(bands):
        if lower <= coord <= upper:
            return band_index
    return -1

In [36]:
def detect_text(path, credentials_path=None):
    """
    Run Google Cloud Vision text detection on an image and group the
    detected annotations by horizontal band.

    Arguments:
        path: path to the locally saved image.
        credentials_path: optional path to the service-account JSON key.
            When omitted, the path originally hard-coded in this notebook
            is used so existing calls keep working.

    Returns:
        dict mapping a band index (0 or 1, as returned by `change_x`) to a
        list of (y, description) tuples, where y is the y coordinate of the
        first vertex of the annotation's bounding polygon. Annotations whose
        first vertex falls outside both bands are dropped.

    Raises:
        Exception: if the Vision API response carries an error message.
    """
    from google.cloud import vision

    # Fallback kept for backward compatibility only: this location is
    # machine-specific, so prefer passing `credentials_path` explicitly.
    if credentials_path is None:
        credentials_path = 'C:/Users/gonxh/Documents/EPFL/Master/MA3/Foundations_of_DH/DH - Rolandi Libretti-a52be57f8f03.json'
    client = vision.ImageAnnotatorClient.from_service_account_json(credentials_path)

    with open(path, 'rb') as image_file:
        content = image_file.read()

    response = client.text_detection(image=vision.Image(content=content))

    # Fail before using the annotations, so a failed request is never
    # processed as if it were a valid (possibly empty) result.
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    # Build the x ranges of the two bands
    x_0_lower, x_0_upper, x_1_lower, x_1_upper = define_range(path)

    # NOTE(review): the sample output of this notebook shows the first
    # annotation carrying the text of the whole page; it is kept here to
    # preserve the existing return value - confirm whether it should be
    # skipped, as detect_box does with texts[1:].
    dict_bounds = dict()
    for text in response.text_annotations:
        top_left = text.bounding_poly.vertices[0]
        coord_x = change_x(top_left.x, x_0_lower, x_0_upper, x_1_lower, x_1_upper)

        if coord_x != -1:
            #TODO (NAMES, SCENES)
            dict_bounds.setdefault(coord_x, []).append((top_left.y, text.description))

    return dict_bounds

In [37]:
# Run text detection on the p25 test image and display the resulting
# {band index: [(y, description), ...]} dictionary as the cell output.
detect_text('./data/test_images/p25.jpg')

{0: [(38,
   "36\nLa Figlia, che Bambina abbandonai\nNelle Mede forefte,\nChi al Sen la ftringerå? Chi sàl, che un gior-\nSotto al Ciglio del Padre\nNon la guidi Fortuna. Al biondo crine,\nAlle Nere pupille, all'aria illuftre,\nChe in volto le sfavilla\nRiconofcerla o Spofo,\nForfe potrelti. Le pendea dal Collo\nGemma, che fù tuo dono\n( Forfe fecol'avrà.) Quella è tua figlia\nPadre l'abbraccia, e dille :\nE'norta la tua Madre,\nAntigona morì,\nAlc. Dimmi qual gemma,\nQual figlia mai rammenti\nDonna real ?\nAnt. Che giova\nA te!\nAlc. Più che nón credi.\nAnt. Afcolra.\nAh fe perdo la Vita,\nL'Arcano non fi perda. Or fon tre luftri,\nChe ne Bofchi di Media una Bambina.\nNacque da me. Raminga, efule, e fola\nN'andavo allora. Il caro Pegno in feno\nUn di a'ombrofa Palma al piede annofo\nPer nudrirlo mi reco. Al fuol l'adatto\nPer un folo momento. Orrida Fiera\nAllor, che in me venia,\nCol rugir mi fpaventa. In piè balzai:\nTimida il pegno lafcio :\nATTO\nTERZO.\nPalpitante men tuggo. Ivi 

* Faire l'OCR
* Pour chaque boîte retournée par l'OCR, on lui associe les probabilités retournées par DH-Segment et on en fait la moyenne
* Choisir un seuil pour les probabilités

In [132]:
from tqdm import tqdm
from PIL import Image


def _box_coverage(mask, top_left, bottom_right, margin_ratio):
    """Return the mean of `mask` over the central part of a box, or None if that part is empty."""
    # Shrink the box by `margin_ratio` of its size on every side so only its
    # centre is sampled.
    margin_y = int((bottom_right.y - top_left.y) * margin_ratio)
    margin_x = int((bottom_right.x - top_left.x) * margin_ratio)

    # Clamp at 0: a negative bound would otherwise be interpreted by NumPy as
    # an index counted from the end of the array.
    y_start = max(top_left.y + margin_y, 0)
    y_stop = max(bottom_right.y - margin_y, 0)
    x_start = max(top_left.x + margin_x, 0)
    x_stop = max(bottom_right.x - margin_x, 0)

    region = mask[y_start:y_stop, x_start:x_stop]
    if region.size == 0:
        return None
    return region.mean()


def detect_box(path, segmentation_path=None, resized_path=None, credentials_path=None,
               mask_threshold=0.2, margin_ratio=0.4, coverage_threshold=0.7):
    """
    Run Google Cloud Vision text detection on an image and keep only the
    annotations whose bounding box is mostly covered by a thresholded
    segmentation mask, grouped by horizontal band.

    The image is first resized to the segmentation resolution and saved to
    `resized_path`, so that OCR coordinates index the mask directly.

    Arguments:
        path: path to the locally saved image.
        segmentation_path: path to the .npy segmentation output. Defaults to
            "./data/results_segmentation/<name>.npy", where <name> is the
            file name of `path` without its extension.
        resized_path: where the resized image is written. Defaults to
            "./data/test_antigone/<name>.png".
        credentials_path: optional path to the service-account JSON key.
            When omitted, the path originally hard-coded in this notebook
            is used so existing calls keep working.
        mask_threshold: values of the segmentation channel above this are
            set to 1 in the mask, the rest to 0 (default 0.2).
        margin_ratio: fraction of the box width/height trimmed on each side
            before averaging the mask (default 0.4).
        coverage_threshold: minimum mean mask value over the trimmed box for
            the annotation to be kept (default 0.7).

    Returns:
        dict mapping a band index (0 or 1, as returned by `change_x`) to a
        list of (y, description) tuples, where y is the y coordinate of the
        first vertex of the annotation's bounding polygon.

    Raises:
        OSError: if the image at `path` cannot be read.
        Exception: if the Vision API response carries an error message.
    """
    from google.cloud import vision
    import os

    # Derive the companion files from the input name instead of always using
    # page "p23", so the function works for any page.
    page_name = os.path.splitext(os.path.basename(path))[0]
    if segmentation_path is None:
        segmentation_path = "./data/results_segmentation/{}.npy".format(page_name)
    if resized_path is None:
        resized_path = "./data/test_antigone/{}.png".format(page_name)

    # Fallback kept for backward compatibility only: this location is
    # machine-specific, so prefer passing `credentials_path` explicitly.
    if credentials_path is None:
        credentials_path = 'C:/Users/gonxh/Documents/EPFL/Master/MA3/Foundations_of_DH/DH - Rolandi Libretti-a52be57f8f03.json'
    client = vision.ImageAnnotatorClient.from_service_account_json(credentials_path)

    # Segmentation output. Indexed below as data[1] with shape[1] used as the
    # height and shape[2] as the width - presumably (class, height, width)
    # with class 1 being the names; TODO confirm against the producer.
    data = np.load(segmentation_path)

    # cv2.imread returns None (no exception) when the file cannot be read.
    img = cv2.imread(path, 0)
    if img is None:
        raise OSError("Could not read image: {!r}".format(path))

    # Resize the page to the segmentation resolution (cv2 expects (width, height)).
    img = cv2.resize(img, (data.shape[2], data.shape[1]))
    Image.fromarray(img).save(resized_path)

    with open(resized_path, 'rb') as image_file:
        content = image_file.read()

    response = client.text_detection(image=vision.Image(content=content))

    # Same check as detect_text: never treat a failed request as a valid result.
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    # Build the x ranges of the two bands
    x_0_lower, x_0_upper, x_1_lower, x_1_upper = define_range(resized_path)

    # Threshold for name segmentation
    mask = (data[1] > mask_threshold).astype(np.uint8)

    dict_bounds = dict()
    #TODO: check that the first annotation really has to be dropped
    for text in tqdm(response.text_annotations[1:]):
        vertices = text.bounding_poly.vertices
        top_left, bottom_right = vertices[0], vertices[2]

        coverage = _box_coverage(mask, top_left, bottom_right, margin_ratio)
        # An empty trimmed box has no coverage to measure: skip it.
        if coverage is None or not coverage > coverage_threshold:
            continue

        coord_x = change_x(top_left.x, x_0_lower, x_0_upper, x_1_lower, x_1_upper)
        if coord_x != -1:
            #TODO (NAMES, SCENES)
            dict_bounds.setdefault(coord_x, []).append((top_left.y, text.description))

    return dict_bounds

In [133]:
# Run the mask-filtered text detection on the p23 test image and display the
# resulting {band index: [(y, description), ...]} dictionary as the cell output.
detect_box('./data/test_images/p23.jpg')

100%|███████████████████████████████████████████████████████████████████████████████| 327/327 [00:00<00:00, 578.90it/s]


{0: [(89, 'Cre.'),
  (133, 'altrove.'),
  (223, 'Eur.'),
  (245, 'Erm.'),
  (267, 'Lea.'),
  (311, 'Cre.'),
  (570, 'Eur.'),
  (595, 'Cre.'),
  (727, 'Eur.'),
  (750, 'Cre.')],
 1: [(354, 'Ant.'),
  (527, 'Cre.'),
  (636, 'Eur.'),
  (699, 'Cre.'),
  (721, 'Eur.')]}