In [1]:
from PIL import Image, ImageFilter
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
import glob
import base64
import io
import os

#Funciones generales para uso global
import numpy as np
import time
from shapely.geometry import Polygon, Point
import geopandas as gpd
from shapely.ops import cascaded_union
import rasterio
from rasterio.mask import mask
from shapely import geometry
import pandas as pd
from geotiff import GeoTiff


#Funciones empleadas en la detección de texto
import os, sys
import numpy as np
import cv2
import time
from imutils.object_detection import non_max_suppression

import rasterio
from rasterio.enums import Resampling

import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR\tesseract.exe'

In [2]:
# Takes a Rasterio dataset and splits it into squares of dimensions squareDim * squareDim
def splitImageIntoCells(img, filename, num_imgs=3):
    squareDim_wide = img.shape[1] // num_imgs
    squareDim_height = img.shape[0] // num_imgs
    
    numberOfCellsWide = img.shape[1] // squareDim_wide
    numberOfCellsHigh = img.shape[0] // squareDim_height
    x, y = 0, 0
    count = 0
    for hc in range(numberOfCellsHigh):
        y = hc * squareDim_height
        for wc in range(numberOfCellsWide):
            x = wc * squareDim_wide
            geom = getTileGeom(img.transform, x, y, squareDim_wide, squareDim_height)
            getCellFromGeom(img, geom, filename, count)
            count = count + 1

# Generate a bounding box from the pixel-wise coordinates using the original datasets transform property
def getTileGeom(transform, x, y, squareDim_wide, squareDim_height):
    corner1 = (x, y) * transform
    corner2 = (x + squareDim_wide, y + squareDim_height) * transform
    return geometry.box(corner1[0], corner1[1],
                        corner2[0], corner2[1])

# Crop the dataset using the generated box and write it out as a GeoTIFF
def getCellFromGeom(img, geom, filename, count):
    crop, cropTransform = mask(img, [geom], crop=True)
    writeImageAsGeoTIFF(crop,
                        cropTransform,
                        img.meta,
                        img.crs,
                        filename+"_"+str(count))

# Write the passed in dataset as a GeoTIFF
def writeImageAsGeoTIFF(img, transform, metadata, crs, filename):
    metadata.update({"driver":"GTiff",
                     "height":img.shape[1],
                     "width":img.shape[2],
                     "transform": transform,
                     "crs": 'EPSG:4686'})
    with rasterio.open(filename+".tif", "w", **metadata) as dest:
        dest.write(img)

In [3]:
def split_images(filepath, num_imgs=3):
    """
    Split a large image into a grid of 3x3
    smaller images.
    
    Arguments:
    ---------
    filepath: str
              file path of the large image
    num_imgs: int (optional)
              Number of rows and columns of the grid
              
    Output
    ------
    None
    
    """
    img = rasterio.open(filepath)
    splitImageIntoCells(img, "split_out/output_data")

In [4]:
def geotif_to_jpeg(tif_filename):
    """
    Converts geotif image from disk and saves it 
    in the same folder in jpeg format.
    
    Arguments
    ---------
    tif_filename: str
                  path of the tif file to convert
    
    Output
    ------
    None
    
    """
    with rasterio.open(tif_filename) as infile:    
        profile = infile.profile    
        profile['driver']='JPEG'
        jpeg_filename = tif_filename.replace(".tif", ".jpeg")
        with rasterio.open(jpeg_filename, 'w', **profile) as dst:
            dst.write(infile.read())

In [5]:
def predictions(prob_score, geo, args):
    (numR, numC) = prob_score.shape[2:4]
    boxes = []
    confidence_val = []

    for y in range(0, numR):
        scoresData = prob_score[0, 0, y]
        x0 = geo[0, 0, y]
        x1 = geo[0, 1, y]
        x2 = geo[0, 2, y]
        x3 = geo[0, 3, y]
        anglesData = geo[0, 4, y]

        for i in range(0, numC):
            if scoresData[i] < args["min_confidence"]:
                continue

            (offX, offY) = (i * 4.0, y * 4.0)

            angle = anglesData[i]
            cos = np.cos(angle)
            sin = np.sin(angle)

            h = x0[i] + x2[i]
            w = x1[i] + x3[i]

            endX = int(offX + (cos * x1[i]) + (sin * x2[i]))
            endY = int(offY - (sin * x1[i]) + (cos * x2[i]))
            startX = int(endX - w)
            startY = int(endY - h)

            boxes.append((startX, startY, endX, endY))
            confidence_val.append(scoresData[i])

    return (boxes, confidence_val)

In [6]:
def detect_text(path_img, raster_img):
    args = {"image":path_img, "east":"east_text_detection.pb", "min_confidence":0.2, "width":320, "height":320}
    
    image = cv2.imread(args['image'])
    orig = image.copy()
    (origH, origW) = image.shape[:2]
    (newW, newH) = (args["width"], args["height"])
    rW = origW / float(newW)
    rH = origH / float(newH)
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]
    
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),(123.68, 116.78, 103.94), swapRB=True, crop=False)
    
    net = cv2.dnn.readNet("ml_model/frozen_east_text_detection.pb")
    layerNames = ["feature_fusion/Conv_7/Sigmoid","feature_fusion/concat_3"]
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    # Find predictions and  apply non-maxima suppression
    (boxes, confidence_val) = predictions(scores, geometry,args)
    boxes = non_max_suppression(np.array(boxes), probs=confidence_val)
    tessdata_dir_config = '--tessdata-dir "C:\\Program Files\\Tesseract-OCR\\tessdata"'
    
    ##Text Detection and Recognition 
    polygons = []
    text_detec = []
    condifence = []
    acum = 0
    for (startX, startY, endX, endY) in boxes:
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)
        
        star = raster_img.xy(startY,startX) 
        end = raster_img.xy(endY,endX) 
        r = orig[startY:endY, startX:endX]
        configuration = ("-l eng --oem 1 --psm 8")
        text = pytesseract.image_to_string(r, config=tessdata_dir_config, lang='spa')
        polygons.append(Polygon([(star[0], star[1]),(end[0], star[1]),(end[0], end[1]),(star[0], end[1]),(star[0], star[1])]))
        text_detec.append(text)
        condifence.append(confidence_val[acum])
        acum += 1
    return polygons,text_detec,condifence

In [7]:
def create_geometries(filepath):
    """
    Splits the image in a grid, uses Google Vision API to extract text
    and the bounding boxes. This method creates geojson files with 
    the geographic names, their respective geodesic coordinates, the 
    confidence, and the geometries of the bounding boxes.
    
    The geojson files are saved in the folder geometries.
    
    Arguments
    ---------
    filepath: str
              Path to the geotiff image file to process
              
    Output
    ------
    None
    """

    split_images(filepath)
    images_to_proces = os.listdir(path='split_out')

    for path in images_to_proces:
        if ('.tif' in path):
            geotif_to_jpeg("split_out/" + path)
            sub_image_jpeg = path.replace('.tif','.jpeg')
            img = rasterio.open("split_out/" + sub_image_jpeg)
            try:
                poligonos,words,confidence  = detect_text( "split_out/" + sub_image_jpeg, img)
                centroids = [aux_polygon.representative_point() for aux_polygon in poligonos]
                sub_img_gdf = gpd.GeoDataFrame(columns=["toponimo_ocr","confidence",
                    "centroide_longitud","centroide_latitud","geometry"], crs=str(img.crs))
                sub_img_gdf["geometry"] = poligonos
                sub_img_gdf["toponimo_ocr"] = words
                sub_img_gdf["confidence"] = confidence
                sub_img_gdf["centroide_longitud"] = [x.coords[0][0] for x in centroids]
                sub_img_gdf["centroide_latitud"] = [x.coords[0][1] for x in centroids]
                name_geometry = 'geometries/'+sub_image_jpeg.replace('.jpeg','.geojson')
                try:
                    sub_img_gdf.to_file(name_geometry, driver="GeoJSON")
                except:
                    print('Empty text detected, no geometries generated!')
            except:
                    print('No geometries generated!')

In [8]:
def empty_folders():
    """
    Remove all temporary output files in the "split_out" and
    "geometries" folders. It also deletes the final output file
    detected_text.geojson.
    
    Arguments
    ---------
    None
    
    Output
    ------
    None
    """
    folders = ["split_out", "geometries"]
    
    for folder in folders:
        files = glob.glob(f"{folder}/*")
        for f in files:
            os.remove(f)
    try:
        os.remove("text_detected.geojson")
    except:
        print("text_detected.geojson already deleted!")

In [9]:
def combine_geometries(filepath):
    """
    Takes all the geojson files in the geometries folder and combine
    them into a single geojson file. It combines bounding boxes that 
    intersect each other and their respective geographic names. It 
    deletes the rows whose name has only numbers.
    
    Arguments
    ---------
    filepath: str
              Path of the original image
    
    Output
    ------
    all_toponyms_gdf: GeoDataFrame
                      GeoDataFrame with all the toponyms from the original
                      image. It contains the geographic name, geodesic 
                      coordinates of the centroid, confidence and the 
                      geometry of the bounding boxes.
    
    """
    org_img = rasterio.open(filepath)
    geometries_to_process = os.listdir(path='geometries')    
    rectangles = []
    all_toponyms_gdf = gpd.GeoDataFrame(columns=["toponimo_ocr","confidence",
                "centroide_longitud","centroide_latitud","geometry"],crs=str(org_img.crs))

    for geometry in geometries_to_process:
        if ".geojson" in geometry:
            file = gpd.read_file('geometries/' + geometry)
            all_toponyms_gdf = all_toponyms_gdf.append(file)

    all_toponyms_gdf.reset_index(drop=True, inplace=True)
    all_toponyms_gdf.to_file('text_detected.geojson',driver='GeoJSON')
    
    return all_toponyms_gdf

In [10]:
def union_geometries(geojson_toponimos):
    """
    It allows to detect the text divided organically by the subdivision 
    of the images and to unite the geometries and the detected text
    
    Arguments
    ---------
    geojson_toponimos: geopandas
                       Initial geopandas union of all split image
    
    Output: 
    ------
    new_geojseon: geopandas
                  geopandas with all union geometries and detected text
    
    """

    geojson_toponimos.reset_index(drop=True, inplace=True)
    new_geojseon = gpd.GeoDataFrame(columns=["toponimo_ocr","confidence","centroide_longitud","centroide_latitud",
                                             "geometry"],crs=geojson_toponimos.crs)
    while( len(geojson_toponimos)!=0 ):
        indexs = []
        new_elments =[]

        for i in range(0,len(geojson_toponimos)):
            if ( geojson_toponimos.geometry[0].intersects( geojson_toponimos.geometry[i] ) ):
                indexs.append(i)

        if ( len(indexs)==1):
            indexs = [0]
            new_geojseon = new_geojseon.append(geojson_toponimos.iloc[0:1])

        else:

            for j in range(0,4):
                for inde in range( len(indexs) ):
                    for i in range(0,len(geojson_toponimos)):
                        if ( geojson_toponimos.geometry[ indexs[inde] ].intersects( geojson_toponimos.geometry[i] ) ):
                            indexs.append(i)
                indexs = list(set(indexs))

            geom_list = [geojson_toponimos.geometry[j] for j in indexs]
            new_elments = [(geojson_toponimos['toponimo_ocr'][row],
                            geojson_toponimos['confidence'][row],
                            geojson_toponimos['centroide_longitud'][row]) for row in indexs]

            new_elments.sort(key=lambda tup: tup[2],reverse=False)
            toponimo_ocr = [' '.join([i[0] for i in new_elments])]
            confidence = [max([i[1] for i in new_elments])]
            new_geom = [ cascaded_union( geom_list )  ]
            centroide_longitud = [new_geom[0].representative_point().coords[0][0] ]
            centroide_latitud = [new_geom[0].representative_point().coords[0][1] ]
            new_word = {'toponimo_ocr': toponimo_ocr,'confidence': confidence, 'centroide_longitud':centroide_longitud,
                        'centroide_latitud':centroide_latitud,'geometry':new_geom}

            aux_geojson = gpd.GeoDataFrame(new_word,columns=["toponimo_ocr","confidence","centroide_longitud",
                                                             "centroide_latitud","geometry"],crs=geojson_toponimos.crs)
            new_geojseon = new_geojseon.append(aux_geojson)

        geojson_toponimos = geojson_toponimos.drop(labels=indexs, axis=0).reset_index(drop=True)
        
    special_characters = ["<","(","[","{","^","-","=","$","!","|","]","}",")","?","*","+",".",">","¡"]
    new_geojseon.reset_index(drop=True,inplace=True)
    for count in range(len(new_geojseon)):
        string = new_geojseon["toponimo_ocr"][count]
        for character in special_characters:
            string = string.replace(character, "")
        
        split_string = string.split(" ")
        new_string = ""
        for token in split_string:
            if(token.strip().isdigit()):
                if( len( str(int( token.strip() )) )==1 ):
                    new_string = new_string +" "+ token
            else:
                new_string = new_string +" "+ token
        new_geojseon.at[count,"toponimo_ocr"] = new_string
    
    new_geojseon["toponimo_ocr"] = new_geojseon["toponimo_ocr"].apply(lambda x: "" if x.strip().isdigit() else x )
    new_geojseon = new_geojseon[new_geojseon["toponimo_ocr"]!=""].reset_index(drop=True)
    new_geojseon.to_file('text_detected.geojson',driver='GeoJSON')
    
    return new_geojseon

In [11]:
def get_image_corners(filepath):
    """
    Returns the geodesic coordinates of the original image.
    
    Arguments
    ---------
    filepath: str
              Path of the original image
    
    Output
    ------
    image_corners: dict
                   Dictionary with two keys: "upper_left" and "lower_left"
                   The values are tuples with the latitude and longitude
                   of the image corners.
    """
    original_img = rasterio.open(filepath)
    image_corners = {
        "upper_left": original_img.xy(0,0),
        "lower_right": original_img.xy(original_img.shape[0], original_img.shape[1])
    }
    
    return image_corners

## Run model

In [12]:
aerophotos = glob.glob("geotiffs/*.tif")
aerophotos

['geotiffs\\C-1974 F-238.tif',
 'geotiffs\\C-1974 F-240.tif',
 'geotiffs\\C-2070 F-250.tif',
 'geotiffs\\C-2070 F-252.tif',
 'geotiffs\\M-1390 F-42286.tif',
 'geotiffs\\M-1390 F-42290.tif']

In [15]:
%%time
empty_folders()
create_geometries(aerophotos[1])
all_toponyms_img1 = combine_geometries(aerophotos[0])
all_toponyms_img1 = union_geometries(all_toponyms_img1)
get_image_corners(aerophotos[1])

  corner1 = (x, y) * transform
  corner2 = (x + squareDim_wide, y + squareDim_height) * transform


Empty text detected, no geometries generated!
No geometries generated!
Wall time: 23 s


{'upper_left': (-75.87331615888398, 3.9374634048839634),
 'lower_right': (-75.81190315188398, 3.8747698248839635)}