In [7]:
#Funciones generales para uso global
import numpy as np
import time
from shapely.geometry import Polygon, Point
import geopandas as gpd
from shapely.ops import cascaded_union
import rasterio
from rasterio.mask import mask
from shapely import geometry
import pandas as pd
from geotiff import GeoTiff

#Funciones para la aplicación de OCR
import pytesseract
#tessdata_dir_config = '--tessdata-dir "C:\\Program Files\\Tesseract-OCR\\tessdata"'
from PIL import ImageGrab
import PIL
from itertools import product

#Funciones empleadas en la detección de texto
import os, sys
import numpy as np
import cv2
import time
from imutils.object_detection import non_max_suppression

# DS4A MINTIC 2021
## Team 18, proyecto IGAC

# <center> <font color='blue'> Parte 1. generación de funciones importantes </font> <center>

## Funciones para subdividir la imagen original
Nota: Problemas para la herencia de coordenadas

In [9]:
# Takes a Rasterio dataset and splits it into squares of dimensions squareDim * squareDim
def splitImageIntoCells(img, filename, squareDim):
    numberOfCellsWide = img.shape[1] // squareDim
    numberOfCellsHigh = img.shape[0] // squareDim
    x, y = 0, 0
    count = 0
    for hc in range(numberOfCellsHigh):
        y = hc * squareDim
        for wc in range(numberOfCellsWide):
            x = wc * squareDim
            geom = getTileGeom(img.transform, x, y, squareDim)
            getCellFromGeom(img, geom, filename, count)
            count = count + 1

# Generate a bounding box from the pixel-wise coordinates using the original datasets transform property
def getTileGeom(transform, x, y, squareDim):
    corner1 = (x, y) * transform
    corner2 = (x + squareDim, y + squareDim) * transform
    return geometry.box(corner1[0], corner1[1],
                        corner2[0], corner2[1])

# Crop the dataset using the generated box and write it out as a GeoTIFF
def getCellFromGeom(img, geom, filename, count):
    crop, cropTransform = mask(img, [geom], crop=True)
    writeImageAsGeoTIFF(crop,
                        cropTransform,
                        img.meta,
                        img.crs,
                        filename+"_"+str(count))

# Write the passed in dataset as a GeoTIFF
def writeImageAsGeoTIFF(img, transform, metadata, crs, filename):
    metadata.update({"driver":"GTiff",
                     "height":img.shape[1],
                     "width":img.shape[2],
                     "transform": transform,
                     "crs": crs})
    with rasterio.open(filename+".tif", "w", **metadata) as dest:
        dest.write(img)

## Función opcional para dividir la imagen original
Nota: Es opcional, fue la primera opción de divisón de imagenes, pero no hereda coordenadas

In [10]:
#function to split the image depending of the d:= tile size
def tile(filename, dir_in, dir_out, d):
    name, ext = os.path.splitext(filename)
    img = PIL.Image.open(os.path.join(dir_in, filename))
    w, h = img.size
    
    grid = list(product(range(0, h-h%d, d), range(0, w-w%d, d)))
    for i, j in grid:
        box = (j, i, j+d, i+d)
        out = os.path.join(dir_out, f'{name}_{i}_{j}{ext}')
        crop_image = img.crop(box)
        crop_image = crop_image.paste( img,(j,i+d) )
        crop_image.save(out)

## Función que genera un bounding box global de un poligono
Nota: El poligono es el resultado de hacer "CascadeUnion" de la union de dos o mas BoundingBox que se intersectan

In [11]:
def bounding_box(poligon):
    x_coordinates = list(poligon.exterior.coords.xy[0])
    y_coordinates = list(poligon.exterior.coords.xy[1])

    return Polygon([(min(x_coordinates),min(y_coordinates)),(max(x_coordinates),min(y_coordinates)),(max(x_coordinates), max(y_coordinates)),(min(x_coordinates), max(y_coordinates))])

## Función que retorna todos Bounding Box que se tocan por Bounding
Nota: Ingresan todos los BoundingsBox, luego determina cuales se tocan y los unen "CascadeUnion" y aplica "Boundin_box" (la función anterior)

In [12]:
def union_geometries(geom_list):
    #list all True intersecctions
    news_geometries = []
    while( len(geom_list)!=0 ):
        logic_aux = [geom_list[0].intersects( geom_list[i] ) for i in range(0,len(geom_list) ) ]
        indexs = np.sort( [i for i, e in enumerate(logic_aux) if e == True] )
        news_geometries.append( bounding_box( cascaded_union( [geom_list[j] for j in indexs] ) ) )
        acum = 0
        for j in indexs:
            delete = j+acum
            geom_list.pop(delete)
            acum -= 1
    return news_geometries

## Función que genera los objetos tipo Polygon
Nota: La red retorna las coordenadas donde detecto texto; Esta función retorna los polygonos detectados, su unión con "union_geometrias" y retorna una lista nuevamente

In [13]:
def union_rectangulos(rects):
    geometrias = [ Polygon([ (rect[0],rect[1]),(rect[2],rect[1]),(rect[2],rect[3]),(rect[0],rect[3]),(rect[0],rect[1]) ]) for rect in rects]
    resultado = union_geometries(geometrias)
    new_rect = [( int( min(list(resultado[poligon].exterior.coords.xy[0])) ),int( min(list(resultado[poligon].exterior.coords.xy[1])) ),int( max(list(resultado[poligon].exterior.coords.xy[0])) ),int( max(list(resultado[poligon].exterior.coords.xy[1])) ) ) for poligon in range(0,len(resultado)) ]
    return new_rect

# <center> <font color='blue'> Parte 2. Dividiendo la imagen original </font> <center>
    
    Nota: La imagen se divide respecto a la cantidad de pixeles deseados. Ejmplo: 800

In [14]:
fp = 'example.jpg'
img = rasterio.open(fp)
splitImageIntoCells(img, 'SPLIT_OUT/salida_data', 800)

RasterioIOError: example.jpg: No such file or directory

# <center> <font color='blue'> Parte 3. Red entrenada con detección de objetos </font> <center>

## Función que aplica la red a la imagen
Nota: Esta función es general y se aplica a la imagen que se desee grande o pequeña, pero genera mejores resultados a las pequeñas

In [11]:
def east_detect(image, image2):
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]
    
    orig = image.copy()
    
    if len(image.shape) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    
    (H, W) = image.shape[:2]
    
    # set the new width and height and then determine the ratio in change
    # for both the width and height: Should be multiple of 32
    (newW, newH) = (320, 320)
    
    rW = W / float(newW)
    rH = H / float(newH)
    
    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    
    (H, W) = image.shape[:2]
    
    net = cv2.dnn.readNet("frozen_east_text_detection.pb")
    
    blob = cv2.dnn.blobFromImage(image, 3, (W, H),
        (123.68, 116.78, 103.94), swapRB=True, crop=False)
    
    start = time.time()
    
    net.setInput(blob)
    
    (scores, geometry) = net.forward(layerNames)
    
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    rects_2 = []
    confidences = []
    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
    
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            # Set minimum confidence as required
            if scoresData[x] < 0.5:
                continue
            # compute the offset factor as our resulting feature maps will
            #  x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            # add the bounding box coordinates and probability score to
            # our respective lists
            
            """
            pixels2coords = map_layer.xy(startX,startY) 
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
            """
            
            star = image2.xy(startX,startY) 
            end = image2.xy(endX,endY) 
            rects.append(( int( np.round(star[0]) ),int( np.round(star[1])),
                          np.round(int(end[0])),np.round(int(end[1])) ))
            
            rects_2.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
                        
    rects = union_rectangulos(rects)
    confidences = [1 for i in range(0, len(rects) )]
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    
    
    rects_2 = union_rectangulos(rects_2)
    confidences_2 = [1 for i in range(0, len(rects_2) )]
    boxes_2 = non_max_suppression(np.array(rects_2), probs=confidences_2)
    
    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes_2:
        # scale the bounding box coordinates based on the respective
        # ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)
        # draw the bounding box on the image
        cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 5)
    
    
    print(time.time() - start)
    return orig, scoresData,rects,confidences

## Cargando la imagen original y se extraen las coordenadas offset

In [12]:
#Cargando las coordenadas del archivo para extraer las coordenadas
with open('SPLIT_IN/example.jgwx') as f:
    lines = f.readlines()
    
coords = [coord.replace('\n','') for coord in lines]
x_0 = float(coords[4].replace(',','.'))
y_0 = float(coords[5].replace(',','.'))
x_mov = float(coords[0].replace(',','.'))
y_mov = float(coords[3].replace(',','.'))

## Aplicando la detección de texto a cada imagen dividida 

In [13]:
image_to_proces = os.listdir(path='SPLIT_OUT')
for path in image_to_proces:
    image2 = rasterio.open('SPLIT_OUT/'+path )
    image = cv2.imread('SPLIT_OUT/'+path )
    out_image,scores,rects,confidences = east_detect(image, image2)
    cv2.imwrite('PPROCES_IMAGE_OUT/'+path , out_image)
    
    lista_toponimos = gpd.GeoDataFrame(crs=3116)
    geometrias = []
    for coords_bounding in rects:
        x = x_0 + ( float(coords_bounding[0])*x_mov)
        y = y_0 + ( float(coords_bounding[1])*y_mov)
        geometrias.append( Point(x,y) )
    
    path_geometrias = 'GEOMETRIES_OUT/'+path.split('.')[0]+'.geojson'
    lista_toponimos['geometry'] = geometrias
    lista_toponimos['ID'] = [i for i in range(0, len(geometrias))]
    try:
        lista_toponimos.to_file(path_geometrias,driver='GeoJSON')
    except:
        continue

0.25832033157348633


  lista_toponimos = gpd.GeoDataFrame(crs=3116)


0.38372182846069336
0.312408447265625
0.272350549697876
0.39411306381225586
0.3220391273498535
0.25575876235961914
0.4227919578552246
0.3855469226837158
0.3792283535003662
0.39560580253601074
0.31768107414245605
0.3766915798187256
0.40477442741394043
0.3901636600494385
0.3406834602355957
0.38347339630126953
0.295574426651001
0.27260351181030273
0.38254380226135254
0.3708186149597168
0.30400896072387695
0.28203582763671875
0.25257110595703125
0.3394043445587158
0.32978057861328125
0.3572874069213867
0.33396387100219727
0.3371715545654297
0.282656192779541
0.2593495845794678
0.2741231918334961
0.2998504638671875
0.2956697940826416
0.28529977798461914
0.2960085868835449
0.32949376106262207
0.2986893653869629
0.328873872756958
0.3088681697845459
0.3498713970184326
0.31705665588378906


# <center> <font color='red'> ANEXOS 1. Codigos de chapaleo y OCR </font> <center>
   Nota: En esta sección se ejecutan pruebas para detección de texto y aplicación de OCR

In [None]:
#Loading packages 
import numpy as np
import cv2
from imutils.object_detection import non_max_suppression
import pytesseract
from matplotlib import pyplot as plt

#Creating argument dictionary for the default arguments needed in the code. 
args = {"image":"example_2.jpg", "east":"east_text_detection.pb", "min_confidence":0.2, "width":320, "height":320}


In [None]:
args['image']="example_2.jpg"
image = cv2.imread(args['image'])

#Saving a original image and shape
orig = image.copy()
(origH, origW) = image.shape[:2]

# set the new height and width to default 320 by using args #dictionary.  
(newW, newH) = (args["width"], args["height"])

#Calculate the ratio between original and new image for both height and weight. 
#This ratio will be used to translate bounding box location on the original image. 
rW = origW / float(newW)
rH = origH / float(newH)

# resize the original image to new dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]

# construct a blob from the image to forward pass it to EAST model
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
    (123.68, 116.78, 103.94), swapRB=True, crop=False)

In [None]:
# load the pre-trained EAST model for text detection 
net = cv2.dnn.readNet("frozen_east_text_detection.pb")

# The following two layer need to pulled from EAST model for achieving this. 
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]
  
#Forward pass the blob from the image to get the desired output layers
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)

# Returns a bounding box and probability score if it is more than minimum confidence
def predictions(prob_score, geo):
    (numR, numC) = prob_score.shape[2:4]
    boxes = []
    confidence_val = []

    # loop over rows
    for y in range(0, numR):
        scoresData = prob_score[0, 0, y]
        x0 = geo[0, 0, y]
        x1 = geo[0, 1, y]
        x2 = geo[0, 2, y]
        x3 = geo[0, 3, y]
        anglesData = geo[0, 4, y]

        # loop over the number of columns
        for i in range(0, numC):
            if scoresData[i] < args["min_confidence"]:
                continue

            (offX, offY) = (i * 4.0, y * 4.0)

            # extracting the rotation angle for the prediction and computing the sine and cosine
            angle = anglesData[i]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # using the geo volume to get the dimensions of the bounding box
            h = x0[i] + x2[i]
            w = x1[i] + x3[i]

            # compute start and end for the text pred bbox
            endX = int(offX + (cos * x1[i]) + (sin * x2[i]))
            endY = int(offY - (sin * x1[i]) + (cos * x2[i]))
            startX = int(endX - w)
            startY = int(endY - h)

            boxes.append((startX, startY, endX, endY))
            confidence_val.append(scoresData[i])

    boxes = union_rectangulos(boxes)
    confidence_val = [1 for i in range(0, len(boxes) )]
    return (boxes, confidence_val)

In [None]:
# Find predictions and  apply non-maxima suppression
(boxes, confidence_val) = predictions(scores, geometry)
boxes = non_max_suppression(np.array(boxes), probs=confidence_val)
tessdata_dir_config = '--tessdata-dir "C:\\Program Files\\Tesseract-OCR\\tessdata"'

##Text Detection and Recognition 

# initialize the list of results
results = []

# loop over the bounding boxes to find the coordinate of bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the coordinates based on the respective ratios in order to reflect bounding box on the original image
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    #extract the region of interest
    r = orig[startY:endY, startX:endX]

    #configuration setting to convert image to string.  
    configuration = ("-l eng --oem 1 --psm 8")
    ##This will recognize the text from the image of bounding box
    text = pytesseract.image_to_string(r, config=tessdata_dir_config, lang='spa_old')

    # append bbox coordinate and associated text to the list of results 
    results.append(((startX, startY, endX, endY), text))

In [None]:
#Display the image with bounding box and recognized text
orig_image = orig.copy()

# Moving over the results and display on the image
for ((start_X, start_Y, end_X, end_Y), text) in results:
    # display the text detected by Tesseract
    print("{}\n".format(text))

    # Displaying text
    text = "".join([x if ord(x) < 128 else "" for x in text]).strip()
    cv2.rectangle(orig_image, (start_X, start_Y), (end_X, end_Y),
        (0, 0, 255), 2)
    cv2.putText(orig_image, text, (start_X, start_Y - 30),
        cv2.FONT_HERSHEY_SIMPLEX, 0.7,(0,0, 255), 2)

plt.imshow(orig_image)
plt.title('Output')
plt.show()

# <center> <font color='red'> ANEXOS 2. Codigos de chapaleo y testeo general</font> <center>
    
    Nota: En esta sección se ejecutan pruebas, por tanto puede que algunas celdas no funcionen

In [None]:
def get_extent(dataset):
    cols = dataset.RasterXSize
    rows = dataset.RasterYSize
    transform = dataset.GetGeoTransform()
    minx = transform[0]
    maxx = transform[0] + cols * transform[1] + rows * transform[2]

    miny = transform[3] + cols * transform[4] + rows * transform[5]
    maxy = transform[3]

    return {
            "minX": str(minx), "maxX": str(maxx),
            "minY": str(miny), "maxY": str(maxy),
            "cols": str(cols), "rows": str(rows)
            }

def create_tiles(minx, miny, maxx, maxy, n):
    width = maxx - minx
    height = maxy - miny

    matrix = []

    for j in range(n, 0, -1):
        for i in range(0, n):

            ulx = minx + (width/n) * i # 10/5 * 1
            uly = miny + (height/n) * j # 10/5 * 1

            lrx = minx + (width/n) * (i + 1)
            lry = miny + (height/n) * (j - 1)
            matrix.append([[ulx, uly], [lrx, lry]])

    return matrix


def split(file_name, n):
    raw_file_name = os.path.splitext(os.path.basename(file_name))[0].replace("_downsample", "")
    driver = gdal.GetDriverByName('GTiff')
    dataset = gdal.Open(file_name)
    band = dataset.GetRasterBand(1)
    transform = dataset.GetGeoTransform()

    extent = get_extent(dataset)

    cols = int(extent["cols"])
    rows = int(extent["rows"])

    ##print "Columns: ", cols
    ##print "Rows: ", rows

    minx = float(extent["minX"])
    maxx = float(extent["maxX"])
    miny = float(extent["minY"])
    maxy = float(extent["maxY"])

    width = maxx - minx
    height = maxy - miny

    output_path = os.path.join("data", raw_file_name)
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    ###print "GCD", gcd(round(width, 0), round(height, 0))
    ##print "Width", width
    ##print "height", height


    tiles = create_tiles(minx, miny, maxx, maxy, n)
    transform = dataset.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = -transform[5]

    ##print xOrigin, yOrigin

    tile_num = 0
    for tile in tiles:

        minx = tile[0][0]
        maxx = tile[1][0]
        miny = tile[1][1]
        maxy = tile[0][1]

        p1 = (minx, maxy)
        p2 = (maxx, miny)

        i1 = int((p1[0] - xOrigin) / pixelWidth)
        j1 = int((yOrigin - p1[1])  / pixelHeight)
        i2 = int((p2[0] - xOrigin) / pixelWidth)
        j2 = int((yOrigin - p2[1]) / pixelHeight)

        #print i1, j1
        #print i2, j2

        new_cols = i2-i1
        new_rows = j2-j1

        data = band.ReadAsArray(i1, j1, new_cols, new_rows)

        #print data

        new_x = xOrigin + i1*pixelWidth
        new_y = yOrigin - j1*pixelHeight

        ##print new_x, new_y

        new_transform = (new_x, transform[1], transform[2], new_y, transform[4], transform[5])

        output_file_base = raw_file_name + "_" + str(tile_num) + ".tif"
        output_file = os.path.join("data", raw_file_name, output_file_base)

        dst_ds = driver.Create(output_file,
                               new_cols,
                               new_rows,
                               1,
                               gdal.GDT_Float32)

        #writting output raster
        dst_ds.GetRasterBand(1).WriteArray( data )

        tif_metadata = {
            "minX": str(minx), "maxX": str(maxx),
            "minY": str(miny), "maxY": str(maxy)
        }
        dst_ds.SetMetadata(tif_metadata)

        #setting extension of output raster
        # top left x, w-e pixel resolution, rotation, top left y, rotation, n-s pixel resolution
        dst_ds.SetGeoTransform(new_transform)

        wkt = dataset.GetProjection()

        # setting spatial reference of output raster
        srs = osr.SpatialReference()
        srs.ImportFromWkt(wkt)
        dst_ds.SetProjection( srs.ExportToWkt() )

        #Close output raster dataset
        dst_ds = None

        tile_num += 1

    dataset = None

In [None]:
split('SPLIT_IN/example.jpg', 10)

In [None]:
img = PIL.Image.open(os.path.join('SPLIT_IN', 'example.jpg'))
img.save('SPLIT_OUT/EXAMPLE.jpg')