In [1]:
# Librerías utilizadas por el script
import os
import cv2
import zipfile
import csv
import pathlib
import shutil

import numpy as np
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
import seaborn as sns
import easyocr


from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from copy import deepcopy

# Directory of the Severance dataset, given relative to the current working
# directory; the spreadsheet 'SeveranceA.xls' is read from inside it.
SEVERANCE_PATH = "datasets/Severance"

In [2]:
# Funciones comunes para la lectura y muestra de datos

def readIm(filename, flagColor=1):
    """Read an image file and return it as RGB or as gray levels.

    Args:
        filename: Path of the image file, handed to ``cv2.imread``.
        flagColor: Truthy (default ``1``) to get the image in RGB channel
            order; falsy (``0``) to get a single gray-level band.

    Returns:
        The array produced by ``cv2.cvtColor``: RGB when ``flagColor`` is
        truthy, gray level otherwise.

    Raises:
        OSError: If ``cv2.imread`` could not load the file. OpenCV signals a
            missing or undecodable file by returning ``None`` instead of
            raising, which previously surfaced as an opaque error inside
            ``cv2.cvtColor``.

    Example:
        im1 = readIm('apple.jpg', 0)
    """
    # cv2.imread delivers the pixels in BGR channel order.
    im = cv2.imread(filename)
    if im is None:
        raise OSError(f"Could not read image file: {filename!r}")

    # Convert from BGR to the representation the caller asked for.
    conversion = cv2.COLOR_BGR2RGB if flagColor else cv2.COLOR_BGR2GRAY
    return cv2.cvtColor(im, conversion)


def _rescale01(values):
    """Linearly map ``values`` onto [0, 1]; a constant array maps to all zeros."""
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Every sample is identical: dividing would give 0/0 (NaN), so return
        # a well-defined flat image instead.
        return np.zeros_like(values)
    return (values - lowest) / span


def rangeDisplay01(im, flag_GLOBAL=True):
    """Rescale an array of arbitrary real numbers to the range [0, 1].

    The input may contain any real values, including negative ones. It is
    converted to a float copy first, so the caller's array is not modified.

    Args:
        im: numpy array holding the image.
        flag_GLOBAL: When True (default) a single minimum/maximum taken over
            the whole array (all channels together) is used. When False each
            band ``im[:, :, band]`` is rescaled independently with its own
            minimum/maximum. An array with fewer than three dimensions has no
            band axis and is always rescaled as a whole.

    Returns:
        A float array with the same shape as ``im`` and values in [0, 1].
        An image (or band) whose values are all equal is returned as zeros
        rather than NaN.

    Note:
        For ``plt.imshow`` with RGB data the valid range is [0..1] for floats
        and [0..255] for integers.
    """
    im = im.astype(float)
    if flag_GLOBAL or im.ndim < 3:
        return _rescale01(im)

    # Band-wise normalization: each channel gets its own min/max.
    for band in range(im.shape[2]):
        im[:, :, band] = _rescale01(im[:, :, band])
    return im


"""
Display an image on screen, in color or in black and white. The figure can be
enlarged so that finer detail can be appreciated.

Input:
    im: image read as an ndarray
    title: title shown on the figure
    factor: magnification factor of the image ("zoom")
"""


def displayIm(im, title='Result', factor=2):
    """Show an image with matplotlib, in color or in gray levels.

    Args:
        im: Image as a numpy ndarray. A 3-dimensional array is drawn as a
            multi-band (RGB) image; anything else is drawn with the gray
            colormap.
        title: Title placed on the figure.
        factor: Multiplier applied to the current figure size ("zoom").

    The image is rescaled to [0, 1] with ``rangeDisplay01`` whenever its
    values fall outside the range ``plt.imshow`` accepts for its dtype:
    [0, 1] for floating-point data and [0, 255] for everything else.
    Previously only values above 255 triggered the rescaling, so float RGB
    data in (1, 255] was handed to ``imshow`` as-is and got clipped.
    """
    # Named so as not to shadow the builtins `min` / `max`.
    lowest = np.min(im)
    highest = np.max(im)

    # Upper limit of the valid display range depends on the dtype.
    upper_bound = 1 if np.issubdtype(im.dtype, np.floating) else 255
    if lowest < 0 or highest > upper_bound:
        im = rangeDisplay01(im, flag_GLOBAL=True)

    if len(im.shape) == 3:
        # Multi-band image: imshow ignores `cmap` for RGB(A) data, so none is passed.
        plt.imshow(im)
    else:
        # Single-band image.
        plt.imshow(im, cmap='gray')

    # Enlarge the current figure by `factor`.
    fig = plt.gcf()
    fig.set_size_inches(factor * fig.get_size_inches())
    plt.title(title)
    # Remove the axis tick numbering.
    plt.xticks([])
    plt.yticks([])
    plt.show()

In [3]:
# Load the Severance spreadsheet (the file has no header row) and display it.
df_sev = pd.read_excel(
    f"{SEVERANCE_PATH}/SeveranceA.xls",
    header=None,
)
display(df_sev)

Unnamed: 0,0,1,2,3,4,5,6
0,actinickeratosis,60,M,R/O AK\nR/O SCC,actinickeratosis;squamouscellcarcinoma;,Lt. cheek,Head&Neck
1,actinickeratosis,68,F,R/O AK,actinickeratosis;,perioral area,Head&Neck
2,actinickeratosis,39,F,R/O AK\nR/O SK,actinickeratosis;seborrheickeratosis;,nose,Head&Neck
3,actinickeratosis,63,F,R/O AK,actinickeratosis;,cheek,Head&Neck
4,actinickeratosis,66,F,R/O SK\nR/O AK,seborrheickeratosis;actinickeratosis;,nose,Head&Neck
...,...,...,...,...,...,...,...
10421,spitznevus,18,F,R/O inflammed intradermal nevus,melanocyticnevus;,cheek,Head&Neck
10422,spitznevus,13,F,R/O GP,pyogenicgranuloma;,Lt. calf,Leg
10423,spitznevus,14,F,R/O spitz nevus,otherbenign;,lower leg,Leg
10424,spitznevus,8,M,R/O hemangioma,hemangioma;,Rt. thigh,Leg


In [4]:
# Module-level settings read by recortarImagenesSEVERANCE below.
grosor_borde = 8  # Width of the border to strip from each side of a tile (px)

# EasyOCR reader for English text, created once here and reused for every
# separator tile. The recorded output of this cell shows the detection and
# recognition models being downloaded when it ran.
reader = easyocr.Reader(['en'])
# Cropping
def recortarImagenesSEVERANCE(path):
    """Split each Severance grid image into tiles and OCR the separator tiles.

    Every ``.jpg`` file directly inside ``path`` is read, cropped, and cut
    into 8 rows x 15 columns of tiles. A tile containing more than 20000
    values equal to 255 is treated as a separator carrying a name: its text
    is read with the module-level EasyOCR ``reader`` and printed. Saving the
    remaining tiles as thumbnails and exporting a CSV is work in progress and
    currently disabled (see the commented blocks).

    Relies on the module-level names ``reader`` and ``grosor_borde``.

    Args:
        path: Directory holding ``SeveranceA.xls`` and the grid images. The
            function changes into it while running and creates a
            ``thumbnails`` sub-directory there if missing.

    Returns:
        None.

    Raises:
        OSError: If a ``.jpg`` file cannot be read by ``cv2.imread``.
        ValueError: From ``np.split`` if a cropped grid cannot be divided
            evenly into 8 rows or 15 columns.

    The working directory is always restored to what it was on entry, also
    when the run fails or is interrupted. Previously it was reset with a
    hard-coded ``os.chdir("../../")`` that was skipped on any exception and
    was only correct for a two-level relative ``path``.
    """
    directorio_inicial = os.getcwd()
    os.chdir(path)
    try:
        # Only the disabled export step below consumes these rows; the load is
        # kept so a missing spreadsheet still fails before the slow OCR pass.
        df_sev = pd.read_excel('SeveranceA.xls', header=None)

        # Directory for the cropped images.
        os.makedirs('thumbnails', exist_ok=True)

        # List the grid images available in the dataset directory.
        files = [f for f in pathlib.Path().iterdir() if f.is_file()]

        etiqueta = ""

        for f in files:
            # Match on the real extension, not on "jpg" appearing anywhere in the name.
            if f.suffix.lower() != ".jpg":
                continue

            # Read the image.
            image = cv2.imread(str(f), cv2.IMREAD_UNCHANGED)
            if image is None:
                raise OSError(f"Could not read image file: {str(f)!r}")
            # NOTE(review): these fixed offsets presumably trim the outer frame
            # of the grid so it divides evenly -- confirm against the images.
            image = image[2:-6]

            # Split the grid into its 8 rows of tiles.
            for fila in np.split(image[1:-1], 8):
                # Split the row into its 15 tiles.
                for imcol in np.split(fila[:, 2:-3], 15, axis=1):
                    # Remove the border around the tile.
                    img_sin_borde = imcol[grosor_borde:-grosor_borde, grosor_borde:-grosor_borde]
                    if np.count_nonzero(imcol == 255) > 20000:  # Separator tile carrying a name
                        # Switch the current label to the next patient.
                        etiqueta = reader.readtext(img_sin_borde)
                        print(etiqueta)
                        continue

                    # --- Disabled: thumbnail export (work in progress) ---
                    # Kept for reference. Before re-enabling, it needs a row
                    # index `j` into df_sev (never defined), a per-file counter
                    # `i` starting at 0, the file name without extension
                    # (`name = f.stem`), and the lists `names` and
                    # `trainProcessed`.
                    #
                    # actual = list(df_sev.iloc[j])
                    # actual.append(f"{name}_{i}.png")
                    # trainProcessed.append(actual)
                    # # Save the thumbnail and record its file name
                    # cv2.imwrite(f"thumbnails/{name}_{i}.png", img_sin_borde)
                    # names.append(f"{name}_{i}.png")
                    # i += 1

        print("Miniaturas separadas")

        # --- Disabled: build the dataframe of cropped images and save it ---
        # trainProcessedDF = pd.DataFrame(trainProcessed)
        # trainProcessedDF.columns = ["label", "age", "sex", "abrev_diagnosis", "diagnosis", "fine_loc", "loc", "image"]
        # trainProcessedDF.to_csv("severanceTrainingSet_thumbnails.csv", sep=',', encoding='utf-8')

        # NOTE(review): no file is written while the export above is disabled,
        # although this message says one was created.
        print("Fichero creado correctamente")
    finally:
        # Always return to the directory the caller started in.
        os.chdir(directorio_inicial)

Downloading detection model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% Complete

Downloading recognition model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% Complete

In [5]:
# Run the grid-splitting pass over the Severance dataset directory; the OCR
# reading of every separator tile is printed as it is found.
recortarImagenesSEVERANCE(SEVERANCE_PATH)

[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.1821534145633237)]
[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.19489391475565895)]
[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.190213432126417)]
[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.16189831928449291)]
[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.18644925349566507)]
[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.20598105527090171)]
[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.19457308908842774)]
[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.17222691494359624)]
[([[0, 9], [71, 9], [71, 23], [0, 23]], 'Vyrizgona', 0.16838903063026475)]
[([[0, 9], [81, 9], [81, 23], [0, 23]], 'vezoualake', 0.4165133811046178)]
[([[0, 9], [47, 9], [47, 23], [0, 23]], 'vezors', 0.31599311815984354), ([[53, 9], [81, 9], [81, 21], [53, 21]], 'ake', 0.9998924953191473)]
[([[0, 9], [47, 9], [47, 21], [0, 21]], 'vezora', 0.1249969593363594), ([[53, 9], [81, 9], [81, 21], [53, 21]], 

KeyboardInterrupt: 