# PREPROCESSING DATASET: OpenCV FILTERS TO IMPROVE FILES
- convert to grayscale
- noise removal
- thresholding
- dilation
- erosion
- canny edge detection
- skew correction

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
# IMAGES PRE-PROCESSING

# get grayscale image
def get_grayscale(image):
    """Return a single-channel grayscale version of ``image``.

    Args:
        image: Image array. Multi-channel input is treated as BGR (the channel
            order produced by ``cv2.imread``); a 2-D array is treated as
            already being grayscale.

    Returns:
        A new 2-D grayscale array; the input is never modified or aliased.
    """
    # The BGR2GRAY conversion rejects single-channel input, so an image that
    # is already grayscale is passed through. A copy is returned so callers
    # always get an array that is independent of the input, as they do for
    # the converted case.
    if image.ndim == 2:
        return image.copy()
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# noise removal
def remove_noise(image):
    """Smooth ``image`` with a median blur and return the filtered copy."""
    # Aperture size handed to the median filter.
    aperture = 5
    denoised = cv2.medianBlur(image, aperture)
    return denoised

#thresholding
def thresholding(image):
    """Binarize ``image`` using Otsu's automatically selected threshold.

    Pixels above the selected threshold become 255 and all others become 0.
    """
    # With THRESH_OTSU set, the 0 passed as the threshold is only a
    # placeholder; the threshold actually used is returned as the first
    # element of the result and discarded here.
    mode = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    _, binarized = cv2.threshold(image, 0, 255, mode)
    return binarized

#dilation
def dilate(image):
    """Dilate ``image`` once with a 5x5 all-ones structuring element."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated

#erosion
def erode(image):
    """Erode ``image`` once with a 5x5 all-ones structuring element."""
    side = 5
    structuring_element = np.ones(shape=(side, side), dtype=np.uint8)
    return cv2.erode(image, structuring_element, iterations=1)

#opening - erosion followed by dilation
def opening(image):
    """Apply a morphological opening (erosion followed by dilation).

    Uses a 5x5 all-ones structuring element.
    """
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
    return opened

#canny edge detection
def canny(image):
    """Return the Canny edge map of ``image`` using thresholds 100 and 200."""
    first_threshold, second_threshold = 100, 200
    edges = cv2.Canny(image, first_threshold, second_threshold)
    return edges

#skew correction
def _skew_correction_angle(rect_angle):
    """Map a ``cv2.minAreaRect`` angle to the rotation that undoes the skew.

    The result is in degrees and always lies within [-45, 45].
    """
    if rect_angle < -45:
        # Older OpenCV convention: angles are reported in [-90, 0).
        return -(90 + rect_angle)
    if rect_angle > 45:
        # Newer OpenCV releases report angles in (0, 90]; without this branch
        # an almost-straight page (angle near 90) would be rotated by ~-90.
        return 90 - rect_angle
    return -rect_angle


def deskew(image):
    """Rotate ``image`` so that its content is no longer skewed.

    The skew is estimated from the minimum-area rectangle enclosing every
    pixel that is set after inverting and Otsu-thresholding the image.

    Args:
        image: Image array, either multi-channel (treated as BGR) or an
            already-grayscale 2-D array.

    Returns:
        A rotated copy with the same width and height as ``image``. If the
        thresholded image has no set pixels (e.g. a blank page) an unrotated
        copy is returned.

    Side effects:
        Prints the applied rotation angle to stdout.
    """
    # The BGR2GRAY conversion rejects single-channel input, so grayscale
    # images are used directly.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Invert before thresholding so that dark content on a light page ends up
    # as the non-zero pixels.
    gray = cv2.bitwise_not(gray)
    thresh = cv2.threshold(gray, 0, 255,
        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # (row, col) coordinates of every non-zero pixel.
    coords = np.column_stack(np.where(thresh > 0))
    if coords.size == 0:
        # Nothing to fit a rectangle around; there is no skew to estimate.
        return image.copy()

    angle = _skew_correction_angle(cv2.minAreaRect(coords)[-1])

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    print('angulo: ', angle)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # BORDER_REPLICATE fills the corners exposed by the rotation with the
    # nearest edge pixels.
    rotated = cv2.warpAffine(image, M, (w, h),
        flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated
 

In [None]:
# FILTER TEST
# Open the first member listed by `zipimg` and display it with matplotlib.

# NOTE(review): `Image` and `zipimg` are not defined in the cells visible
# here - presumably PIL's Image module and an open zipfile.ZipFile; confirm
# they are created in an earlier cell before running this one.
imagen = Image.open(zipimg.open(zipimg.namelist()[0]))
# NOTE(review): figsize is given in inches, so (100, 100) requests a very
# large canvas - confirm this is intended.
plt.figure(figsize=(100,100))
imgplot = plt.imshow(imagen)

plt.show()


In [None]:
# REPLACE THE FILE TO TEST
# Load one page image, convert it to grayscale, denoise it and display it.

# cv2_imshow is the Colab replacement for cv2.imshow.
from google.colab.patches import cv2_imshow

file_path = '/content/Estatuto-1.pdf-0.png'

imagencv2 = cv2.imread(file_path)
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise surface later as an obscure conversion error.
if imagencv2 is None:
    raise FileNotFoundError('Could not read image: ' + file_path)
# cv2.imread yields BGR channel order, so the BGR-based helper is used; an
# RGB conversion code would swap the red and blue luminance weights.
imagencv2 = get_grayscale(imagencv2)
imagencv2 = remove_noise(imagencv2)
print(imagencv2.shape)
cv2_imshow(imagencv2)

#gray = get_grayscale(imagencv2)
#fig = plt.figure()
#fig.set_size_inches(np.array(fig.get_size_inches())
#plt.imshow(imagencv2)
#plt.show()
