Importing OpenCV and Tesseract libraries.

In [1]:
import cv2
import pytesseract
import numpy as np
import regex as re

# Locate the Tesseract executable without tying the notebook to one machine:
# an explicit TESSERACT_CMD environment variable wins, then whatever
# `tesseract` is on PATH, and finally the default Windows install location.
import os
import shutil

pytesseract.pytesseract.tesseract_cmd = (
    os.environ.get('TESSERACT_CMD')
    or shutil.which('tesseract')
    or 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
)

In [111]:
# pkg_resources is deprecated; importlib.metadata is the standard-library way
# to look up the version of an installed distribution.
from importlib.metadata import version as dist_version
dist_version('pytesseract')

'0.3.10'

This extracts a cardboard box from an image using color separation. It only works when the box is moving on a conveyor of a different color.

In [6]:
img = cv2.imread('package.jpg')
# cv2.imread reports a missing/unreadable file by returning None, not by raising.
if img is None:
    raise FileNotFoundError("Could not read image 'package.jpg'")
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# Threshold the hue channel. With THRESH_OTSU the threshold is chosen
# automatically, so the 127 passed here is not the value actually used.
th = cv2.threshold(hsv[:,:,0],127,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)[1]

def get_region(image):
    """Return a filled mask of the largest external contour in a binary image.

    Args:
        image: Single-channel 8-bit image (e.g. the output of cv2.threshold).

    Returns:
        A uint8 array with the same height and width as ``image`` that is 255
        inside the contour with the largest area and 0 everywhere else.

    Raises:
        ValueError: If no contour is found in ``image``.
    """
    # Index [-2] selects the contour list whether findContours returns two
    # values (OpenCV 2.x / 4.x) or three values (OpenCV 3.x).
    contours = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    if len(contours) == 0:
        raise ValueError("get_region: no contours found in image")
    largest = max(contours, key=cv2.contourArea)
    mask = np.zeros(image.shape[:2], np.uint8)
    # thickness=-1 fills the contour interior instead of drawing its outline.
    cv2.drawContours(mask, [largest], 0, 255, -1)
    return mask

# Build a filled mask of the largest contour found in the thresholded hue channel.
mask = get_region(th)

# Keep only the pixels of the original image that lie inside the mask.
masked_img = cv2.bitwise_and(img, img, mask = mask)

# Save the result; as the cell's last expression, imwrite's return value is shown as output.
cv2.imwrite("masked_image.jpg", masked_img)

True

Reading image file

In [7]:
img = cv2.imread("Picture1.jpg.png")
# cv2.imread reports a missing/unreadable file by returning None, not by raising.
if img is None:
    raise FileNotFoundError("Could not read image 'Picture1.jpg.png'")
height, width, channel = img.shape


def process(img):
    """Produce a Canny edge map from a binarised version of a BGR image.

    The image is converted to grayscale, thresholded at 128, smoothed with a
    5x5 Gaussian (sigma 2) and passed through Canny with both hysteresis
    thresholds set to 0.

    Args:
        img: BGR image array.

    Returns:
        The edge image returned by cv2.Canny.
    """
    binary = cv2.threshold(
        cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 128, 255, cv2.THRESH_BINARY
    )[1]
    smoothed = cv2.GaussianBlur(binary, (5, 5), 2)
    return cv2.Canny(smoothed, 0, 0)

def get_contours(img):
    """Crop the area spanned by the second- and third-largest contours.

    Contours are detected on the edge map from ``process(img)`` and ordered by
    area; the largest one is skipped and the next two are combined into one
    bounding rectangle.

    Side effects:
        * A red rectangle is drawn onto ``img`` itself (in place).
        * The crop is written to 'bounds.jpg'.

    Args:
        img: BGR image array; it is modified in place.

    Returns:
        The slice of ``img`` covered by the bounding rectangle. Because the
        rectangle is drawn before slicing, the crop may include parts of the
        drawn border.

    Raises:
        ValueError: From tuple unpacking when fewer than three contours exist.
    """
    edges = process(img)
    found, _ = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    by_area = sorted(found, key=cv2.contourArea)
    # Ascending order: index -3 is the third largest, index -2 the second largest.
    third_largest, second_largest = by_area[-3:-1]
    combined = np.concatenate((third_largest, second_largest))
    x, y, w, h = cv2.boundingRect(combined)
    top_left, bottom_right = (x, y), (x + w, y + h)
    cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 2)
    ROI = img[y:y + h, x:x + w]
    cv2.imwrite('bounds.jpg', ROI)
    return ROI

In [8]:
filename = 'bounds.jpg'
mainImage = cv2.imread(filename)
# cv2.imread returns None when the file is missing or unreadable; fail here
# with a clear message instead of much later when mainImage is first used.
if mainImage is None:
    raise FileNotFoundError("Could not read image %r" % filename)

Method used for splitting image into 4 parts

In [9]:
def splitImage(image):
    """Split an image into four quadrants and save each one to disk.

    The image is cut at ``width // 2`` horizontally. Vertically, the bottom
    quadrants receive ``height // 2`` rows and the top quadrants receive the
    remaining rows (so for an odd height the top parts are one row taller).

    Side effects:
        Writes 'bottom_left.jpg', 'top_left.jpg', 'bottom_right.jpg' and
        'top_right.jpg' to the current working directory.

    Args:
        image: Image array with rows on axis 0 and columns on axis 1.

    Returns:
        A list ``[bottom_left, top_left, bottom_right, top_right]`` of
        independent (copied) arrays.
    """
    height, width = image.shape[:2]
    col_cut = width // 2
    # First row of the bottom half; the bottom half is height // 2 rows tall.
    row_cut = height - height // 2

    quadrants = [
        ("bottom_left.jpg", image[row_cut:, :col_cut]),
        ("top_left.jpg", image[:row_cut, :col_cut]),
        ("bottom_right.jpg", image[row_cut:, col_cut:]),
        ("top_right.jpg", image[:row_cut, col_cut:]),
    ]

    image_data = []
    for out_name, view in quadrants:
        # Copy so each part is a contiguous array independent of the input.
        part = view.copy()
        cv2.imwrite(out_name, part)
        image_data.append(part)

    return image_data

Transforming the image using OpenCV and extracting numbers using Tesseract. The image is blurred, thresholded and dilated. Then Tesseract extracts text with flags to only search for digits.

In [117]:
def getText(name, fileIsImage=False):
    """Extract digits from an image with Tesseract after HSV-based clean-up.

    The V (value) channel of the image is blurred, binarised with an inverted
    Otsu threshold and dilated with a 1x2 rectangular kernel before OCR.

    Side effects:
        Writes the pre-processed image to 'transformed.png'.

    Args:
        name: Path of the image file, or the BGR image array itself when
            ``fileIsImage`` is True.
        fileIsImage: If False (default), ``name`` is read from disk with
            cv2.imread; if True, ``name`` is used directly as the image.

    Returns:
        The string returned by pytesseract.image_to_string.

    Raises:
        FileNotFoundError: If ``name`` is a path that OpenCV cannot read.
    """
    if fileIsImage:
        img = name
    else:
        img = cv2.imread(name)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError("Could not read image %r" % (name,))
    # Converting image colorspace from BGR(Blue Green Red) to HSV(Hue Saturation Value) for processing
    HSV_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # Only the value channel (index 2) is used for OCR pre-processing
    v = cv2.split(HSV_img)[2]
    # Gaussian blur step; note that a 1x1 kernel leaves the image effectively unchanged
    v = cv2.GaussianBlur(v, (1,1), 0)
    # Using Otsu's thresholding algorithm to clear up the image
    # (with THRESH_OTSU the threshold is chosen automatically, so 128 is not the value used)
    thresh = cv2.threshold(v, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # Building a 1x2 rectangular structuring element for the dilation
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(1, 2))
    # Dilating image
    thresh = cv2.dilate(thresh, kernel)
    # Saving processed image
    cv2.imwrite("transformed.png", thresh)
    # Using tesseract to extract numbers
    txt = pytesseract.image_to_string(thresh, config="--psm 6 digits")
    return txt
    

In [2]:
#skew correction
def deskew(image):
    """Rotate ``image`` about its centre to undo the skew of its foreground.

    The skew is estimated from the minimum-area rectangle around all pixels
    greater than zero. The previous version overwrote that estimate with 0, so
    the image was never actually rotated.

    Args:
        image: Image array whose non-zero pixels are the foreground.
            Assumes a single-channel (2-D) image — TODO confirm against callers.

    Returns:
        A new array of the same size as ``image``: the rotated image, or an
        unrotated copy when there are no foreground pixels to measure.
    """
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        # No foreground pixels: there is nothing to estimate an angle from.
        return image.copy()
    rect_angle = cv2.minAreaRect(coords)[-1]
    # A rectangle's orientation is only defined modulo 90 degrees, and the
    # range reported by minAreaRect differs between OpenCV releases, so fold
    # the value into [-45, 45) to get the smallest equivalent skew.
    skew = ((rect_angle + 45) % 90) - 45
    # NOTE(review): the sign follows the common recipe for (row, col)
    # coordinates produced by np.where; verify on a known skewed sample.
    angle = -skew
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

In [3]:
def getText2(name, fileIsImage=False):
    """Extract digits from an image with Tesseract after grayscale clean-up.

    The image is converted to grayscale, median-blurred with a 3x3 window and
    binarised with Otsu's threshold before OCR.

    Side effects:
        Writes the pre-processed image to 'transformed.png'.

    Args:
        name: Path of the image file, or the BGR image array itself when
            ``fileIsImage`` is True.
        fileIsImage: If False (default), ``name`` is read from disk with
            cv2.imread; if True, ``name`` is used directly as the image.

    Returns:
        The string returned by pytesseract.image_to_string.

    Raises:
        FileNotFoundError: If ``name`` is a path that OpenCV cannot read.
    """
    if fileIsImage:
        img = name
    else:
        img = cv2.imread(name)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError("Could not read image %r" % (name,))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Noise removal
    noise = cv2.medianBlur(gray, 3)
    # Convert to a binary image with Otsu's threshold; this step is required
    # for colored input, otherwise Tesseract will not detect the text
    # correctly and will give incorrect results.
    thresh = cv2.threshold(noise, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # Saving processed image
    cv2.imwrite("transformed.png", thresh)
    # Using tesseract to extract numbers
    txt = pytesseract.image_to_string(thresh, config="--psm 6 digits")
    return txt

In [154]:
filename = 'frame6.png'
mainImage = cv2.imread(filename)
# cv2.imread returns None when the file is missing or unreadable.
if mainImage is None:
    raise FileNotFoundError("Could not read image %r" % filename)
# OCR only the region of interest: rows 220-399, columns 300 onward.
text = getText2(mainImage[220:400,300:,:], True)
print(text)

504.832.89



In [6]:
img = cv2.imread('frame3.png')
# cv2.imread returns None when the file is missing or unreadable.
if img is None:
    raise FileNotFoundError("Could not read image 'frame3.png'")

# Per-box OCR results as a dict of parallel lists (one entry per detected box).
d = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
print(d.keys())

dict_keys(['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', 'width', 'height', 'conf', 'text'])


In [7]:
# Draw a green rectangle around every detected box with confidence above 60.
n_boxes = len(d['text'])
for i in range(n_boxes):
    # float() accepts ints, floats and numeric strings such as '96.06';
    # int() would raise on a fractional confidence string.
    if float(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imwrite('test555.png', img)


True

FACT: vid_1 has 11 boxes captured

In [6]:
path = "vid_1.MOV"

vidcap = cv2.VideoCapture(path)
framenbr = 0

try:
    while True:
        success, frame = vidcap.read()
        if not success:
            break
        # Region of interest: rows 500-1019, columns 1080-1679.
        crop = frame[500:1020,1080:1680]
        # Save and OCR the same frame, so the n-th printed text belongs to
        # frame<n>.png (previously the first frame was saved but never OCR'd).
        cv2.imwrite("frame%d.png" % framenbr, crop)
        text = getText2(crop, True)
        print(text)
        framenbr += 1
finally:
    # Always free the video handle, even if OCR raises.
    vidcap.release()

NameError: name 'getText2' is not defined

---
Tomas playground

The code below loops through the video, reading every {rateReduction}th frame.<br>
video length: 44sec<br>
executes in: 35sec<br>
conclusion: quality too good?

In [8]:
path = "vid_1.MOV"
vidcap = cv2.VideoCapture(path)
rateReduction = 70
framenbr = 0
count = 0

try:
    while True:
        # Read inside the loop only, so the very first frame is saved too
        # (previously it was read once before the loop and then discarded).
        success, frame = vidcap.read()
        if not success:
            break
        cv2.imwrite("frame%d.png" % framenbr, frame)
        # Jump ahead by rateReduction frames (at 30 fps, 70 frames is about 2.3 s).
        count += rateReduction
        vidcap.set(cv2.CAP_PROP_POS_FRAMES, count)
        framenbr += 1
finally:
    # Always free the video handle.
    vidcap.release()

: 

: 

---

In [None]:
searchID = '704.035.93'
# NOTE(review): mainImage is whichever image an earlier cell loaded last — confirm it is the intended one.
images = splitImage(mainImage)
for image in images:
    text = getText(image, True)
    # Keep only digits and dots. The pattern is a raw string so that \d and \.
    # reach the regex engine instead of being invalid string escapes.
    text = re.sub(r'[^\d\.]', '', text)
    if searchID in text:
        print("Found")
        break
    print("Not found")

Not found
Not found
Not found
Not found
