In [5]:
import cv2
import pytesseract
import numpy as np
import os
from tokenize import tokenize

from assets.fish_list import get_all

In [6]:
# Location of the Tesseract executable used by pytesseract.
# The TESSERACT_CMD environment variable overrides it, so the notebook can run on
# machines where Tesseract is not installed at the default Windows path below.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD',
    r'C:\Program Files\Tesseract-OCR\tesseract.exe',
)
# Names returned by the project's assets.fish_list.get_all().
fish_list = get_all()

In [7]:
# Utility functions

def rename_screenshot(currname):
    """Rename one screenshot inside the 'screens' directory to a compact name.

    'Screenshot (' is replaced by 'sc' and every ')' is removed, so
    'Screenshot (3).png' becomes 'sc3.png'.

    Args:
        currname: Current file name (no directory part) of a file in 'screens'.

    Returns:
        None. A failed rename is reported on stdout instead of being raised.
    """
    new_name = currname.replace('Screenshot (', 'sc').replace(')', '')
    try:
        # Build the source path from the argument, not from a notebook-level
        # loop variable, so the function works wherever it is called from.
        os.rename(f'screens/{currname}', f'screens/{new_name}')
    except OSError as err:
        # Best effort: report the actual error and carry on with other files.
        print(err)
        print('Already renamed or file not found.')

def show_img(image, name = 'Preview'):
    """Show `image` in an OpenCV window titled `name` until a key is pressed.

    All OpenCV windows are destroyed afterwards. Returns None.
    """
    window_title = name
    cv2.imshow(window_title, image)
    key_wait_ms = 0  # passed straight to cv2.waitKey
    cv2.waitKey(key_wait_ms)
    cv2.destroyAllWindows()

In [None]:
# Rename the default Windows screenshot names ('Screenshot (N)...') to the more compact
# 'scN...' form, because the default names resulted in pytesseract not being able to find
# the file path.
# os.walk also descends into subdirectories, but the directory part is discarded here and
# only the bare file name is passed on, so this effectively handles files directly inside
# 'screens'.
# NOTE(review): later cells read images from 'assets/screens/' rather than 'screens/' -
# confirm which directory is intended.
for _, _, files in os.walk('screens'):
    for f in files:
        rename_screenshot(f)

In [272]:
# various utility functions for images that I didn't write

#dilation
def dilate(image):
    """Return `image` dilated once with a 5x5 all-ones uint8 kernel."""
    structuring_element = np.ones(shape=(5, 5), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated

#erosion
def erode(image):
    """Return `image` eroded once with a 5x5 all-ones uint8 kernel."""
    structuring_element = np.ones(shape=(5, 5), dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

#opening - erosion followed by dilation
def opening(image):
    """Return the morphological opening of `image` using a 5x5 all-ones uint8 kernel."""
    structuring_element = np.ones(shape=(5, 5), dtype=np.uint8)
    operation = cv2.MORPH_OPEN
    opened = cv2.morphologyEx(image, operation, structuring_element)
    return opened

def get_grayscale(image):
    """Return `image` converted with OpenCV's BGR-to-gray colour conversion."""
    conversion = cv2.COLOR_BGR2GRAY
    gray = cv2.cvtColor(image, conversion)
    return gray

def image_to_string_numbers(image):
    """Run pytesseract on `image` with the digits configuration and return the text.

    Args:
        image: The image to OCR; passed unchanged to pytesseract.image_to_string.

    Returns:
        Whatever pytesseract.image_to_string returns for `image`.
    """
    # Tesseract CLI options: engine mode 3, page segmentation mode 6, 'digits' config.
    custom_config = r'--oem 3 --psm 6 outputbase digits'
    # OCR the argument itself, not the notebook-level `img` global.
    return pytesseract.image_to_string(image, config=custom_config)


# crop functions for specific parts of the Fishbrain catch details GUI
def crop_screenshot(image):
    """Return the fixed region of `image` covering rows 0-999 and columns 710-1199."""
    row_span = slice(0, 1000)
    col_span = slice(710, 1200)
    return image[row_span, col_span]


# main info: species, size, c&r, date
def _get_main_info(image):
    """OCR the main-info rows of a catch-details screenshot.

    The label region image[470:730, 800:950] is OCR'd first to find which of the
    known labels are present. Each label found then gets its own 50-pixel-tall
    value crop, positioned by the label's index among the labels found (so rows
    are assumed to be stacked without gaps, in the order listed below).

    Args:
        image: Screenshot array indexed as image[rows, cols].

    Returns:
        dict mapping each detected label to the raw OCR text of its value crop.
    """
    known_labels = ('Fish species', 'Length', 'Weight', 'Catch and release', 'Date')
    label_text = pytesseract.image_to_string(image[470:730, 800:950])
    present = [label for label in known_labels if label in label_text]

    values = {}
    for row, label in enumerate(present):
        top = 472 + 50 * row
        # The 'Weight' value crop is 20 px narrower on the right than the others.
        right = 1130 if label == 'Weight' else 1150
        values[label] = pytesseract.image_to_string(image[top:top + 50, 955:right])
    return values


# assumptions:
#   - screenshot is of page immediately after tapping 'details' before any scrolling
def _crop_info(image, zone, main_info=None):
    """Crop one non-main-info zone out of a catch-details screenshot.

    Every zone's vertical position is shifted down by 50 px for each main-info
    row present, so the main-info rows have to be known before cropping.

    Args:
        image: Screenshot array indexed as image[rows, cols]. Assumed to be the
            page shown immediately after tapping 'details', before any scrolling.
        zone: One of 'water temp', 'air temp', 'location', 'precip', 'wind',
            'time', 'moon', 'pressure'.
        main_info: Optional result of _get_main_info(image). Pass it when cropping
            several zones from the same image so the main-info OCR runs only once;
            when omitted it is computed here.

    Returns:
        The cropped sub-array, or None (after printing a hint) when `zone` names a
        main-info field, which this function does not handle.

    Raises:
        KeyError: if `zone` is neither a known zone nor a main-info field.
    """
    main_zones = ('fish species', 'length', 'weight', 'catch and release', 'date')
    if zone in main_zones:
        # Checked first so that no OCR is spent on a request that cannot be served.
        print('Zone is in main info section. Use other function.')
        return None

    if main_info is None:
        main_info = _get_main_info(image)
    info_shift = len(main_info) * 50

    # zone -> [top, bottom, left, right] in pixels
    zone_switch = {
        'water temp': [570 + info_shift, 630 + info_shift, 965, 1020],
        'air temp': [570 + info_shift, 630 + info_shift, 799, 850],
        'location': [100 + info_shift, 180 + info_shift, 710, 1200],
        'precip': [630 + info_shift, 660 + info_shift, 710, 890],
        'wind': [580 + info_shift, 640 + info_shift, 1130, 1200],
        'time': [750 + info_shift, 780 + info_shift, 710, 850],
        'moon': [750 + info_shift, 780 + info_shift, 850, 1060],
        'pressure': [750 + info_shift, 780 + info_shift, 1060, 1200],
    }

    top, bottom, left, right = zone_switch[zone]
    return image[top:bottom, left:right]

In [273]:
# Load one sample screenshot and derive a grayscale copy for tuning the crop zones.
img = cv2.imread('assets/screens/sc2.png')
img_g = get_grayscale(img)
# Rescale with alpha=1.0 and beta=1 (see cv2.convertScaleAbs for the exact formula).
img_c = cv2.convertScaleAbs(img_g, alpha=1.0, beta = 1)

# Preview the 'time' zone crop; show_img blocks until a key is pressed in the window.
img_ = _crop_info(img_c, 'time')
show_img(img_)

# cropped_image = img_c[950:980, 1060:1200]
# cropped_image = img_c[780:840, 1130:1200]

# text = pytesseract.image_to_string(cropped_image)
# print(text)
# show_img(cropped_image)

In [281]:

# Zones OCR'd for every screenshot (everything outside the main-info rows).
zones = ['water temp', 'air temp', 'location', 'precip', 'wind' , 'time', 'moon', 'pressure']

# OCR screenshots sc2.png .. sc15.png; number 9 is deliberately skipped.
for i in range(2, 16):
    if i == 9:
        continue
    path = f'assets/screens/sc{i}.png'
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None, not by raising.
        print(f'Could not read {path}; skipping.')
        continue
    other_info = dict()
    for z in zones:
        other_info[z] = pytesseract.image_to_string(_crop_info(img, z))
    # Show the collected results; a bare print() only emitted a blank line.
    print(other_info)
#         print(_get_main_info(img))
        
# img_ = _crop_info(img, 'time')
#         cropped_image = img[0:1000, 710:1200]
#         text = pytesseract.image_to_string(cropped_image)
#         for fish in fish_list:
#             if fish in text:
#                 print(fish)


{'water temp': '34°\n', 'air temp': '', 'location': 'Four Mile Run\n', 'precip': 'Light rain\n', 'wind': '7kn\n', 'time': '16:22.\n', 'moon': 'Last Quarter\n', 'pressure': '1020.00 hPa\n'}
{'water temp': '', 'air temp': 'To\n', 'location': 'Date 1/25/22\n', 'precip': 'RAPALA\n\n[ ss VX\n', 'wind': '', 'time': 'a,\n', 'moon': '', 'pressure': ''}
{'water temp': '', 'air temp': '30°\n', 'location': 'Four Mile Run\n', 'precip': 'Sunny\n', 'wind': '9kn\nNNW\n', 'time': '', 'moon': '', 'pressure': ''}
{'water temp': 'peer ane\n', 'air temp': '', 'location': 'Main info\n', 'precip': '', 'wind': '', 'time': '', 'moon': '', 'pressure': ''}
{'water temp': '37°\n', 'air temp': '32°\n', 'location': '', 'precip': 'Partly cloudy\n', 'wind': '4kn\n', 'time': '17:36\n', 'moon': 'Last Quarter\n', 'pressure': '1009.00 hPa\n'}
{'water temp': 'peratur\n', 'air temp': '', 'location': 'Four Mile Run\n', 'precip': '', 'wind': '', 'time': '', 'moon': '', 'pressure': ''}
{'water temp': '39°\n', 'air temp': '27