<a href="https://colab.research.google.com/github/cjkreienkamp/wpolo-scoresheet-ocr/blob/main/wpolo_scoresheet_ocr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. rotate image using lines
2. read in cap data
3. read in team data
4. read in remark
5. read in score
6. read in time
7. read in column
8. read in entire game log
9. locate position on page by comparing it to other game sheets, we should have an expected size for that area

## 0. Configure Environment

### 0.1 Imports

In [1]:
import cv2
import numpy as np
import imutils
from imutils import contours
import os
import torch
from typing import Dict, List
import torchvision
import torchvision.transforms as transforms
import pathlib

### 0.2 Create folder structure

In [2]:
if not os.path.exists('output'): os.makedirs('output')
if not os.path.exists('output/ROI'): os.makedirs('output/ROI')
for i in range(1, 76):
  if not os.path.exists(f'output/ROI/{i}'): os.makedirs(f'output/ROI/{i}')

## 1. Align the scoresheet to its template

### 1.0 Functions

In [3]:
from itertools import groupby, product
from typing import List, Tuple
from collections import defaultdict

def align_images(image, template, maxFeatures=500, keepPercent=0.2, debug=False):
	image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

 	# detect keypoints and extract features
	orb = cv2.ORB_create(maxFeatures)
	(keyptsA, descsA) = orb.detectAndCompute(image_gray, None)
	(keyptsB, descsB) = orb.detectAndCompute(template_gray, None)

	# match the features
	method = cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING
	matcher = cv2.DescriptorMatcher_create(method)
	matches = matcher.match(descsA, descsB, None)

 	# sort the matches by their distance (smaller distance = "more similar")
	matches = sorted(matches, key=lambda x:x.distance)
	keep = int(len(matches) * keepPercent)
	matches = matches[:keep]

  # allocate memory for the keypoints (x, y)-coordinates from the
	# top matches -- use these coordinates to compute homography matrix
	ptsA = np.zeros((len(matches), 2), dtype="float")
	ptsB = np.zeros((len(matches), 2), dtype="float")
	# loop over the top matches
	for (i, m) in enumerate(matches):
		# indicate that the two keypoints in the respective images
		# map to each other
		ptsA[i] = keyptsA[m.queryIdx].pt
		ptsB[i] = keyptsB[m.trainIdx].pt

	(H, mask) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)
	(h, w) = template.shape[:2]
	aligned = cv2.warpPerspective(image, H, (w, h))
	return aligned

def segment_by_angle_kmeans(lines, k=2, **kwargs):
	"""Groups lines based on angle with k-means.

	Uses k-means on the coordinates of the angle on the unit circle
	to segment `k` angles inside `lines`.
	"""

	# Define criteria = (type, max_iter, epsilon)
	default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
	criteria = kwargs.get('criteria', (default_criteria_type, 10, 1.0))
	flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
	attempts = kwargs.get('attempts', 10)

	# returns angles in [0, pi] in radians
	angles = np.array([line[0][1] for line in lines])
	# multiply the angles by two and find coordinates of that angle
	pts = np.array([[np.cos(2*angle), np.sin(2*angle)]
									for angle in angles], dtype=np.float32)

	# run kmeans on the coords
	labels, centers = cv2.kmeans(pts, k, None, criteria, attempts, flags)[1:]
	labels = labels.reshape(-1)  # transpose to row vec

	# segment lines based on their kmeans label
	segmented = defaultdict(list)
	for i, line in enumerate(lines):
			segmented[labels[i]].append(line)
	segmented = list(segmented.values())
	return segmented

def intersection(line1, line2):
	"""Finds the intersection of two lines given in Hesse normal form.

	Returns closest integer pixel locations.
	See https://stackoverflow.com/a/383527/5087436
	"""
	rho1, theta1 = line1[0]
	rho2, theta2 = line2[0]
	A = np.array([
			[np.cos(theta1), np.sin(theta1)],
			[np.cos(theta2), np.sin(theta2)]
	])
	b = np.array([[rho1], [rho2]])
	x0, y0 = np.linalg.solve(A, b)
	x0, y0 = int(np.round(x0)), int(np.round(y0))
	return [x0, y0]


def segmented_intersections(lines):
  """Finds the intersections between groups of lines."""

  intersections = []
  for i, group in enumerate(lines[:-1]):
      for next_group in lines[i+1:]:
          for line1 in group:
              for line2 in next_group:
                  intersections.append(intersection(line1, line2))

  return intersections

def Manhattan(tup1, tup2):
  return abs(tup1[0] - tup2[0]) + abs(tup1[1] - tup2[1])

def groupSimilarPoints(points):
  points = [tuple(point) for point in points]
  man_tups = [sorted(sub) for sub in product(points, repeat = 2)
                                        if Manhattan(*sub) < 5]
  groups_dict = {ele: {ele} for ele in points}
  for tup1, tup2 in man_tups:
    groups_dict[tup1] |= groups_dict[tup2]
    groups_dict[tup2] = groups_dict[tup1]

  groups = [[*next(val)] for key, val in groupby(
      sorted(groups_dict.values(), key = id), id)]

  result = []
  for group in groups:
    if len(group) < 5: continue
    average = [sum(point)/len(point) for point in zip(*group)]
    average[0] = int(average[0])
    average[1] = int(average[1])
    result.append(average)
  result = sorted(result, key=lambda x: (x[1],x[0]))
  return result

def distance(pointA, pointB):
  return np.sqrt((pointA[0] - pointB[0])**2 + (pointA[1] - pointB[1])**2)

### 1.1 Align the scoresheet

In [4]:
scoresheet = cv2.imread('/content/drive/MyDrive/input/scoresheet1.png')
scoresheet_template = cv2.imread('/content/drive/MyDrive/input/scoresheet_template.png')
scoresheet_aligned = align_images(scoresheet, scoresheet_template, debug=True)
cv2.imwrite('output/scoresheet_aligned.png',scoresheet_aligned)

True

### 1.2 Identify first column of the gamelog on the aligned scoresheet

In [5]:
(scoresheet_aligned_height, scoresheet_aligned_width) = scoresheet_aligned.shape[:2]
top_left_gamelog = (int(scoresheet_aligned_width*0.005), int(scoresheet_aligned_height*0.6405))
bottom_right_gamelog = (int(scoresheet_aligned_width*0.79), int(scoresheet_aligned_height*0.995))

scoresheet_bb_gamelog = scoresheet_aligned.copy()
cv2.rectangle(scoresheet_bb_gamelog, top_left_gamelog, bottom_right_gamelog, (0, 0, 255), 3)
#cv2.imwrite('output/scoresheet_bb_gamelog.png',scoresheet_bb_gamelog)
gamelog = scoresheet_aligned[top_left_gamelog[1]:bottom_right_gamelog[1], top_left_gamelog[0]:bottom_right_gamelog[0]]
#cv2.imwrite('output/gamelog.png',gamelog)

(gamelog_height, gamelog_width) = gamelog.shape[:2]
top_left_col1 = (0, 0)
bottom_right_col1 = (int(gamelog_width*0.225), gamelog_height)

gamelog_bb_col1 = gamelog.copy()
cv2.rectangle(gamelog_bb_col1, top_left_col1, bottom_right_col1, (0, 0, 255), 3)
#cv2.imwrite('output/gamelog_bb_col1.png',gamelog_bb_col1)
col1 = gamelog[top_left_col1[1]:bottom_right_col1[1], top_left_col1[0]:bottom_right_col1[0]]
#cv2.imwrite('output/col1.png',col1)

### 1.3 Align first column of the gamelog

In [6]:
col1_corners = col1.copy()
col1_gray = cv2.cvtColor(col1, cv2.COLOR_BGR2GRAY)
col1_gray = np.float32(col1_gray)
dst = cv2.cornerHarris(col1_gray,2,3,0.04)
dst = cv2.dilate(dst,None)
col1_corners[dst>0.01*dst.max()]=[0,0,255]
#cv2.imwrite('output/col1_corners.png',col1_corners)

col1_gray = col1.copy()
col1_gray = cv2.cvtColor(col1_gray, cv2.COLOR_BGR2GRAY)
_, col1_modified = cv2.threshold(col1_gray, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
col1_canny = cv2.Canny(col1_modified, 100, 200, None, 3)
col1_lines = cv2.HoughLines(col1_canny, 1 , np.pi/180 , 150)
#cv2.imwrite('output/col1_gray.png',col1_gray)
#cv2.imwrite('output/col1_canny.png',col1_canny)

segmented_lines = segment_by_angle_kmeans(col1_lines)
intersections = segmented_intersections(segmented_lines)
grouped_intersections = groupSimilarPoints(intersections)

(col1_height, col1_width) = col1.shape[:2]
tl = (col1_height/2,col1_width/2)
bl = (col1_height/2,col1_width/2)
tr = (col1_height/2,col1_width/2)
br = (col1_height/2,col1_width/2)

col1_w_intersections = col1.copy()
for point in grouped_intersections:
  tl = point if distance((0,0), point) < distance(tl, (0,0)) else tl
  bl = [point[0] - 5, point[1] + 8] if distance((0,col1_height), point) < distance(bl, (0,col1_height)) else bl
  tr = [point[0] + 10, point[1]] if distance((col1_width,0), point) < distance(tr, (col1_width,0)) else tr
  br = [point[0] + 6, point[1] + 8] if distance((col1_width,col1_height), point) < distance(br, (col1_width,col1_height)) else br
  cv2.circle(col1_w_intersections, point, 2, (255, 0, 0), -1)
for col_corner in [tl, bl, tr, br]:
  cv2.circle(col1_w_intersections, col_corner, 2, (0, 0, 255), -1)
#cv2.imwrite('output/col1_w_intersections.png',col1_w_intersections)

col_template = cv2.imread('/content/drive/MyDrive/input/col_template.png')
(col_template_height, col_template_width) = col_template.shape[:2]
pts_src = np.array([tl, bl, tr, br])
pts_dst = np.array([[0,0],[0,col_template_height],[col_template_width,0],[col_template_width,col_template_height]])
h, mask = cv2.findHomography(pts_src, pts_dst)
col1_aligned = cv2.warpPerspective(col1, h, (col_template_width, col_template_height))
cv2.imwrite('output/col1_aligned.png',col1_aligned)

  x0, y0 = int(np.round(x0)), int(np.round(y0))


True

## 2. Identify contours within each cell

1. TIME: separate the numbers
2. CAP#: separate the numbers
3. TEAM: find one contour
4. REMARKS: find one contour
5. D-W: separate the numbers

### 2.1 Identify every cell of the first gamelog column

In [7]:
col_template = cv2.imread('/content/drive/MyDrive/input/col_template.png')
col_template_gray = cv2.cvtColor(col_template, cv2.COLOR_BGR2GRAY)
_, col_template_bin = cv2.threshold(col_template_gray, 128, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
col_edges = cv2.Canny(col_template_bin, 100, 200, None, 3)
col_edges = cv2.bitwise_not(col_edges)
col_erode = cv2.erode(col_edges, None, iterations=2)
col_contours = cv2.findContours(col_erode, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
col_contours = col_contours[0] if len(col_contours) == 2 else col_contours[1]
col_contours = [sorted(contour.tolist(), reverse=True) for contour in col_contours if contour[0][0][1]>col_template_height/17]
#cv2.drawContours(col_template, col_contours, -1, (0, 255, 0), 1)

ROI_number = 0
cell_height = 0
cell_width = 0
col1_bb = col1_aligned.copy()
for c in col_contours:
  ROI_number += 1
  if ROI_number > 75: break
  x,y,w,h = cv2.boundingRect(np.array(c))
  (cell_height, cell_width) = (h, w)
  cv2.rectangle(col1_bb, (x-3, y-3), (x+w+3, y+h+3), (36,255,12), 2)
  cv2.rectangle(col_template, (x-3, y-3), (x+w+3, y+h+3), (36,255,12), 2)
  cv2.putText(col1_bb, str(ROI_number), (int(x+w/3), int(y+h*2/3)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1)
  ROI_image = col1_aligned[y:y+h, x:x+w]
  cv2.imwrite(f'output/ROI/{ROI_number}/cell.png',ROI_image)

#cv2.imwrite('output/col_template.png',col_template)
cv2.imwrite('output/col1_bb.png',col1_bb)

True

### 2.2 Find the bounding-box for every contour in a cell, separating each digit/number

In [8]:
ROI_gray_list = []
for ROI_number in range(1, 76):
  ROI_image = cv2.imread(f'output/ROI/{ROI_number}/cell.png')
  gray = cv2.cvtColor(ROI_image, cv2.COLOR_BGR2GRAY)
  binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
  border = cv2.copyMakeBorder(binary,top=10,bottom=10,left=10,right=10,borderType=cv2.BORDER_CONSTANT,value=[255,255,255])
  erode = cv2.erode(border, None, iterations=1)
  (cnts, hiers) = cv2.findContours(erode.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
  hiers = hiers[0]
  #cnts = imutils.grab_contours(cnts)
  #cnts = contours.sort_contours(cnts, method="left-to-right")[0]

  ROI_image = cv2.copyMakeBorder(ROI_image,top=10,bottom=10,left=10,right=10,borderType=cv2.BORDER_REPLICATE)
  contour_number = 0
  for (cnt, hier) in zip(cnts,hiers):
    if hier[3] != 0: continue
    if cv2.contourArea(cnt) > 800: continue
    #minx = min(cnt.reshape(-1, 2)[:, 0])
    (x, y, w, h) = cv2.boundingRect(cnt)
    if h < 15: continue
    roi = ROI_image[y:y + h, x:x + w]
    cv2.rectangle(ROI_image, (x, y), (x + w, y + h), (36,255,12), 2)
    contour_image = ROI_image[y:y+h, x:x+w]
    contour_number += 1
    cv2.imwrite(f'output/ROI/{ROI_number}/contour{contour_number}.png',contour_image)
  cv2.imwrite(f'output/ROI/{ROI_number}/cell_w_bb.png',ROI_image)

## 3. Classify each cell

1. TIME: MNIST
2. CAP#: MNIST
3. TEAM: D or W
4. REMARKS: 13 classes
5. D-W: MNIST

### 3.0 Functions

In [17]:
def predict_number(model: torch.nn.Module,
                        image_path: str,
                        class_names: List[str] = None,
                        transform=None):

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    target_image = torchvision.io.read_image(str(image_path)).type(torch.float32)
    target_image = target_image / 255.

    target_image = transforms.functional.resize(target_image, (28, 28))
    target_image = transforms.functional.rgb_to_grayscale(target_image, num_output_channels=1)

    if transform:
        target_image = transform(target_image)

    model.to(device)
    model.eval()
    with torch.inference_mode():
        target_image = target_image.unsqueeze(dim=0) # add an extra dimension to image
        target_image_pred = model(target_image.to(device))

    target_image_pred_probs = torch.softmax(target_image_pred, dim=1) # convert logits --> prediction probabilities
    target_image_pred_label = torch.argmax(target_image_pred_probs, dim=1) # convert prediction probabilities --> prediction labels
    return (f'{class_names[target_image_pred_label.cpu()]} ({target_image_pred_probs.max().cpu():.3f})')

### 3.1 Load model and dataset

In [10]:
model_0 = torch.jit.load('/content/drive/MyDrive/input/mnist_model.pt')

dataset = torchvision.datasets.MNIST(
    root='./',
    train=False,
    download=True,
    transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 73501940.71it/s]


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 41598795.96it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 32872549.83it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7565738.19it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw






In [15]:
print(dataset.classes)

['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']


### 3.2 Make prediction on one image

In [32]:
output = predict_number(model=model_0,
               image_path=f'output/ROI/1/contour2.png',
               class_names=dataset.classes,
               transform=None)
print(output)

5 - five, 0.995


### 3.3 Make prediction on all images

In [23]:
for cell in range(1, 76):
  if ( cell - 1 ) % 5 in [0, 3, 4]:
    i = 0
    print(f'{cell}: ',end='')
    for path in os.listdir(f'output/ROI/{cell}'):
      i += 1
      if 'contour' in path:
        output = predict_number(model=model_0,
                       image_path=f'output/ROI/{cell}/{path}',
                       class_names=[0,1,2,3,4,5,6,7,8,9],
                       transform=None)
        print(f'{output}   ',end='')
    if i==2:
      print(f'EMPTY',end='')
    print()

1: 5 (0.995)   5 (0.394)   
4: 5 (0.727)   5 (0.988)   
5: 2 (0.652)   5 (0.999)   
6: EMPTY
9: EMPTY
10: 3 (0.686)   
11: EMPTY
14: 2 (0.807)   2 (0.914)   
15: 2 (0.441)   5 (0.983)   
16: EMPTY
19: 5 (0.784)   2 (0.884)   
20: 3 (0.717)   2 (0.999)   
21: EMPTY
24: 5 (0.962)   
25: 2 (0.945)   2 (0.989)   
26: EMPTY
29: 5 (0.625)   5 (0.966)   
30: 2 (0.822)   5 (0.768)   5 (0.979)   
31: EMPTY
34: EMPTY
35: 2 (0.981)   5 (0.997)   5 (0.959)   
36: EMPTY
39: 5 (0.953)   5 (0.553)   
40: 5 (0.698)   2 (0.873)   2 (0.591)   
41: 2 (0.928)   5 (0.719)   
44: 5 (0.990)   
45: 3 (0.682)   5 (0.483)   2 (0.591)   
46: 5 (0.703)   
49: 5 (0.955)   
50: 2 (0.661)   3 (0.940)   
51: 5 (0.830)   
54: 5 (0.992)   
55: EMPTY
56: EMPTY
59: 5 (0.563)   5 (0.976)   
60: 5 (0.997)   5 (0.977)   5 (0.928)   5 (0.980)   
61: EMPTY
64: 5 (0.978)   
65: 5 (0.705)   2 (0.985)   2 (0.868)   
66: 2 (0.533)   2 (0.535)   
69: 2 (0.652)   2 (0.684)   
70: 5 (0.996)   2 (0.512)   5 (0.478)   
71: 2 (0.998)  

## To Do:
* retrain MNIST
* re-number the cells
* get the rest of the columns
* test on other scoresheets
* add HC to MNIST and retrain

