In [None]:
%matplotlib inline
# color and word detection
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import spatial
import cv2
import pytesseract

# clue generator
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
from itertools import zip_longest
from IPython.display import HTML

# Color Card Processing

In [None]:
# color card images
color_card_path = 'assets/color_card.jpg'
color_card2_path = 'assets/color_card2.PNG'
color_card = cv2.imread(color_card_path)
color_card2 = cv2.imread(color_card2_path)

# cv2.imread returns None instead of raising when a file cannot be read;
# fail here with the offending path rather than later when the image is sliced.
for _path, _img in ((color_card_path, color_card), (color_card2_path, color_card2)):
    if _img is None:
        raise FileNotFoundError("could not read image: " + _path)

### 1. Preprocessing Image

In [None]:
def preprocess_img(img):
    """Reduce brightness/lighting problems by equalizing the lightness channel.

    The BGR image is converted to LAB, CLAHE (clipLimit=3.0, 8x8 tiles) is
    applied to the L channel only, and the result is converted back to BGR.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2LAB``).

    Returns:
        A BGR image with the equalized lightness channel.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))

    # contrast-limited adaptive histogram equalization on lightness only;
    # the two color channels are passed through untouched
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    equalized_lab = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))

    return cv2.cvtColor(equalized_lab, cv2.COLOR_LAB2BGR)

# Crop each photo to a fixed window and run the lighting correction on the crop.
# NOTE(review): the crop windows are hard-coded — presumably tuned to these two
# images so that only the card remains; confirm before using other photos.
processed_color_card = preprocess_img(color_card[200:1400, 275:1400])
processed_color_card2 = preprocess_img(color_card2[250:1750, 250:1750])

In [None]:
# HSV lower/upper bounds used to pick out each card color
# NOTE(review): in OpenCV's 8-bit HSV, red hues also occur near the top of the
# hue range; only 0-10 is covered here — confirm that is enough for these images.
lower_red, upper_red = np.array([0, 50, 50]), np.array([10, 255, 255])
lower_blue, upper_blue = np.array([100, 150, 50]), np.array([140, 255, 255])
lower_beige, upper_beige = np.array([20, 10, 150]), np.array([40, 70, 255])

# single-value bounds; not used by the masks built in this cell
lower_gray, upper_gray = np.array([50]), np.array([150])

# one independent HSV conversion of the processed card per color
red_img, blue_img, beige_img = (
    cv2.cvtColor(processed_color_card2.copy(), cv2.COLOR_BGR2HSV)
    for _ in range(3)
)

# binary masks: a pixel is non-zero where it lies inside the color's HSV bounds
red_mask = cv2.inRange(red_img, lower_red, upper_red)
blue_mask = cv2.inRange(blue_img, lower_blue, upper_blue)
beige_mask = cv2.inRange(beige_img, lower_beige, upper_beige)

In [None]:
# Visual check of the red mask: pixels inside [lower_red, upper_red] are non-zero.
plt.imshow(red_mask)

In [None]:
# Visual check of the blue mask: pixels inside [lower_blue, upper_blue] are non-zero.
plt.imshow(blue_mask)

In [None]:
# Visual check of the beige mask: pixels inside [lower_beige, upper_beige] are non-zero.
plt.imshow(beige_mask)

### 2. Contour Detection --> Detect Each Color Square

In [None]:
# contour detection section
gray_color_card2 = cv2.cvtColor(processed_color_card2.copy(), cv2.COLOR_BGR2GRAY)
img_h, img_w = gray_color_card2.shape

# the top-left pixel is taken as the background level and used as the threshold
background_thresh = gray_color_card2[0][0]
total_thresh = background_thresh
blur = cv2.GaussianBlur(gray_color_card2, (5, 5), 0)
_, thresh_img = cv2.threshold(blur, total_thresh, 255, cv2.THRESH_BINARY)
plt.imshow(thresh_img, cmap="gray", vmin=0, vmax=255)

contours, hier = cv2.findContours(thresh_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# contours at least 1/25 of the image area are ranked as 0 so they sort last
max_square_area = (img_h * img_w) / 25


def _ranked_area(cnt):
    """Contour area, or 0 when the contour is too large to be a single square."""
    area = cv2.contourArea(cnt)
    return area if area < max_square_area else 0


top_25_contours = sorted(contours, key=_ranked_area, reverse=True)[:25]

# (index into top_25_contours, x, y) of every bounding-box top-left corner
coords_and_index = [
    (idx,) + cv2.boundingRect(cnt)[:2] for idx, cnt in enumerate(top_25_contours)
]

# order top-to-bottom first ...
sorted_y = sorted(coords_and_index, key=lambda entry: entry[2])

# ... then left-to-right inside each consecutive group of five (one grid row)
for row_start in range(0, 25, 5):
    row = slice(row_start, row_start + 5)
    sorted_y[row] = sorted(sorted_y[row], key=lambda entry: entry[1])
top_25_sorted = [top_25_contours[entry[0]] for entry in sorted_y]


In [None]:
# highlight contours and label each square to check correctness
# thresh_img is a single-channel image, so it has to be expanded with GRAY2RGB;
# BGR2RGB requires a 3- or 4-channel input and raises on it.
print_img = cv2.cvtColor(thresh_img, cv2.COLOR_GRAY2RGB)
cv2.drawContours(print_img, top_25_contours, -1, (0, 255, 0), 3)

font = cv2.FONT_HERSHEY_SIMPLEX

fontScale = 3
color = (255, 0, 0)
thickness = 5
# the label is the square's position in reading order (index into sorted_y)
for i, place in enumerate(sorted_y):
    cv2.putText(print_img, str(i), (place[1] + 10, place[2] + 10), font,
                fontScale, color, thickness, cv2.LINE_AA)

plt.imshow(print_img)

### 3. Color Detection --> Detect Color of each Square/Contour

In [None]:
# this stores the color of each respective square as (square index, color name)
colors = []

# processed_color_card2 is already the cropped card and the coordinates in
# sorted_y are relative to it, so it must not be cropped a second time here.
hsv_cropped_img = cv2.cvtColor(processed_color_card2.copy(), cv2.COLOR_BGR2HSV)

lower_blue = np.array([100,150,50])
upper_blue = np.array([140,255,255])

lower_red = np.array([0,50,50])
upper_red = np.array([10,255,255])

lower_beige = np.array([20, 10, 150])
upper_beige = np.array([40, 80, 255])

# checked in this order; anything matching none of them is labelled "black"
color_ranges = [
    ("red", lower_red, upper_red),
    ("blue", lower_blue, upper_blue),
    ("beige", lower_beige, upper_beige),
]

# pixels in from the bounding box's top-left corner; intended to land near the
# middle of the square
SAMPLE_OFFSET = 90

for i, place in enumerate(sorted_y):
    # place is (contour index, x, y); images are indexed [row, column] = [y, x]
    square_hsv = hsv_cropped_img[place[2] + SAMPLE_OFFSET, place[1] + SAMPLE_OFFSET]
    for name, lower, upper in color_ranges:
        if np.all(square_hsv >= lower) and np.all(square_hsv <= upper):
            colors.append((i, name))
            break
    else:
        colors.append((i, "black"))

# Word Cards Processing

### Contour Detection --> Find each word card

In [None]:
# NOTE: this cell rebinds thresh_img, top_25_contours, sorted_y and top_25_sorted,
# which the color-card cells above also define.
word_board_path = 'assets/5x5_2.jpg'
word_board = cv2.imread(word_board_path)
# cv2.imread returns None instead of raising when the file cannot be read
if word_board is None:
    raise FileNotFoundError("could not read image: " + word_board_path)
word_board = cv2.rotate(word_board, cv2.ROTATE_90_COUNTERCLOCKWISE)
word_board_gray = cv2.cvtColor(word_board, cv2.COLOR_BGR2GRAY)

img_h, img_w = word_board_gray.shape
# the top-left pixel is taken as the background level; int() keeps the addition
# below in Python integers — a uint8 scalar plus 90 can wrap around under
# NumPy 2 promotion rules.
background_thresh = int(word_board_gray[0][0])
ADD_THRESH = 90
blur = cv2.GaussianBlur(word_board_gray, (5, 5), 0)
total_thresh = background_thresh + ADD_THRESH
_, thresh_img = cv2.threshold(blur, total_thresh, 255, cv2.THRESH_BINARY)

# morphological opening with a 7x7 rectangle before looking for contours
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
opening = cv2.morphologyEx(thresh_img, cv2.MORPH_OPEN, kernel, iterations=1)
contours, hier = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# hierarchy rows are [next, previous, first_child, parent]:
# keep outermost contours (no parent) that contain at least one child contour
contours = [contour for contour, h in zip(contours, hier[0]) if h[3] == -1 and h[2] > -1]

# contours at least 1/25 of the image area are ranked as 0 so they sort last
max_card_area = (img_h * img_w) / 25


def _ranked_card_area(cnt):
    """Contour area, or 0 when the contour is too large to be a single card."""
    area = cv2.contourArea(cnt)
    return area if area < max_card_area else 0


top_25_contours = sorted(contours, key=_ranked_card_area, reverse=True)[:25]

# (index into top_25_contours, x, y) of every bounding-box top-left corner
coords_and_index = []
for i, contour in enumerate(top_25_contours):
    x, y, _, _ = cv2.boundingRect(contour)
    coords_and_index.append((i, x, y))

# order top-to-bottom, then left-to-right inside each group of five (one row)
sorted_y = sorted(coords_and_index, key=lambda x: x[2])

for i in range(5):
    sorted_y[5 * i:5 * (i + 1)] = sorted(sorted_y[5 * i:5 * (i + 1)], key=lambda x: x[1])
top_25_sorted = [top_25_contours[i[0]] for i in sorted_y]

***Original Board***

In [None]:
# Rendered inline with matplotlib: cv2.imshow + cv2.waitKey(0) opens a native
# window and blocks the kernel until a key is pressed, which stops "Run All".
# matplotlib expects RGB, while cv2 images are BGR.
plt.imshow(cv2.cvtColor(cv2.resize(word_board, (960, 540)), cv2.COLOR_BGR2RGB))
plt.title('Original Board')
plt.show()

***Thresholded Image***

In [None]:
# Rendered inline with matplotlib: cv2.imshow + cv2.waitKey(0) opens a native
# window and blocks the kernel until a key is pressed, which stops "Run All".
# `opening` is a single-channel image, so it is shown with a gray colormap.
plt.imshow(cv2.resize(opening, (960, 540)), cmap="gray", vmin=0, vmax=255)
plt.title('Thresholded Image')
plt.show()

***Show Contours***

In [None]:
# thresh_img is a single-channel image, so it has to be expanded with GRAY2RGB;
# BGR2RGB requires a 3- or 4-channel input and raises on it.
print_img = cv2.cvtColor(thresh_img, cv2.COLOR_GRAY2RGB)
cv2.drawContours(print_img, top_25_contours, -1, (0, 255, 0), 3)

font = cv2.FONT_HERSHEY_SIMPLEX

fontScale = 10
color = (255, 0, 0)
thickness = 5
# the label is the card's position in reading order (index into sorted_y)
for i, place in enumerate(sorted_y):
    cv2.putText(print_img, str(i), (place[1] + 10, place[2] + 10), font,
                fontScale, color, thickness, cv2.LINE_AA)
imS = cv2.resize(print_img, (960, 540))

# Rendered inline with matplotlib: cv2.imshow + cv2.waitKey(0) opens a native
# window and blocks the kernel until a key is pressed, which stops "Run All".
plt.imshow(imS)
plt.title('Contours')
plt.show()

In [None]:
def flattener(image, pts, w, h):
    """Warp one card out of ``image`` into a fixed 200x300 top-down grayscale view.

    Args:
        image: source image; the warped result is converted with
            ``cv2.COLOR_BGR2GRAY``, so a BGR image is expected.
        pts: corner points indexed as ``pts[k][0] -> (x, y)``.
        w: width of the card's bounding rectangle.
        h: height of the card's bounding rectangle.

    Returns:
        The flattened, resized (200 wide, 300 high), grayed image.

    See www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
    """
    out_w, out_h = 200, 300

    # x + y is smallest at the top-left corner and largest at the bottom-right
    coord_sum = np.sum(pts, axis=2)
    top_left = pts[np.argmin(coord_sum)]
    bottom_right = pts[np.argmax(coord_sum)]

    # y - x is smallest at the top-right corner and largest at the bottom-left
    coord_diff = np.diff(pts, axis=-1)
    top_right = pts[np.argmin(coord_diff)]
    bottom_left = pts[np.argmax(coord_diff)]

    # Source corners in the order the destination rectangle lists them:
    # [top left, top right, bottom right, bottom left].
    # Stays empty (all-zero source rectangle) when no case below applies.
    ordered = ()
    if w >= 1.2 * h:
        # Horizontally oriented card. Tested first: when h == 0 both ratio
        # tests hold and the horizontal ordering is the one that applies.
        ordered = (bottom_left, top_left, top_right, bottom_right)
    elif w <= 0.8 * h:
        # Vertically oriented card.
        ordered = (top_left, top_right, bottom_right, bottom_left)
    elif 0.8 * h < w < 1.2 * h:
        # 'Diamond' oriented card: corners are taken by position in pts,
        # depending on which way the card is tilted.
        left_y, right_y = pts[1][0][1], pts[3][0][1]
        if left_y <= right_y:
            # Tilted to the left; pts is expected in the order
            # top right, top left, bottom left, bottom right.
            ordered = (pts[1][0], pts[0][0], pts[3][0], pts[2][0])
        elif left_y > right_y:
            # Tilted to the right; pts is expected in the order
            # top left, bottom left, bottom right, top right.
            ordered = (pts[0][0], pts[3][0], pts[2][0], pts[1][0])

    temp_rect = np.zeros((4, 2), dtype="float32")
    for slot, corner in enumerate(ordered):
        temp_rect[slot] = corner

    # Destination rectangle, perspective matrix, and the warp itself.
    dst = np.array(
        [[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]],
        np.float32,
    )
    transform = cv2.getPerspectiveTransform(temp_rect, dst)
    flat = cv2.warpPerspective(image, transform, (out_w, out_h))
    return cv2.cvtColor(flat, cv2.COLOR_BGR2GRAY)

### Find Words on Each Card

In [None]:
def find_words(top_25_sorted, board=None):
    """Run OCR on every card contour and return the recognized text.

    Each card is flattened with ``flattener``, rotated 90 degrees
    counter-clockwise, cropped to its lower half, blurred, contrast-boosted
    and Otsu-thresholded before being passed to Tesseract.

    Args:
        top_25_sorted: iterable of card contours; results keep this order.
        board: BGR image the contours belong to. When omitted, the
            notebook-level ``word_board`` is used.

    Returns:
        list of str: the stripped OCR output for each contour.
    """
    words = []
    for cont in top_25_sorted:
        # falls back to the notebook-level image when no board was passed
        source = word_board if board is None else board

        # approximate the card outline; these points are the corners handed to flattener
        peri = cv2.arcLength(cont, True)
        approx = cv2.approxPolyDP(cont, 0.01 * peri, True)
        pts = np.float32(approx)

        _, _, w, h = cv2.boundingRect(cont)

        warp = cv2.rotate(flattener(source, pts, w, h), cv2.ROTATE_90_COUNTERCLOCKWISE)
        # keep the lower half of the card, trimmed by a 20 px margin on every side
        cropped_img = warp[warp.shape[0] // 2 + 20: warp.shape[0] - 20, 20:warp.shape[1] - 20]
        blur = cv2.GaussianBlur(cropped_img, (3, 3), 0)
        contrast = cv2.convertScaleAbs(blur, alpha=1.3, beta=0)
        thresh = cv2.threshold(contrast, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
        words.append(pytesseract.image_to_string(thresh, lang='eng', config='--psm 6').strip())
    return words

# OCR every detected word card, in the order given by top_25_sorted
words = find_words(top_25_sorted)


# Clue Generator

In [None]:
# Single source of truth for the GloVe text file. gensim's `datapath` resolves
# names against gensim's bundled test data, not this project's assets folder,
# so the path is given directly.
glove_file = 'assets/glove.6B.100d.txt'

# Convert the GloVe file to word2vec text format in a temporary file,
# then load that file as KeyedVectors.
word2vec_glove_file = get_tmpfile("glove.6B.100d.word2vec.txt")
glove2word2vec(glove_file, word2vec_glove_file)

model = KeyedVectors.load_word2vec_format(word2vec_glove_file)

***Word2Vec Method***

In [None]:
answers = ["missile", "father", "spring", "soul", "film", "cast"]
bad = ["piano", "sub", "rock", "ham"]


def _in_vocab(token):
    """Return True when looking ``token`` up in ``model`` does not raise KeyError."""
    try:
        model[token]
    except KeyError:
        return False
    return True


# words the model has no vector for are dropped before querying it
filtered_answers = [candidate for candidate in answers if _in_vocab(candidate)]
filtered_bad = [candidate for candidate in bad if _in_vocab(candidate)]

# last expression of the cell, so the result is displayed as its output
model.most_similar(positive=filtered_answers, negative=filtered_bad)

***Cosine Similarity Method***

In [None]:
# cosine approach https://jsomers.net/glove-codenames/

# word -> float32 vector, read from the same GloVe file the Word2Vec cell converts.
# Each line is "<word> <v1> <v2> ...". The encoding is given explicitly so the
# read does not depend on the platform's default.
embeddings = {}
with open("assets/glove.6B.100d.txt", 'r', encoding="utf-8") as f:
    for line in f:
        values = line.split()
        if not values:
            # blank line: nothing to parse
            continue
        word = values[0]
        vector = np.asarray(values[1:], "float32")
        embeddings[word] = vector

def distance(word, reference):
    """Cosine distance between the embeddings of ``word`` and ``reference``."""
    word_vec = embeddings[word]
    reference_vec = embeddings[reference]
    return spatial.distance.cosine(word_vec, reference_vec)

def closest_words(reference):
    """Return every embedded word, sorted by ascending distance to ``reference``."""
    def _gap(candidate):
        return distance(candidate, reference)

    return sorted(embeddings.keys(), key=_gap)

def goodness(word, answers, bad):
    """Score ``word`` as a clue: total distance to ``bad`` minus 4x total distance to ``answers``.

    Words that are themselves in ``answers`` or ``bad`` score -999.
    """
    if word in answers + bad:
        return -999

    away_from_bad = sum(distance(word, b) for b in bad)
    away_from_answers = sum(distance(word, a) for a in answers)
    return away_from_bad - 4.0 * away_from_answers

def minimax(word, answers, bad):
    """Score ``word`` by its worst case: smallest distance to ``bad`` minus largest distance to ``answers``.

    Words that are themselves in ``answers`` or ``bad`` score -999.
    """
    if word in answers + bad:
        return -999

    nearest_bad = min(distance(word, b) for b in bad)
    farthest_answer = max(distance(word, a) for a in answers)
    return nearest_bad - farthest_answer

def candidates(answers, bad, size=100):
    """Return up to ``size`` formatted clue candidates, best first.

    All embedded words are ranked by ``goodness``; the top 250 are re-ranked
    by ``minimax`` and the first ``size`` are formatted as
    ``"<rank>. <word> (<minimax score with two decimals>)"``.
    """
    by_goodness = sorted(embeddings.keys(), key=lambda w: -1 * goodness(w, answers, bad))
    shortlist = sorted(by_goodness[:250], key=lambda w: -1 * minimax(w, answers, bad))[:size]

    lines = []
    for rank, clue in enumerate(shortlist, start=1):
        score = "{0:.2f}".format(minimax(clue, answers, bad))
        lines.append("{}. {} ({})".format(rank, clue, score))
    return lines

def grouper(n, iterable, fillvalue=None):
    """Yield successive ``n``-tuples from ``iterable``, padding the last one with ``fillvalue``."""
    # the same iterator repeated n times: each tuple pulls n consecutive items
    shared = iter(iterable)
    return zip_longest(*((shared,) * n), fillvalue=fillvalue)

def tabulate(data):
    """Render ``data`` as an HTML table with ten entries per row, without index or header."""
    rows = list(grouper(10, data))
    frame = pd.DataFrame(rows)
    return HTML(frame.to_html(index=False, header=False))

In [None]:
# Example query: `answers` are the words a clue should be close to,
# `bad` the words it should stay away from (see goodness / minimax).
answers = ["happy", "cup", "dog"]
bad = ["table"]

# last expression of the cell, so the HTML table is displayed as its output
tabulate(candidates(answers, bad))