# Reading in the files

In [15]:
import cv2
import numpy as np

# Read the image, convert it to grayscale, and binarise it: every pixel brighter
# than 1 becomes 255, everything else becomes 0.
img = cv2.imread('frame.png')
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None, not by raising.
    raise IOError("could not read image file 'frame.png'")
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
_,thresh = cv2.threshold(gray,1,255,cv2.THRESH_BINARY)

# Find the outer contours. findContours returns (contours, hierarchy) in OpenCV 2.x/4.x
# and (image, contours, hierarchy) in OpenCV 3.x, so the contour list is always the
# second-to-last element of the returned tuple.
contours = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)[-2]
if len(contours) == 0:
    raise ValueError("no pixels above the threshold were found in 'frame.png'")

# A single object is expected; if stray specks yield extra contours, keep the largest one.
cnt = max(contours, key=cv2.contourArea)
x,y,w,h = cv2.boundingRect(cnt)

# Crop the original (colour) image to the bounding rectangle and save it to another file.
crop = img[y:y+h,x:x+w]
cv2.imwrite('framemod.png',crop)

True

In [26]:
import numpy as np

def autocrop(image, threshold=0):
    """Trim border rows and columns whose brightest pixel is <= threshold.

    An image with three axes is judged by its per-pixel maximum over the
    last axis; a two-axis image is used as is. Only the outer margins are
    removed: everything between the first and last row/column containing
    a pixel above ``threshold`` is kept.

    An image with no pixel above ``threshold`` is cut down to its
    top-left 1x1 corner.

    Returns the cropped image (a slice of ``image``).
    """
    intensity = np.max(image, 2) if len(image.shape) == 3 else image
    assert len(intensity.shape) == 2

    # Column indices (positions along axis 1) that contain a pixel above threshold.
    xs = np.where(np.max(intensity, 0) > threshold)[0]
    if not xs.size:
        return image[:1, :1]

    # Row indices (positions along axis 0) that contain a pixel above threshold.
    ys = np.where(np.max(intensity, 1) > threshold)[0]
    top, bottom = ys[0], ys[-1] + 1
    left, right = xs[0], xs[-1] + 1
    return image[top:bottom, left:right]

# Re-read the frame, convert it to grayscale, trim its borders with autocrop
# (default threshold of 0) and write the result to a separate file.
# NOTE(review): this cell only imports numpy; cv2 must already be in scope
# from an earlier cell.
img = cv2.imread('frame.png')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
crop = autocrop(gray)
cv2.imwrite('framemodOther.png',crop)



True

In [23]:
def crop_image(img,tol=0):
    """Crop ``img`` to the bounding box of the pixels greater than ``tol``.

    Only the outer margins are removed; rows and columns at or below the
    tolerance that lie *inside* the content are kept, so the picture is
    not distorted.

    img -- image data as a numpy array with at least two axes; any axes
           after the first two (e.g. colour channels) are collapsed when
           deciding whether a pixel counts as content.
    tol -- tolerance; pixels with a value <= tol are treated as blank.

    Returns a new array (a copy). If no pixel exceeds ``tol`` the result
    has zero rows and zero columns.
    """
    mask = img>tol
    if mask.ndim > 2:
        # A pixel is content if any of its channels exceeds the tolerance.
        mask = mask.any(axis=tuple(range(2, mask.ndim)))

    row_idx = np.flatnonzero(mask.any(axis=1))
    if row_idx.size == 0:
        return img[:0, :0].copy()
    col_idx = np.flatnonzero(mask.any(axis=0))

    # Slice between the first and last content row/column so that blank
    # lines in the interior survive; copy so the caller gets an independent array.
    return img[row_idx[0]:row_idx[-1] + 1, col_idx[0]:col_idx[-1] + 1].copy()

# Re-read the frame, convert it to grayscale, crop it with crop_image (the
# function defined in this cell) and write the result to a third file.
img = cv2.imread('frame.png')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
crop = crop_image(gray)
cv2.imwrite('framemodThird.png',crop)

True

## Image histogram equalization

In [40]:
from skimage import data, img_as_float
from skimage import exposure
import cv2
import numpy as np

# Load both input images; the flag 0 asks cv2.imread for a single-channel
# grayscale image.
img1 = cv2.imread('histogramNormalization/first.png',0)
img2 = cv2.imread('histogramNormalization/second.png',0)
# Global histogram equalization of each image.
# NOTE(review): np.hstack needs both images to have the same height -- confirm
# that first.png and second.png match.
equ1 = cv2.equalizeHist(img1)
equ2 = cv2.equalizeHist(img2)
res = np.hstack((equ1,equ2)) #stacking images side-by-side
cv2.imwrite('histogramNormalization/res.png',res)


# Create a CLAHE object with a clip limit of 2.0 and an 8x8 tile grid
# (both arguments are optional), apply it to the same two inputs and save
# the side-by-side result next to the globally equalized one.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
cl1 = clahe.apply(img1)
cl2 = clahe.apply(img2)
res_cl = np.hstack((cl1, cl2)) #stacking images side-by-side
cv2.imwrite('histogramNormalization/res_cl.png',res_cl)



True

# Hamming distances between hex hash strings

In [40]:
# This function takes a list of (image key, hex hash string) pairs, forms every
# combination of two pairs and calculates the number of differing bits between
# their hash strings. It prints one line per combination and returns the list of
# (pair_a, pair_b, distance) tuples, sorted in reverse order so the pairs with
# the highest difference are at the top of the list.
def find_hamming_distances(tuple_pair_array):
    """Compute, print and return the Hamming distance of every pair of hashes.

    tuple_pair_array -- sequence of (key, hex hash string) items.

    Each output line has the form ``keyA|keyB - hashA|hashB - distance``.

    Returns a list of ``(item_a, item_b, distance)`` tuples ordered by
    descending distance; fewer than two input items yield an empty list.
    """
    # Index 1 of every item is its hash string; the items themselves are kept
    # intact so the caller can still see the keys.
    distances = [
        (first, second, get_hamming_distance(first[1], second[1]))
        for first, second in combinations(tuple_pair_array, 2)
    ]
    distances.sort(key=lambda entry: entry[2], reverse=True)

    for first, second, distance in distances:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("{}|{} - {}|{} - {}".format(
            first[0],
            second[0],
            first[1],
            second[1],
            distance
        ))

    return distances


# Function that counts the differing bits between two hash strings
def get_hamming_distance(hash_string1, hash_string2):
    """Return how many bit positions differ between two hash strings.

    Both hashes are expanded with hash_to_bits and compared position by
    position. The comparison stops at the end of the shorter bit string,
    so surplus bits of a longer hash are not counted.
    """
    first_bits = hash_to_bits(hash_string1)
    second_bits = hash_to_bits(hash_string2)
    return sum(1 for left, right in zip(first_bits, second_bits) if left != right)

def hash_to_bits(hash_string):
    """Expand a hex hash string into one string of '0'/'1' characters.

    The input is consumed two hex digits at a time and every chunk is
    rendered as exactly 8 zero-padded bits (a trailing single digit is
    padded to 8 bits as well). An empty input gives an empty string.
    """
    return "".join(
        format(int('0x' + hash_string[start:start + 2], 16), '08b')
        for start in range(0, len(hash_string), 2)
    )


In [39]:
from itertools import combinations

# Sample data: each entry is a (key, value) pair of (image key, 16-hex-digit hash string).

# Full sample set of ten entries; it is not passed to any function in this cell.
example_hash_strings= [("0WS86GPURFK5","c68686868e0f0e1c"), 
                    ("76KUS3QCGVCY","c78786868e0f0e1c") ,
                    ("96EC4QS20Z28","c78786868e0f0e1c"),
                    ("CL8W7L333U90","c78706868e0f0e1c"),
                    ("FDAZ5NL5NFL2","c7870646ce0f0e1c"),
                    ("HBX8QLI9HH25","c7870686ce0f0e1c"),
                    ("JY2ZAINWD2RX","c68706068e0e0e1c"),
                    ("LP47ZGJ256YU","c78786068e0f0e1e"),
                    ("NTETO8P77N96","c78786868e0f0e1c"),
                    ("SLK2PRXGW3DZ","c78706868e0f0e1c")]

# Two-entry subset (the first two entries above) used for the quick check below.
example_hash_strings2= [("0WS86GPURFK5","c68686868e0f0e1c"), 
                    ("76KUS3QCGVCY","c78786868e0f0e1c") ]


# Print the pairwise Hamming distance for the two-entry subset.
find_hamming_distances(example_hash_strings2)


0WS86GPURFK5|76KUS3QCGVCY - c68686868e0f0e1c|c78786868e0f0e1c - 2


In [27]:
# NOTE(review): `distances` is not assigned at notebook level in any visible cell
# (it only exists as a local inside find_hamming_distances), so this cell
# presumably relied on leftover kernel state -- confirm it survives Restart & Run All.
distances
    

[(('0WS86GPURFK5', 'c68686868e0f0e1c'),
  ('76KUS3QCGVCY', 'c78786868e0f0e1c'),
  2)]