In [19]:
import cv2 as cv
import numpy as np
import glob
import os
import matplotlib.pyplot as plt
import pdb
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import svm
from sklearn.metrics import confusion_matrix

In [20]:
############ svm training
# Load the digit images used to train the SVM. The label of every image is
# taken from the first character of its file name.
digits_folder = 'lab3/digits'
# os.listdir returns entries in arbitrary order; sorting keeps the seeded
# train/test split below reproducible from one machine to the next.
images_names = sorted(os.listdir(digits_folder))
num_images = len(images_names)
image_size = (32, 32)
images = []
labels = []
for image_name in images_names:
    im = cv.imread(os.path.join(digits_folder, image_name))
    if im is None:
        # cv.imread returns None for entries it cannot decode (hidden files,
        # sub-folders, ...); skip them instead of crashing inside cv.resize.
        continue
    im = cv.resize(im, image_size)
    im = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
    images.append(im)
    label = int(image_name[0])
    labels.append(label)
images = np.array(images)
labels = np.array(labels)
train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.006, random_state=15)

def normalize_data(train_data, test_data, type='l2'):
    """Normalise two data sets with an L1 or L2 ``preprocessing.Normalizer``.

    Args:
        train_data: samples the scaler is fitted on and then transformed.
        test_data: samples transformed with the same scaler.
        type: 'l1' or 'l2'; any other value disables scaling.

    Returns:
        A ``(train, test)`` tuple with the transformed data, or the two inputs
        unchanged (after printing a notice) when ``type`` is not recognised.
    """
    if type not in ('l1', 'l2'):
        print("No scaling was performed. Raw data is returned.")
        return (train_data, test_data)

    # The accepted type names coincide with the Normalizer norm names.
    normalizer = preprocessing.Normalizer(norm=type)
    normalizer.fit(train_data)
    return (normalizer.transform(train_data), normalizer.transform(test_data))

# apply svm on raw pixels
# Every image becomes one flat row of pixels, which is then L2-normalised.
flat_shape = (-1, image_size[0] * image_size[1])
train_features, test_features = normalize_data(
    train_images.copy().reshape(flat_shape),
    test_images.copy().reshape(flat_shape),
    'l2',
)

# Linear SVM trained on the normalised pixel rows; the held-out rows are
# classified right away.
svm_model = svm.SVC(C=100, kernel='linear')
svm_model.fit(train_features, train_labels)
predicted_labels = svm_model.predict(test_features)

In [21]:
def image_matching(input_image):
    """Warp a sheet image onto the frame of the reference template.

    ORB features are matched between ``input_image`` and the template read
    from 'Project1/template.jpg' (downscaled by half). A RANSAC homography
    estimated from the matches is then used to warp ``input_image``.

    Args:
        input_image: 3-channel image, treated as BGR.

    Returns:
        ``input_image`` warped to the width and height of the resized template.

    Raises:
        FileNotFoundError: the template image could not be read.
        ValueError: too few features/matches were found to estimate a
            homography, or the estimation failed.
    """
    img_template = cv.imread('Project1/template.jpg')
    if img_template is None:
        # cv.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError("Could not read template image 'Project1/template.jpg'")
    img_template = cv.resize(img_template, (0, 0), fx=0.5, fy=0.5)

    img_template = cv.cvtColor(img_template, cv.COLOR_BGR2RGB)
    img_query = cv.cvtColor(input_image, cv.COLOR_BGR2RGB)

    orb = cv.ORB_create(nfeatures=10000)
    kp_template, des_template = orb.detectAndCompute(img_template, None)
    kp_query, des_query = orb.detectAndCompute(img_query, None)
    if des_template is None or des_query is None:
        # detectAndCompute yields no descriptors when no keypoint is found.
        raise ValueError("No ORB features found in the template or in the query image")

    bf = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
    matches = bf.match(des_query, des_template)
    matches = sorted(matches, key=lambda match: match.distance)
    if len(matches) < 4:
        # A homography needs at least 4 point correspondences.
        raise ValueError("Only {} matches found; at least 4 are required".format(len(matches)))

    if SHOW_INTERMEDIATE_RESULTS:
        img3 = cv.drawMatches(img_query, kp_query, img_template, kp_template, matches[:500], None,
                              flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
        cv.imshow("matching", cv.resize(img3, (0, 0), fx=0.4, fy=0.4))
        cv.waitKey(0)
        cv.destroyAllWindows()

    # The query image was passed first to bf.match, so queryIdx indexes the
    # query keypoints and trainIdx the template keypoints.
    template_points = np.float32([kp_template[m.trainIdx].pt for m in matches])
    query_points = np.float32([kp_query[m.queryIdx].pt for m in matches])

    H, _ = cv.findHomography(query_points, template_points, cv.RANSAC)
    if H is None:
        raise ValueError("Homography estimation failed")

    height, width, _ = img_template.shape
    warped_image = cv.warpPerspective(input_image, H, (width, height), flags=cv.INTER_NEAREST)

    if SHOW_INTERMEDIATE_RESULTS:
        cv.imshow("wraped image", cv.resize(warped_image, (0, 0), fx=0.4, fy=0.4))
        cv.waitKey(0)
        cv.destroyAllWindows()

    return warped_image

def prep_image(image):
    """Bring a sheet image to a common frame and convert it to grayscale.

    Images whose mean intensity is below 215 get their contrast boosted and
    are aligned to the template; the others are cropped to the band between
    45% and 88% of their height.

    Args:
        image: 3-channel image, treated as BGR.

    Returns:
        The prepared image converted to grayscale.
    """
    def _preview(title, img):
        # Optional debug window, active only when the global flag is set.
        if SHOW_INTERMEDIATE_RESULTS:
            cv.imshow(title, cv.resize(img, (0, 0), fx=0.4, fy=0.4))
            cv.waitKey(NUM_OF_SECONDS)
            cv.destroyAllWindows()

    _preview("original image", image)

    sheet_height, _, _ = image.shape
    # mean of scanned images is about 240
    # mean of rotation/perspective is about 190
    # 215 is a sensible value for a threshold to determine whether it's scanned or not
    if image.mean() < 215:
        image = image_matching(make_contrast(image))
    else:
        first_row = int(sheet_height * 0.45)
        last_row = int(sheet_height * 0.88)
        image = image[first_row:last_row]

    _preview("cropped image", image)

    # transform the image to grayscale
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    _preview("grayscale image", gray)
    return gray

def find_rows(grayscale_image):
    """Locate the horizontal grid lines of an answer table.

    Rows are ranked by how many strong vertical-gradient (Sobel dy) pixels
    they contain. The best candidates are merged when they lie within 30
    pixels of each other, and the 16 lowest distinct lines are kept.

    Args:
        grayscale_image: single-channel image.

    Returns:
        At most 16 lines ordered top to bottom, each as
        ``[(0, y), (width, y)]`` with plain ``int`` coordinates.
    """
    def _show(title, img):
        cv.imshow(title, cv.resize(img, (0, 0), fx=0.4, fy=0.4))
        cv.waitKey(NUM_OF_SECONDS)
        cv.destroyAllWindows()

    edges_y = np.abs(cv.Sobel(grayscale_image, ddepth=cv.CV_64F, dx=0, dy=1))
    strongest = edges_y.max()
    if strongest > 0:
        # A perfectly flat image has no gradient; skipping the division keeps
        # the response at 0 instead of turning it into NaN.
        edges_y = edges_y / strongest

    if SHOW_INTERMEDIATE_RESULTS:
        _show("edges_y", edges_y)

    _, edges_y_th = cv.threshold(edges_y, 0.4, 255, cv.THRESH_BINARY_INV)  # the second param is the threshold
    if SHOW_INTERMEDIATE_RESULTS:
        _show("edges_y", edges_y_th)

    # THRESH_BINARY_INV maps responses above the threshold to 0, so the zeros
    # are the strong edge pixels; count them per row and rank the rows.
    edge_pixels_per_row = np.sum(edges_y_th == 0, axis=1)
    rows_by_strength = edge_pixels_per_row.argsort()

    num_lines = 120
    if grayscale_image.mean() > 215:
        # Brighter images get a shorter candidate list.
        num_lines = 80
    # Never ask for more candidates than the image has rows.
    num_lines = min(num_lines, len(rows_by_strength))

    width = grayscale_image.shape[1]
    lines = [[(0, int(y)), (width, int(y))] for y in rows_by_strength[::-1][:num_lines]]

    if SHOW_INTERMEDIATE_RESULTS:
        candidates_image = np.dstack((edges_y_th, edges_y_th, edges_y_th))
        for start, end in lines:
            cv.line(candidates_image, start, end, (0, 0, 255), 2)
        _show("edges_y_th", candidates_image)

    lines.sort(key=lambda coords: coords[0][1])

    # Walk top to bottom and keep a line only when it is far enough from the
    # previously kept one.
    threshold_same_line = 30
    distinct_lines = [lines[0]]
    for line in lines:
        if line[0][1] - distinct_lines[-1][0][1] > threshold_same_line:
            distinct_lines.append(line)

    correct_lines = distinct_lines[-16:]

    if SHOW_INTERMEDIATE_RESULTS:
        color_image = np.dstack((grayscale_image, grayscale_image, grayscale_image))
        for line in correct_lines:
            cv.line(color_image, line[0], line[1], (255, 0, 0), 5)
        _show("Detected lines", color_image)

    return correct_lines

def find_columns(grayscale_image):
    """Locate the vertical grid lines of an answer table.

    Columns are ranked by how many strong horizontal-gradient (Sobel dx)
    pixels they contain. The best candidates are merged when they lie within
    50 pixels of each other, and the 5 right-most distinct lines are kept.

    Args:
        grayscale_image: single-channel image.

    Returns:
        At most 5 lines ordered left to right, each as
        ``[(x, 0), (x, height)]`` with plain ``int`` coordinates.
    """
    def _show(title, img):
        cv.imshow(title, cv.resize(img, (0, 0), fx=0.4, fy=0.4))
        cv.waitKey(NUM_OF_SECONDS)
        cv.destroyAllWindows()

    edges_x = np.abs(cv.Sobel(grayscale_image, ddepth=cv.CV_64F, dx=1, dy=0))
    strongest = edges_x.max()
    if strongest > 0:
        # A perfectly flat image has no gradient; skipping the division keeps
        # the response at 0 instead of turning it into NaN.
        edges_x = edges_x / strongest

    if SHOW_INTERMEDIATE_RESULTS:
        _show("edges_x_1", edges_x)

    _, edges_x_th = cv.threshold(edges_x, 0.20, 255, cv.THRESH_BINARY_INV)
    if SHOW_INTERMEDIATE_RESULTS:
        _show("edges_x_2", edges_x_th)

    # THRESH_BINARY_INV maps responses above the threshold to 0, so the zeros
    # are the strong edge pixels; count them per column and rank the columns.
    edge_pixels_per_col = np.sum(edges_x_th == 0, axis=0)
    cols_by_strength = edge_pixels_per_col.argsort()

    num_cols = 80
    if grayscale_image.mean() > 215:
        # Brighter images get a shorter candidate list.
        num_cols = 50
    # Never ask for more candidates than the image has columns.
    num_cols = min(num_cols, len(cols_by_strength))

    height = grayscale_image.shape[0]
    cols = [[(int(x), 0), (int(x), height)] for x in cols_by_strength[::-1][:num_cols]]

    if SHOW_INTERMEDIATE_RESULTS:
        candidates_image = np.dstack((edges_x_th, edges_x_th, edges_x_th))
        for start, end in cols:
            cv.line(candidates_image, start, end, (0, 0, 255), 2)
        _show("edges_x_th", candidates_image)

    cols.sort(key=lambda coords: coords[0][0])

    # Walk left to right and keep a line only when it is far enough from the
    # previously kept one.
    threshold_same_column = 50
    distinct_cols = [cols[0]]
    for col in cols:
        if col[0][0] - distinct_cols[-1][0][0] > threshold_same_column:
            distinct_cols.append(col)

    correct_cols = distinct_cols[-5:]

    if SHOW_INTERMEDIATE_RESULTS:
        color_image = np.dstack((grayscale_image, grayscale_image, grayscale_image))
        for col in correct_cols:
            cv.line(color_image, col[0], col[1], (255, 0, 0), 5)
        _show("Detected cols", color_image)

    return correct_cols

def find_table(grayscale_image):
    """Detect the answer table grid and crop the table out of the image.

    Args:
        grayscale_image: single-channel image containing one table.

    Returns:
        ``(table, [x_min, y_min, x_max, y_max], cols, rows)`` where ``table``
        is the crop between the first/last detected column and row lines and
        ``cols`` / ``rows`` are the lines returned by ``find_columns`` and
        ``find_rows``.

    Raises:
        IndexError: fewer than 5 column lines or 16 row lines were detected.
    """
    expected_cols, expected_rows = 5, 16

    cols = find_columns(grayscale_image.copy())
    rows = find_rows(grayscale_image.copy())
    if len(cols) < expected_cols or len(rows) < expected_rows:
        # Fail with an explicit message rather than with a bare index error
        # further down.
        raise IndexError(
            "Expected {} column lines and {} row lines, found {} and {}".format(
                expected_cols, expected_rows, len(cols), len(rows)))

    x_min = cols[0][0][0]
    x_max = cols[-1][1][0]
    y_min = rows[0][0][1]
    y_max = rows[-1][1][1]

    table = grayscale_image[y_min:y_max, x_min:x_max]

    if SHOW_INTERMEDIATE_RESULTS:
        # The colour overlay only serves the debug windows.
        image = np.dstack((grayscale_image, grayscale_image, grayscale_image))
        for col in cols[:expected_cols]:
            cv.line(image, col[0], col[1], (255, 0, 0), 5)
        for row in rows[:expected_rows]:
            cv.line(image, row[0], row[1], (0, 0, 255), 5)
        cv.imshow('image', cv.resize(image, (0, 0), fx=0.4, fy=0.4))
        cv.imshow('table', cv.resize(table, (0, 0), fx=0.4, fy=0.4))
        cv.waitKey(0)
        cv.destroyAllWindows()

    return table, [x_min, y_min, x_max, y_max], cols, rows

def detect_subject_choice(choice_patch_grayscale):
    """Find the two largest dark regions of a grayscale patch.

    Pixels darker than 50 are treated as foreground and their contours are
    extracted; every contour is reduced to its axis-aligned bounding box.

    Args:
        choice_patch_grayscale: single-channel image patch.

    Returns:
        ``(bbox_1, bbox_2)`` - the bounding boxes with the largest and the
        second largest area, each as ``[(x_min, y_min), (x_max, y_max)]`` -
        or ``None`` when fewer than two usable contours exist.
    """
    mask = np.uint8(choice_patch_grayscale < 50)
    # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 4 returns
    # (contours, hierarchy); the contour list is second from the end in both.
    contours = cv.findContours(mask, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)[-2]

    areas = []
    coords = []
    for contour in contours:
        contour = np.squeeze(contour)
        if contour.ndim == 1:
            # A single-point contour squeezes down to one (x, y) pair.
            continue

        # in contour, 0->x and 1->y
        x_min, y_min = (int(v) for v in contour.min(axis=0))
        x_max, y_max = (int(v) for v in contour.max(axis=0))

        areas.append((x_max - x_min) * (y_max - y_min))
        coords.append([(x_min, y_min), (x_max, y_max)])

    if len(areas) < 2:
        return None

    by_area = np.argsort(areas)
    return coords[by_area[-1]], coords[by_area[-2]]
    
def subject_choice(right_grayscale_image):
    """Locate the two subject check-boxes above the right-hand table.

    The search area spans from the top of the image down to the first
    detected row line, between the last two detected column lines.

    Args:
        right_grayscale_image: single-channel image of the right half-sheet.

    Returns:
        ``(patch, bbox_1, bbox_2, r_cols, r_rows)``: the search patch, the two
        boxes found by ``detect_subject_choice`` (coordinates relative to the
        patch) and the column/row lines of the table.

    Raises:
        ValueError: fewer than two candidate boxes were found in the patch.
    """
    _, _, r_cols, r_rows = find_table(right_grayscale_image)
    x_min = r_cols[-2][0][0]
    x_max = r_cols[-1][0][0]
    y_min = 0
    y_max = r_rows[0][1][1]

    patch = right_grayscale_image[y_min:y_max, x_min:x_max]

    if SHOW_INTERMEDIATE_RESULTS:
        cv.imshow('area for searching the choice', patch)
        cv.waitKey(0)
        cv.destroyAllWindows()

    detection = detect_subject_choice(patch)
    if detection is None:
        # detect_subject_choice returns None when it cannot find two boxes.
        raise ValueError("Could not find the two subject choice boxes")
    bbox_1, bbox_2 = detection

    if SHOW_INTERMEDIATE_RESULTS:
        # The colour overlay only serves the debug window.
        choice_patch_color = np.dstack((patch, patch, patch))
        for bbox, color in ((bbox_1, (255, 0, 255)), (bbox_2, (0, 255, 255))):
            top_left = tuple(int(v) for v in bbox[0])
            bottom_right = tuple(int(v) for v in bbox[1])
            cv.rectangle(choice_patch_color, top_left, bottom_right, color=color, thickness=5)
        cv.imshow('subject choice', choice_patch_color)
        cv.waitKey(0)
        cv.destroyAllWindows()

    return patch, bbox_1, bbox_2, r_cols, r_rows



def find_x_from_gt(grayscale_image, vertical_lines, horizontal_lines, ground_truth):
    """Collect mean cell intensities, split by the ground-truth answers.

    Every table cell (shrunk by 15 px at the top/left and 5 px at the
    bottom/right) is averaged and rounded. Cells whose column matches the
    ground-truth answer of their row go to one list, the rest to another.

    Args:
        grayscale_image: single-channel image of one table.
        vertical_lines: column lines, each ``[(x, y0), (x, y1)]``.
        horizontal_lines: row lines, each ``[(x0, y), (x1, y)]``.
        ground_truth: per-row sequence whose element 1 is the answer letter,
            looked up in ``char_to_index``.

    Returns:
        ``(mean_x, mean_blank)``: rounded means of the answer cells and of the
        remaining cells.
    """
    marked_means = []
    unmarked_means = []

    canvas = np.dstack((grayscale_image,) * 3)
    marked_color = (0, 255, 0)  # green
    unmarked_color = (0, 0, 255)  # red

    n_rows = len(horizontal_lines) - 1
    n_cols = len(vertical_lines) - 1
    for row in range(n_rows):
        for col in range(n_cols):
            left = vertical_lines[col][0][0] + 15
            right = vertical_lines[col + 1][1][0] - 5
            top = horizontal_lines[row][0][1] + 15
            bottom = horizontal_lines[row + 1][1][1] - 5

            cell_mean = np.round(grayscale_image[top:bottom, left:right].mean())

            is_answer = char_to_index[ground_truth[row][1]] == col
            (marked_means if is_answer else unmarked_means).append(cell_mean)

            cv.rectangle(canvas, (left, top), (right, bottom),
                         color=marked_color if is_answer else unmarked_color, thickness=5)
            cv.putText(canvas, str(cell_mean)[:3], (left + 10, top + 50),
                       cv.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

    if SHOW_INTERMEDIATE_RESULTS:
        cv.imshow("image", cv.resize(canvas, (0, 0), fx=0.4, fy=0.4))
        cv.waitKey(0)
        cv.destroyAllWindows()

    return marked_means, unmarked_means

def find_x_from_img(grayscale_image, vertical_lines, horizontal_lines, ground_truth):
    """Count the rows whose darkest cell matches the ground-truth answer.

    For every table row the cell with the lowest mean intensity (cells are
    shrunk by 15 px at the top/left and 5 px at the bottom/right) is taken as
    the marked answer and compared with the expected one.

    Args:
        grayscale_image: single-channel image of one table.
        vertical_lines: column lines, each ``[(x, y0), (x, y1)]``.
        horizontal_lines: row lines, each ``[(x0, y), (x1, y)]``.
        ground_truth: per-row sequence whose element 1 is the answer letter,
            looked up in ``char_to_index``.

    Returns:
        Number of rows answered correctly.
    """
    canvas = np.dstack((grayscale_image,) * 3)
    n_correct = 0
    n_cols = len(vertical_lines) - 1

    for row in range(len(horizontal_lines) - 1):
        top = horizontal_lines[row][0][1] + 15
        bottom = horizontal_lines[row + 1][1][1] - 5

        # Track the darkest cell of the row; only a mean strictly below the
        # current minimum (starting at 255) replaces it.
        darkest_mean = 255
        darkest_col = 0
        darkest_box = (0, 0, 0, 0)
        for col in range(n_cols):
            left = vertical_lines[col][0][0] + 15
            right = vertical_lines[col + 1][1][0] - 5
            cell_mean = grayscale_image[top:bottom, left:right].mean()
            if cell_mean < darkest_mean:
                darkest_mean = cell_mean
                darkest_col = col
                darkest_box = (left, top, right, bottom)

        if char_to_index[ground_truth[row][1]] != darkest_col:
            continue

        n_correct += 1
        x1, y1, x2, y2 = darkest_box
        cv.rectangle(canvas, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=5)
        cv.putText(canvas, str(darkest_mean)[:3], (x1 + 10, y1 + 50),
                   cv.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2)

    if SHOW_INTERMEDIATE_RESULTS:
        cv.imshow("graded image", cv.resize(canvas, (0, 0), fx=0.4, fy=0.3))
        cv.waitKey(0)
        cv.destroyAllWindows()

    return n_correct

def make_contrast(img):
    """Boost the contrast of an image with CLAHE on the LAB lightness channel.

    Args:
        img: 3-channel image, treated as BGR.

    Returns:
        The image converted back to BGR after equalising its first LAB channel.
    """
    lightness, chan_a, chan_b = cv.split(cv.cvtColor(img, cv.COLOR_BGR2LAB))
    equalizer = cv.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    enhanced_lab = cv.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv.cvtColor(enhanced_lab, cv.COLOR_LAB2BGR)
    

def grade(i):
    """Grade one answer sheet and return its mark.

    The sheet is prepared with ``prep_image`` and split into a left and a
    right half. The subject box (upper box -> 'I', lower box -> 'F') and the
    variant digit (classified by ``svm_model``) select the answer key from
    ``correct``, and the marked cells of both tables are compared with it.

    Args:
        i: path of the image file to grade.

    Returns:
        ``(correct_left + correct_right) * 9 / 30 + 1``.

    Raises:
        FileNotFoundError: the image could not be read.
    """
    original_image = cv.imread(i)
    if original_image is None:
        # cv.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError("Could not read image '{}'".format(i))

    grayscale_image = prep_image(original_image)
    _, orig_w = grayscale_image.shape

    def _trim_sides(half):
        # Drop the outer 10% of the width on each side of a half-sheet.
        _, w = half.shape
        return half[:, int(w * 0.1):int(w * 0.9)]

    left_grayscale_image = _trim_sides(grayscale_image[:, :orig_w // 2])
    right_grayscale_image = _trim_sides(grayscale_image[:, orig_w // 2:])

    _, _, l_cols, l_rows = find_table(left_grayscale_image)
    choice_patch, bbox1, bbox2, r_cols, r_rows = subject_choice(right_grayscale_image)

    if bbox2[0][1] > bbox1[0][1]:  # sometimes they come interchanged. we sort them by y_min
        bbox2, bbox1 = bbox1, bbox2

    # After sorting, bbox2 is the upper box and bbox1 the lower one.
    patch_i = choice_patch[bbox2[0][1]:bbox2[1][1], bbox2[0][0]:bbox2[1][0]]
    patch_i = cv.resize(patch_i, image_size)

    patch_f = choice_patch[bbox1[0][1]:bbox1[1][1], bbox1[0][0]:bbox1[1][0]]
    patch_f = cv.resize(patch_f, image_size)

    # take the darker patch
    if patch_f.mean() > patch_i.mean():
        test_img = patch_i.copy()
        subj = 'I'
    else:
        test_img = patch_f.copy()
        subj = 'F'

    test_img = test_img.reshape([-1, image_size[0] * image_size[1]])
    # normalize_data fits on its first argument; sklearn documents Normalizer
    # as stateless (each sample is scaled independently), so the sample itself
    # is passed for both arguments.
    _, test_img = normalize_data(test_img, test_img, 'l2')
    pred_label = svm_model.predict(test_img)
    subj_nr = str(pred_label[0])

    # `correct` holds the keys of variants 1-4 for 'F' followed by variants
    # 1-4 for 'I'.
    # NOTE(review): an unrecognised variant digit falls back to correct[0] for
    # both subjects, as before - confirm this is the intended default.
    variant_index = {'1': 0, '2': 1, '3': 2, '4': 3}.get(subj_nr)
    if variant_index is None:
        key_index = 0
    else:
        key_index = {'F': 0, 'I': 4}[subj] + variant_index

    answers = correct[key_index]
    correct_left = answers[1:16]
    correct_right = answers[16:-1]

    x = find_x_from_img(left_grayscale_image, l_cols, l_rows, correct_left)
    y = find_x_from_img(right_grayscale_image, r_cols, r_rows, correct_right)
    return (x + y) * 9 / 30 + 1



In [22]:
# Answer keys: Fizica variants 1-4 followed by Informatica variants 1-4.
correct = []
for subject in ('Fizica', 'Informatica'):
    for variant in range(1, 5):
        answers_path = 'Project1/ground-truth-correct-answers/{}_varianta{}.txt'.format(subject, variant)
        correct.append(np.loadtxt(answers_path, dtype=str))

base_folder = 'Project1/additional_data/3.no_annotation/'

char_to_index = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
# Passed to cv.waitKey, which takes its delay in milliseconds; 0 waits for a
# key press.
NUM_OF_SECONDS = 0
SHOW_INTERMEDIATE_RESULTS = 0
# glob returns matches in arbitrary order; sort them so that the report is
# written in a stable order.
images_names = sorted(glob.glob(os.path.join(base_folder, "*.jpg")))

# One line per image: "<file name>\t<grade>".
with open('gavrila_alexandru_407_task3.txt', 'w') as f:
    for image_path in images_names:
        try:
            gr = grade(image_path)
        except Exception as err:
            # Best effort: a sheet that cannot be processed is still reported
            # with grade 0, but the reason is no longer hidden.
            print("Could not grade {}: {!r}".format(image_path, err))
            gr = 0
        f.write(os.path.basename(image_path) + '\t' + str(gr) + '\n')