In [27]:
#IMPORTS
import cv2
import numpy as np 
import os
from pytesseract import Output
import pytesseract
import nltk
from nltk.corpus import words
import PIL.Image as Image
import warnings
import tensorflow as tf
import csv
import time
from tqdm import tqdm
import shutil
#Proceed to EOF to use the grading function, run all functions sequentially

In [28]:
warnings.filterwarnings('ignore')

In [29]:
# Download the word list
nltk.download('words')
# Get the list of valid words
valid_words = set(words.words())

[nltk_data] Downloading package words to
[nltk_data]     C:\Users\tusha\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


In [30]:
def is_gibberish(text, threshold=0.40):
    words_in_text = text.split()
    if not words_in_text:
        return True
    
    valid_count = sum(1 for word in words_in_text if word.lower() in valid_words)
    valid_ratio = valid_count / len(words_in_text)

    
    return valid_ratio < threshold


def rotate_image(image, angle):
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)

    # Calculate the rotation matrix
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    
    # Calculate the sine and cosine of the rotation matrix
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to account for translation
    M[0, 2] += (new_w / 2) - center[0]
    M[1, 2] += (new_h / 2) - center[1]

    # Perform the actual rotation and return the image
    rotated = cv2.warpAffine(image, M, (new_w, new_h))
    return rotated


def correct_orientation(image):
    h, w = image.shape[:2]
    new_image = image.copy()
    if (h<w) :
        new_image = rotate_image(image, 90)
    text = pytesseract.image_to_string(new_image)
    if is_gibberish(text):
        new_image = rotate_image(new_image, 180)
        
    return new_image
    

def resizeImg(img, scale = 0.1): 
    width = int(img.shape[1]*scale)
    height = int(img.shape[0]*(scale))
    dimensions = (width,height)
    return cv2.resize(img,dimensions,interpolation=cv2.INTER_AREA)

def compressImages(directory):
    filenames = os.listdir(directory)
    for image in filenames:
        img = cv2.imread(f'{directory}/{image}')  
        scale = round(900/max(img.shape[:2]), 2)
        img_reshape = resizeImg(img, scale)
        img_gray = cv2.cvtColor(img_reshape,cv2.COLOR_BGR2GRAY)

        img_orn = correct_orientation(img_gray)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(1,1))
        sharpened_image = cv2.filter2D(img_orn, -1, kernel)
        if not os.path.exists("compressed"):
              os.makedirs("compressed")
        
        cv2.imwrite(f"compressed/{image}",sharpened_image)
    print("Done")

def compressImg(img):
    scale = round(900/max(img.shape[:2]), 2)
    img_reshape = resizeImg(img, scale)
    img_gray = cv2.cvtColor(img_reshape,cv2.COLOR_BGR2GRAY)

    img_orn = correct_orientation(img_gray)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(1,1))
    sharpened_image = cv2.filter2D(img_orn, -1, kernel)

    return sharpened_image
    


In [31]:
def save_tf_answers(img, qno = -1):
    
    try:
        img  = compressImg(img)
        img = (img[:, 3*img.shape[1]//5:])
        H,W = img.shape[0], img.shape[1]
    
        #print(img.shape)
        #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        #Image.fromarray(gray).show()
        canny = cv2.Canny(img, 50, 200, None, 3)
        #Image.fromarray(canny).show()
        
        try:
            lines = cv2.HoughLines(canny, 1, np.pi/180, 200)
            lines = np.reshape(lines, (lines.shape[0], 2))
        
            
            vertical_lines = []
            
            for  i in lines:
                if abs(np.cos(i[1]))>0.99:
                    if(np.cos(i[1])<0):
                        i[0] = abs(i[0])
                        i[1] = np.pi + i[1]
                        vertical_lines.append(i)
                    else:
                        vertical_lines.append(i)
                        
            vertical_lines = np.array(vertical_lines)
            vertical_lines = vertical_lines[vertical_lines[:, 0].argsort()]
            v_edges = [vertical_lines[0],]
            
            for i in range(1,len(vertical_lines)):
                if abs(vertical_lines[i][0] - v_edges[len(v_edges)-1][0]) < W//3: #Magic number
                    pass
                else:
                    v_edges.append(vertical_lines[i])
        
            v_edges = np.array(v_edges, dtype = int)
            
    
        
            TFcol = img[:, v_edges[0][0]:v_edges[1][0]]  #Image of the True/False Column
            
        except Exception as e:
            print(e)
            print(f"ERROR")
            return None

        
        #Image.fromarray(TFcol).show()    
        cannyTF = cv2.Canny(TFcol, 10, 100, None, 3) 
    
        boxTF = cannyTF.copy()  # To show locations of True/False boxes after processing canny image of True/False Column Image
        horizontal = []         # Tracks the horizontal lines
        kernel = 5              # Determines sensitivity in Line detection, ( 3 - 7) is the ideal range
        tolerance = 10
        for i in range(kernel,cannyTF.shape[0]-kernel):
            h_line = False
            for j in range(tolerance, cannyTF.shape[1]-tolerance):
                for k in range(-1*kernel, kernel):
                    if (cannyTF[i+k][j-1] == 255 or cannyTF[i+k][j] == 255 or cannyTF[i+k][j+1] == 255):
                        h_line = True
                        break
                else:
                    h_line = False
                    break
                
                h_line = True
            if (h_line):
                horizontal.append(i)
                boxTF[i,:] = 0
            else:
                boxTF[i, :] = 255
        
        min_lines = []                       # Stores the row of 1 line from each horizontal lines cluster, 
        s, n = 0, 0
        
        for i in range(len(horizontal)-1):
            if horizontal[i+1]-horizontal[i] <= kernel:
                s += horizontal[i]
                n += 1
                if( i == len(horizontal)-2):
                    min_lines.append(int(s/n))
    
            else:
                s += horizontal[i]
                n += 1
                min_lines.append(int(s/n))
                if( i == len(horizontal)-2):
                    min_lines.append(horizontal[-1])
                s = 0
                n = 0
                
        #print(horizontal)
    
        #Removes if a line is detected at the top of the Image
        try:
            n = len(min_lines)
            s = 0
            diff = []
            for i in range(0, n-1):
               diff.append(min_lines[i+1]-min_lines[i])
            diff = np.array(diff)
            mean_diff = np.mean(diff)
            std_diff = np.std(diff)
            ini_dist = 0
            #print(mean_diff, std_diff, min_lines)
                
            if min_lines[0] < 10:
                min_lines = min_lines[1:]
            if (qno < 0):
                pass
            else:
                try:
                    min_lines = min_lines[:qno+1]
                    
                except:
                    pass
            if(len(min_lines) >=13):
                    min_lines = min_lines[2:]
                
        except Exception as e:
            print(e)
            print(f"ERROR B") 
    
        splits = 0
        splitted_images = []
        img_height = 40
        img_width = 100
        for i in range(len(min_lines)-1):
            sliced_img = TFcol[min_lines[i]:min_lines[i+1], :]
            
            if (sliced_img.shape[0] < 25 or sliced_img.shape[0]>200):
                continue
            resized_img = cv2.resize(sliced_img, (img_width, img_height))
            _, binary_img = cv2.threshold(resized_img, 128, 255, cv2.THRESH_BINARY)
            rescaled_img = binary_img / 255.0
            splitted_images.append(rescaled_img)
            splits += 1
        #print("splits ", splits)
        splitted_images = np.array(splitted_images)
        return splitted_images 
    except Exception as e:
        print(e)


In [32]:
def evaluate_answers(model, images, correct_answers):
    

    # Predict the output for each image using the model
    predictions = [model.predict(np.expand_dims(image, axis=0), verbose = 0) for image in images]
    
    # Convert predictions to readable format
    predicted_answers = [np.argmax(pred) for pred in predictions]
    
    # Map numeric predictions to 'True', 'False', or 'Blank'
    prediction_map = {0: 'True', 1: 'False', 2: 'Empty'}
    predicted_answers = [prediction_map[pred] for pred in predicted_answers]
    
    # Calculate marks
    marks = []
    
    
    
    for pred, correct in zip(predicted_answers, correct_answers):
        if pred == correct:
            marks.append(1)
        else:
            marks.append(0)
    
    return marks

In [35]:
def Grade_Sheets(directory, model, correct_answers, qno = -1):
    current_time = str(time.time())[:10]
    with open(f"Graded_Sheets_cnn_{current_time}.csv", "w", newline = "") as f:
        writer = csv.writer(f )
        writer.writerow(["ID","Predicted Marks"])
        
        files = [file for file in os.listdir(directory) if file.endswith(('jpg', 'jpeg', 'png'))]
        
        for file in tqdm(files, desc="Grading Sheets"):
            try:
                img = cv2.imread(f"{directory}/{file}")
                images = save_tf_answers(img, qno)
                
                marks = evaluate_answers(model, images, correct_answers)
                
                writer.writerow([file, np.sum(marks)])
            except:
                print(file)

In [None]:
#Image Segregator
#Change the file paths then run

#Only Run if you need to Segregate Images based on Mapping
#Create a Copy of Original Dataset before Running

map_file = "Phase-1 Evaluation Dataset/img_model_answer_mapping.csv"
img_directory = "Phase-1 Evaluation Dataset/test"
with open(map_file, "r") as f:
    reader = csv.reader(f); 
    skip = 0
    for row in reader:
        if(skip == 0):
            skip = 1
            continue
        mapping = row[1]
        image_name = row[0]
        if not os.path.exists(mapping):
            os.makedirs(mapping)
        try:
            shutil.move(f'{img_directory}/{image_name}', f'{mapping}/{image_name}')
        except Exception as e:
            print(e)

In [36]:
#USAGE => Add the directory path to the sheets to be graded in directory variable, load model, load correct answers.

directory = "model_answer_type2"
model_path = "best_models/best_model_cnn.keras"
model = tf.keras.models.load_model(model_path)
correct_answers = ['True', 'True', 'False', 'False', 'False', 'False', 'False', 'True', 'True', 'True']
#correct_answers = ['False', 'False', 'False', 'False', 'False', 'False', 'True', 'True', 'True', 'True']


s = time.time()
Grade_Sheets(directory, model, correct_answers)
print("FINISHED:", time.time()-s)

Grading Sheets:   0%|▏                                                                 | 1/482 [00:00<03:16,  2.44it/s]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
04t0i6U.jpg


Grading Sheets:   1%|▍                                                                 | 3/482 [00:03<09:47,  1.23s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
0Ld34zC.jpg


Grading Sheets:   1%|▊                                                                 | 6/482 [00:07<09:28,  1.19s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
0qZVJKm.jpg


Grading Sheets:   2%|█▎                                                               | 10/482 [00:13<10:13,  1.30s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
1BDUOhL.jpg


Grading Sheets:   3%|██▏                                                              | 16/482 [00:24<11:09,  1.44s/it]

'NoneType' object has no attribute 'shape'
ERROR
1zHXQVK.jpg


Grading Sheets:   5%|███▏                                                             | 24/482 [00:39<12:18,  1.61s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
40B8D9P.jpg


Grading Sheets:   6%|███▋                                                             | 27/482 [00:44<11:32,  1.52s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
54jROSP.jpg


Grading Sheets:   6%|███▊                                                             | 28/482 [00:45<10:10,  1.35s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
55RvBa9.jpg


Grading Sheets:  10%|██████▏                                                          | 46/482 [01:21<12:04,  1.66s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
7I0zVgw.jpg


Grading Sheets:  10%|██████▎                                                          | 47/482 [01:23<12:35,  1.74s/it]

list index out of range
ERROR B


Grading Sheets:  10%|██████▍                                                          | 48/482 [01:23<09:40,  1.34s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
7iS1YLO.jpg


Grading Sheets:  10%|██████▋                                                          | 50/482 [01:27<09:47,  1.36s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
7T7pRQW.jpg


Grading Sheets:  11%|███████▏                                                         | 53/482 [01:31<09:12,  1.29s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
89Qr0VM.jpg


Grading Sheets:  14%|█████████                                                        | 67/482 [01:57<09:34,  1.38s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
A4qd5JR.jpg


Grading Sheets:  15%|█████████▍                                                       | 70/482 [02:02<09:13,  1.34s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
AEvXq0w.jpg


Grading Sheets:  17%|███████████▎                                                     | 84/482 [02:32<13:24,  2.02s/it]

list index out of range
ERROR B


Grading Sheets:  20%|█████████████                                                    | 97/482 [02:56<10:33,  1.65s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
CgQmkHz.jpg


Grading Sheets:  27%|█████████████████▏                                              | 129/482 [03:57<07:31,  1.28s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
ebHOerd.jpg


Grading Sheets:  28%|█████████████████▋                                              | 133/482 [04:05<10:51,  1.87s/it]

list index out of range
ERROR B


Grading Sheets:  30%|███████████████████▌                                            | 147/482 [04:29<08:15,  1.48s/it]

'NoneType' object has no attribute 'shape'
ERROR
FMctx8O.jpg


Grading Sheets:  34%|█████████████████████▉                                          | 165/482 [05:07<09:28,  1.79s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
GdDku30.jpg


Grading Sheets:  35%|██████████████████████▍                                         | 169/482 [05:12<07:05,  1.36s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
GiTl4gv.jpg


Grading Sheets:  35%|██████████████████████▌                                         | 170/482 [05:12<05:40,  1.09s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
gKmfNLx.jpg


Grading Sheets:  38%|████████████████████████▌                                       | 185/482 [05:39<06:41,  1.35s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
H2iPb1j.jpg


Grading Sheets:  40%|█████████████████████████▍                                      | 192/482 [05:54<08:34,  1.77s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
hhJrQUf.jpg


Grading Sheets:  41%|██████████████████████████▎                                     | 198/482 [06:06<08:35,  1.82s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
IbBere6.jpg


Grading Sheets:  42%|██████████████████████████▋                                     | 201/482 [06:11<07:41,  1.64s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
icWtiVC.jpg


Grading Sheets:  44%|███████████████████████████▉                                    | 210/482 [06:30<07:29,  1.65s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
IUmBbJR.jpg


Grading Sheets:  46%|█████████████████████████████▌                                  | 223/482 [06:54<06:31,  1.51s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
JR5NBIN.jpg


Grading Sheets:  52%|█████████████████████████████████                               | 249/482 [07:46<05:57,  1.54s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
l1ZVgCG.jpg


Grading Sheets:  52%|█████████████████████████████████▍                              | 252/482 [07:53<07:18,  1.91s/it]

list index out of range
ERROR B


Grading Sheets:  54%|██████████████████████████████████▌                             | 260/482 [08:08<06:37,  1.79s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
LSxnasq.jpg


Grading Sheets:  57%|████████████████████████████████████▏                           | 273/482 [08:35<06:16,  1.80s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
MmuQQou.jpg


Grading Sheets:  57%|████████████████████████████████████▌                           | 275/482 [08:38<05:32,  1.61s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
mrijc85.jpg


Grading Sheets:  60%|██████████████████████████████████████▎                         | 289/482 [09:06<05:59,  1.86s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
niEMuqt.jpg


Grading Sheets:  60%|██████████████████████████████████████▌                         | 290/482 [09:07<04:39,  1.45s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
NNgkxyl.jpg


Grading Sheets:  61%|██████████████████████████████████████▊                         | 292/482 [09:09<04:14,  1.34s/it]

list index out of range
ERROR B


Grading Sheets:  61%|███████████████████████████████████████▏                        | 295/482 [09:15<05:26,  1.74s/it]

list index out of range
ERROR B


Grading Sheets:  62%|███████████████████████████████████████▍                        | 297/482 [09:19<05:14,  1.70s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
nx4Jtjd.jpg


Grading Sheets:  71%|█████████████████████████████████████████████▋                  | 344/482 [10:56<03:28,  1.51s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
rijC15L.jpg


Grading Sheets:  72%|█████████████████████████████████████████████▉                  | 346/482 [10:58<02:48,  1.24s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
rjFS7o6.jpg


Grading Sheets:  72%|██████████████████████████████████████████████▏                 | 348/482 [11:00<02:26,  1.09s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
RNxITzv.jpg


Grading Sheets:  75%|███████████████████████████████████████████████▉                | 361/482 [11:28<04:13,  2.09s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
Sb9xEDW.jpg


Grading Sheets:  79%|██████████████████████████████████████████████████▍             | 380/482 [12:09<03:03,  1.80s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
SxOFztL.jpg


Grading Sheets:  80%|███████████████████████████████████████████████████▍            | 387/482 [12:25<03:07,  1.98s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
tDvfNiE.jpg


Grading Sheets:  82%|████████████████████████████████████████████████████▎           | 394/482 [12:40<02:46,  1.90s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
U1tXBng.jpg


Grading Sheets:  89%|█████████████████████████████████████████████████████████▏      | 431/482 [13:55<01:34,  1.86s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
wGegiaZ.jpg


Grading Sheets:  97%|██████████████████████████████████████████████████████████████  | 467/482 [15:10<00:26,  1.75s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
yRPD0Dn.jpg


Grading Sheets:  98%|██████████████████████████████████████████████████████████████▌ | 471/482 [15:17<00:16,  1.50s/it]

index 1 is out of bounds for axis 0 with size 1
ERROR
yyY2D8i.jpg


Grading Sheets:  98%|██████████████████████████████████████████████████████████████▉ | 474/482 [15:22<00:11,  1.46s/it]

'NoneType' object has no attribute 'shape'
ERROR
zaZoS31.jpg


Grading Sheets: 100%|███████████████████████████████████████████████████████████████▊| 481/482 [15:36<00:01,  1.76s/it]

too many indices for array: array is 1-dimensional, but 2 were indexed
ERROR
zWXFXYW.jpg


Grading Sheets: 100%|████████████████████████████████████████████████████████████████| 482/482 [15:39<00:00,  1.95s/it]

FINISHED: 939.0876700878143





In [37]:
#Untested 

submission_dict = []
submission_list = []
with open("Phase-1 Evaluation Dataset/submission.csv", "r") as f:
    reader = csv.reader(f)
    skip = 0
    
    for row in reader:
        if(skip == 0):
            skip = 1
            continue
        submission_dict[row[0]] = 5
        submission_list.append(row[0])
print(submission_dict)
with open("prediction.csv", 'r') as f:
    reader = csv.reader(f)
    skip = 0
    
    for row in reader:
        if(skip == 0):
            skip = 1
            continue
        submission_dict[row[0]] = row[1]

if not os.path.exists("output):
    os.makedirs("output")
with open("output/submission.csv", 'w') as f:
    writer = csv.writer(f)
    for i in submission_list:
        writer.writerow([i, submission_dict[i]])
    

SyntaxError: unterminated string literal (detected at line 26) (1593136013.py, line 26)