In [2]:
#IMPORTS
import cv2
import numpy as np 
import os
from pytesseract import Output
import pytesseract
import nltk
from nltk.corpus import words
import PIL.Image as Image
import warnings
import tensorflow as tf
import csv
import time
from tqdm import tqdm
import torch
from torchvision import transforms
from PIL import Image
import torchvision
from torch import nn
import shutil
#Proceed to EOF to use the grading function, run all functions sequentially

  _torch_pytree._register_pytree_node(


In [15]:
warnings.filterwarnings('ignore')

In [16]:
# Download the word list
nltk.download('words')
# Get the list of valid words
valid_words = set(words.words())

[nltk_data] Downloading package words to
[nltk_data]     C:\Users\tusha\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


In [17]:
def is_gibberish(text, threshold=0.40):
    words_in_text = text.split()
    if not words_in_text:
        return True
    
    valid_count = sum(1 for word in words_in_text if word.lower() in valid_words)
    valid_ratio = valid_count / len(words_in_text)

    
    return valid_ratio < threshold


def rotate_image(image, angle):
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)

    # Calculate the rotation matrix
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    
    # Calculate the sine and cosine of the rotation matrix
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to account for translation
    M[0, 2] += (new_w / 2) - center[0]
    M[1, 2] += (new_h / 2) - center[1]

    # Perform the actual rotation and return the image
    rotated = cv2.warpAffine(image, M, (new_w, new_h))
    return rotated


def correct_orientation(image):
    h, w = image.shape[:2]
    new_image = image.copy()
    if (h<w) :
        new_image = rotate_image(image, 90)
    text = pytesseract.image_to_string(new_image)
    if is_gibberish(text):
        new_image = rotate_image(new_image, 180)
        
    return new_image
    

def resizeImg(img, scale = 0.1): 
    width = int(img.shape[1]*scale)
    height = int(img.shape[0]*(scale))
    dimensions = (width,height)
    return cv2.resize(img,dimensions,interpolation=cv2.INTER_AREA)

def compressImages(directory):
    filenames = os.listdir(directory)
    for image in filenames:
        img = cv2.imread(f'{directory}/{image}')  
        scale = round(900/max(img.shape[:2]), 2)
        img_reshape = resizeImg(img, scale)
        img_gray = cv2.cvtColor(img_reshape,cv2.COLOR_BGR2GRAY)

        img_orn = correct_orientation(img_gray)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(1,1))
        sharpened_image = cv2.filter2D(img_orn, -1, kernel)
        if not os.path.exists("compressed"):
              os.makedirs("compressed")
        
        cv2.imwrite(f"compressed/{image}",sharpened_image)
    print("Done")

def compressImg(img):
    scale = round(900/max(img.shape[:2]), 2)
    img_reshape = resizeImg(img, scale)
    img_gray = cv2.cvtColor(img_reshape,cv2.COLOR_BGR2GRAY)

    img_orn = correct_orientation(img_gray)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(1,1))
    sharpened_image = cv2.filter2D(img_orn, -1, kernel)

    return sharpened_image
    


In [18]:
def save_tf_answers(img, qno = -1):
    
    try:
        img  = compressImg(img)
        img = (img[:, 3*img.shape[1]//5:])
        H,W = img.shape[0], img.shape[1]
    
        #print(img.shape)
        #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        #Image.fromarray(gray).show()
        canny = cv2.Canny(img, 50, 200, None, 3)
        #Image.fromarray(canny).show()
        
        try:
            lines = cv2.HoughLines(canny, 1, np.pi/180, 200)
            lines = np.reshape(lines, (lines.shape[0], 2))
        
            
            vertical_lines = []
            
            for  i in lines:
                if abs(np.cos(i[1]))>0.99:
                    if(np.cos(i[1])<0):
                        i[0] = abs(i[0])
                        i[1] = np.pi + i[1]
                        vertical_lines.append(i)
                    else:
                        vertical_lines.append(i)
                        
            vertical_lines = np.array(vertical_lines)
            vertical_lines = vertical_lines[vertical_lines[:, 0].argsort()]
            v_edges = [vertical_lines[0],]
            
            for i in range(1,len(vertical_lines)):
                if abs(vertical_lines[i][0] - v_edges[len(v_edges)-1][0]) < W//3: #Magic number
                    pass
                else:
                    v_edges.append(vertical_lines[i])
        
            v_edges = np.array(v_edges, dtype = int)
            
    
        
            TFcol = img[:, v_edges[0][0]:v_edges[1][0]]  #Image of the True/False Column
            
        except Exception as e:
            print(e)
            print(f"ERROR")
            return None

        
        #Image.fromarray(TFcol).show()    
        cannyTF = cv2.Canny(TFcol, 10, 100, None, 3) 
    
        boxTF = cannyTF.copy()  # To show locations of True/False boxes after processing canny image of True/False Column Image
        horizontal = []         # Tracks the horizontal lines
        kernel = 5              # Determines sensitivity in Line detection, ( 3 - 7) is the ideal range
        tolerance = 10
        for i in range(kernel,cannyTF.shape[0]-kernel):
            h_line = False
            for j in range(tolerance, cannyTF.shape[1]-tolerance):
                for k in range(-1*kernel, kernel):
                    if (cannyTF[i+k][j-1] == 255 or cannyTF[i+k][j] == 255 or cannyTF[i+k][j+1] == 255):
                        h_line = True
                        break
                else:
                    h_line = False
                    break
                
                h_line = True
            if (h_line):
                horizontal.append(i)
                boxTF[i,:] = 0
            else:
                boxTF[i, :] = 255
        
        min_lines = []                       # Stores the row of 1 line from each horizontal lines cluster, 
        s, n = 0, 0
        
        for i in range(len(horizontal)-1):
            if horizontal[i+1]-horizontal[i] <= kernel:
                s += horizontal[i]
                n += 1
                if( i == len(horizontal)-2):
                    min_lines.append(int(s/n))
    
            else:
                s += horizontal[i]
                n += 1
                min_lines.append(int(s/n))
                if( i == len(horizontal)-2):
                    min_lines.append(horizontal[-1])
                s = 0
                n = 0
                
        #print(horizontal)
    
        #Removes if a line is detected at the top of the Image
        try:
            n = len(min_lines)
            s = 0
            diff = []
            for i in range(0, n-1):
               diff.append(min_lines[i+1]-min_lines[i])
            diff = np.array(diff)
            mean_diff = np.mean(diff)
            std_diff = np.std(diff)
            ini_dist = 0
            #print(mean_diff, std_diff, min_lines)
                
            if min_lines[0] < 10:
                min_lines = min_lines[1:]
            if (qno < 0):
                pass
            else:
                try:
                    min_lines = min_lines[:qno+1]
                    
                except:
                    pass
            if(len(min_lines >=13):
                min_lines = min_lines[2:]
        except Exception as e:
            print(e)
            print(f"ERROR B") 
    
        splits = 0
        splitted_images = []
        img_height = 224
        img_width = 224
        for i in range(len(min_lines)-1):
            sliced_img = TFcol[min_lines[i]:min_lines[i+1], :]
            
            if (sliced_img.shape[0] < 25 or sliced_img.shape[0]>200):
                continue
            resized_img = cv2.resize(sliced_img, (img_width, img_height))
            # _, binary_img = cv2.threshold(resized_img, 128, 255, cv2.THRESH_BINARY)
            
            # rescaled_img = binary_img / 255.0
            splitted_images.append(resized_img)
            splits += 1
        #print("splits ", splits)
        splitted_images = np.array(splitted_images)
        return splitted_images 
    except Exception as e:
        print(e)


In [19]:
def evaluate_answers(model, images, correct_answers, device):
    # Define a map from class indices to labels
    prediction_map = {2: 'True', 1: 'False', 0: 'Empty'}

    pretrained_vit_transforms = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to 224x224
        transforms.Lambda(lambda img: img.convert("RGB")),  # Convert grayscale images to RGB
        transforms.ToTensor(),  # Convert to tensor
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # Normalize
    ])
    
    # Transform images and prepare for prediction
    transformed_images = [pretrained_vit_transforms(Image.fromarray((image).astype(np.uint8))) for image in images]
    transformed_images = torch.stack(transformed_images).to(device)
    
    # Predict the output for each image using the model
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        outputs = model(transformed_images)
    
    # Convert predictions to readable format
    _, predicted_indices = torch.max(outputs, 1)
    predicted_answers = [prediction_map[idx.item()] for idx in predicted_indices]
    
    # Calculate marks
    marks = []
    for pred, correct in zip(predicted_answers, correct_answers):
        if pred == correct:
            marks.append(1)
        else:
            marks.append(0)
    
    return marks

In [20]:
def Grade_Sheets(directory, model, correct_answers, qno=-1):
    current_time = str(time.time())[:10]
    with open(f"Graded_Sheets_vit_{current_time}.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["ID", "Predicted Marks"])
        
        files = [file for file in os.listdir(directory) if file.endswith(('jpg', 'jpeg', 'png'))]
        
        for file in tqdm(files, desc="Grading Sheets"):
            img = cv2.imread(f"{directory}/{file}")
            images = save_tf_answers(img, qno)
            
            if images is None:
                continue  # Skip if no images were extracted
            
            marks = evaluate_answers(model, images, correct_answers, device)
            
            writer.writerow([file, np.sum(marks)])

In [None]:
#Image Segregator
#Change the file paths then run

#Only Run if you need to Segregate Images based on Mapping
#Create a Copy of Original Dataset before Running

map_file = "Phase-1 Evaluation Dataset/img_model_answer_mapping.csv"
img_directory = "Phase-1 Evaluation Dataset/test"
with open(map_file, "r") as f:
    reader = csv.reader(f); 
    skip = 0
    for row in reader:
        if(skip == 0):
            skip = 1
            continue
        mapping = row[1]
        image_name = row[0]
        if not os.path.exists(mapping):
            os.makedirs(mapping)
        try:
            shutil.move(f'{img_directory}/{image_name}', f'{mapping}/{image_name}')
        except Exception as e:
            print(e)

In [None]:
#USAGE => Add the directory path to the sheets to be graded in directory variable, load model, load correct answers.

directory = "C:/Users/tusha/Desktop/NCVP/Data Samples/Sample_Data"


device = "cuda" if torch.cuda.is_available() else "cpu"
model_save_path = "best_models/best_model_vit.pth"

# Load the model architecture
pretrained_vit = torchvision.models.vit_b_16(weights=None).to(device)  # Initialize with no weights
pretrained_vit.heads = nn.Linear(in_features=768, out_features=3).to(device)  # Assuming 3 classes: True, False, Empty

# Load the saved state dictionary into the model
pretrained_vit.load_state_dict(torch.load(model_save_path, map_location=device))

# Set the model to evaluation mode
pretrained_vit.eval()

#correct_answers = ['True', 'True', 'False', 'False', 'True', 'False', 'True', 'False', 'False', 'True']
correct_answers = ['False', 'False', 'False', 'False', 'False', 'False', 'True', 'True', 'True', 'True']


s = time.time()
Grade_Sheets(directory, pretrained_vit, correct_answers)
print("FINISHED:", time.time()-s)

In [4]:
#Untested 

submission_dict = []
submission_list = []
with open("Phase-1 Evaluation Dataset/submission.csv", "r") as f:
    reader = csv.reader(f)
    skip = 0
    
    for row in reader:
        if(skip == 0):
            skip = 1
            continue
        submission_dict[row[0]] = 0
        submission_list.append(row[0])
print(submission_dict)
with open("prediction.csv", 'r') as f:
    reader = csv.reader(f)
    skip = 0
    
    for row in reader:
        if(skip == 0):
            skip = 1
            continue
        submission_dict[row[0]] = row[1]

if not os.path.exists("output):
    os.makedirs("output")
with open("output/submission.csv", 'w') as f:
    writer = csv.writer(f)
    for i in submission_list:
        writer.writerow([i, submission_dict[i]])
    

[['ï»¿img_name', 'pred_marks'], ['x9v67l6.jpg', ''], ['ewA8Rpn.jpg', ''], ['E6MwJ00.jpg', ''], ['9tuGAkX.jpg', ''], ['r53EbYv.jpg', ''], ['smkLrZa.jpg', ''], ['IRpxdDf.jpg', ''], ['hBD1FRE.jpg', ''], ['EXLa4p7.jpg', ''], ['uCpvJcc.jpg', ''], ['Ul9u6l6.jpg', ''], ['u2GZM4K.jpg', ''], ['D7klNpd.jpg', ''], ['yF4wPq5.jpg', ''], ['r9ByOWm.jpg', ''], ['52pqQSe.jpg', ''], ['Q2a6ahU.jpg', ''], ['BLK52Gc.jpg', ''], ['k0xTfJi.jpg', ''], ['msX3bBM.jpg', ''], ['nG4XW3E.jpg', ''], ['rtCMIC1.jpg', ''], ['WxtTFL6.jpg', ''], ['iw06r3Q.jpg', ''], ['PXamf6c.jpg', ''], ['RZH24s7.jpg', ''], ['favEvzP.jpg', ''], ['I3rHIGa.jpg', ''], ['LCB41h0.jpg', ''], ['T2EiQ3J.jpg', ''], ['uWMcs2Y.jpg', ''], ['56jXNyb.jpg', ''], ['6EoSuEJ.jpg', ''], ['pG15tPy.jpg', ''], ['OEOMwNz.jpg', ''], ['GCKySAb.jpg', ''], ['KikF8Hg.jpg', ''], ['g9OnxRJ.jpg', ''], ['6jHuOGj.jpg', ''], ['hr0SaFm.jpg', ''], ['t4fbUW1.jpg', ''], ['LRccyJJ.jpg', ''], ['fSgibgb.jpg', ''], ['sB9gIJP.jpg', ''], ['XHbvbhu.jpg', ''], ['k0R3DoI.jpg', ''], ['