In [None]:
import os
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from collections import Counter

In [None]:
# Path of the Chrome WebDriver executable; passed to webdriver.Chrome() in hackerrank_contest
chrome_webdriver=""

In [None]:
# Root link of the contest's submission pages; view_submissions appends the page number to it
submission_link=""

In [None]:
# Username and Password of the Hackerrank Account; typed into the login form by hackerrank_contest
username=""
password=""

In [None]:
# Dictionary to store the Submissions: maps a problem name to a list of
# [submitted_by, new_code, solution_link, code_words] entries (filled by process_submission)
submissions={}

In [None]:
def format_code(code):
    """Convert the code scraped from a Hackerrank submission to the desired format.

    Every line is stripped, and lines made up only of digits are dropped
    because they are assumed to be the line numbers that come along with the
    scraped text (a code line that is nothing but a number is dropped too).

    args:
        code: Submitted code (string)
    returns:
        new_code: Code with line numbers and all whitespace removed (string)
        code_words: Whitespace-separated words of the code, in order; never
                    contains empty strings, and is empty for empty code (list)
    """
    kept_lines = []

    for line in code.splitlines():
        line = line.strip()
        if line.isnumeric():
            continue
        kept_lines.append(line)

    # split() without an argument collapses runs of whitespace, so blank lines
    # and the join separator never produce empty "words" that would distort
    # the word counts used for the similarity check.
    code_words = " ".join(kept_lines).split()
    new_code = "".join(code_words)

    return new_code, code_words


In [None]:
def process_submission(browser, submission):
    """Extract the necessary information from a submission tile and store it.

    The entry [submitted_by, new_code, solution_link, code_words] is appended
    to the module-level submissions dictionary under the problem name.
    Submissions whose "during contest" field reads "No" are ignored.

    The browser is navigated to the solution page to read the code and is
    always sent back one step in its history afterwards, even when the code
    cannot be read, so the caller ends up on the listing page again.

    args:
        browser: WebDriver Instance (WebDriver)
        submission: Submission to process (WebElement)
    """
    fields = submission.find_elements_by_css_selector("p.small")

    # Flag to check whether the submission was during the contest.
    # (The score, fields[3], is not needed for the plagiarism check and is
    # deliberately not parsed, so an unparsable score cannot drop a submission.)
    within_contest = fields[4].text

    if within_contest == "No":
        return

    links = submission.find_elements_by_css_selector("a.challenge-slug.backbone")

    # Name of the problem
    problem_name = links[0].text
    # Username of the submitter
    submitted_by = links[1].text

    # Link of the solution
    solution_link = submission.find_element_by_css_selector("a.view-results").get_attribute("href")

    browser.get(solution_link)
    try:
        # Give the code viewer time to render before reading it
        time.sleep(4)
        code = browser.find_element_by_css_selector("div.CodeMirror-lines").text
    finally:
        # Return to the listing page whether or not the code could be read
        browser.execute_script("window.history.go(-1)")

    new_code, code_words = format_code(code)

    entry = [submitted_by, new_code, solution_link, code_words]
    submissions.setdefault(problem_name, []).append(entry)
    

In [None]:
def view_submissions(browser, submission_link, page_num):
    """Download the submissions of a given page.

    Each submission tile on the page is handed to process_submission. A
    failure on one submission is printed and does not stop the others.

    args:
        browser: WebDriver Instance (WebDriver)
        submission_link: Root link of the submission pages (string)
        page_num: Page Number (string)
    returns:
        boolean True if page is not empty else False
    """
    # Link of the submission page. Built with "/" explicitly: os.path.join
    # would insert a backslash on Windows and produce an invalid URL.
    page_link = "/".join((submission_link.rstrip("/"), page_num))
    browser.get(page_link)

    time.sleep(2)

    class_name = "judge-submissions-list-view"

    # List of the submissions in the page
    submission_list = browser.find_elements_by_class_name(class_name)

    length = len(submission_list)

    if not length:
        return False

    for i in range(length):

        try:
            process_submission(browser, submission_list[i])
        except Exception as e:
            print(e)
            # The browser may have been left on a solution page; go back to
            # the listing so the remaining submissions can still be processed.
            browser.get(page_link)

        # Processing a submission navigates away and back, which invalidates
        # the previously found elements, so look them up again every time.
        time.sleep(2)
        submission_list = browser.find_elements_by_class_name(class_name)

    return True


In [None]:
def download_submissions(browser, submission_link, max_retries=3):
    """Go through the different submission pages, starting at page 1.

    The walk ends at the first page that view_submissions reports as empty.
    A page whose download raises is retried; once the same page has failed
    more than max_retries times in a row the walk is abandoned instead of
    retrying forever.

    args:
        browser: WebDriver Instance (WebDriver)
        submission_link: Root link of the submission pages (string)
        max_retries: Number of retries allowed for a single page (int)
    """
    # Submission page number
    page_num = 1
    # Consecutive failed attempts for the current page
    failed_attempts = 0

    while True:

        try:
            has_submissions = view_submissions(browser, submission_link, str(page_num))
        except Exception as e:
            print(e)
            failed_attempts += 1
            if failed_attempts > max_retries:
                print("Giving up on page", page_num, "after", failed_attempts, "failed attempts")
                return
            continue

        if not has_submissions:
            return

        page_num += 1
        failed_attempts = 0
        

In [None]:
def hackerrank_contest(submission_link, username, password):
    """Main function to download the submissions from hackerrank.

    Starts Chrome using the module-level chrome_webdriver path, logs in and
    downloads every submission page. Any exception is printed rather than
    raised, and the browser is always closed before returning.

    args:
        submission_link: Root link of the submission pages (string)
        username: Hackerrank account username
        password: Hackerrank account password
    """
    browser = None

    try:

        browser = webdriver.Chrome(chrome_webdriver)

        # Login to Hackerrank
        browser.get("https://www.hackerrank.com/login")
        time.sleep(1)
        browser.find_element_by_name("username").send_keys(username)
        time.sleep(2)
        browser.find_element_by_name("password").send_keys(password, Keys.ENTER)
        # Let the login request finish; navigating away immediately could
        # interrupt it and leave the session unauthenticated.
        time.sleep(2)

        download_submissions(browser, submission_link)

    except Exception as e:
        print(e)

    finally:
        # Always release the browser and the driver process
        if browser is not None:
            try:
                browser.quit()
            except Exception as e:
                print(e)
        

In [None]:
# Log in and download the contest submissions into the submissions dictionary
hackerrank_contest(submission_link, username, password)

In [None]:
"""
    Computes the cosine similarity between two codes
    args:
        words1: List of words for the first code (list)
        words2: List of words for the second code (list)
    returns:
        cosine: Cosine similarity of the two codes (float)
"""

def compute_cosine_similarity(words1, words2):
    """Compute the cosine similarity between the word counts of two codes.

    args:
        words1: List of words for the first code (list)
        words2: List of words for the second code (list)
    returns:
        cosine: Cosine similarity of the two codes; 0.0 when either list is
                empty, since an empty code has no direction to compare (float)
    """
    # Word -> number of occurrences for each code
    counts1 = Counter(words1)
    counts2 = Counter(words2)

    # Lengths of the two count vectors
    len1 = sum(count * count for count in counts1.values()) ** 0.5
    len2 = sum(count * count for count in counts2.values()) ** 0.5

    # Guard against division by zero for empty word lists
    if not len1 or not len2:
        return 0.0

    # Only words present in both codes contribute to the dot product
    dot = sum(counts1[word] * counts2[word] for word in counts1.keys() & counts2.keys())
    cosine = dot / (len1 * len2)

    return cosine


In [None]:
"""
    Checks every pair of submissions to the same problem for plagiarism, using either cosine similarity or exact code equality
    args:
        disqualify: List to which items to be appended (list)
        cosine_similarity: Boolean value to decide if cosine similarity is to be used (boolean)
        thresh: threshold value for cosine similarity (float)
"""

def check_plagiarism(disqualify, cosine_similarity=False, thresh=0.95):
    """Flag suspicious pairs of submissions, problem by problem.

    Reads the module-level submissions dictionary and compares every pair of
    entries stored under the same problem. Pairs submitted by the same user
    are skipped. A flagged pair is appended to disqualify as
    [username1, solution_link1, username2, solution_link2, problem_name].

    args:
        disqualify: List to which the flagged pairs are appended (list)
        cosine_similarity: Compare by cosine similarity of the code words when
                           True, by exact equality of the formatted code
                           otherwise (boolean)
        thresh: Minimum cosine similarity for a pair to be flagged (float)
    """
    for problem, entries in submissions.items():

        for position, first in enumerate(entries):
            first_user = first[0]
            first_code = first[1]
            first_link = first[2]
            first_words = first[3]

            # Only look at later entries so every pair is visited once
            for second in entries[position + 1:]:
                second_user = second[0]
                second_code = second[1]
                second_link = second[2]
                second_words = second[3]

                if first_user == second_user:
                    continue

                flagged = [first_user, first_link, second_user, second_link, problem]

                if cosine_similarity:
                    is_copy = compute_cosine_similarity(first_words, second_words) >= thresh
                else:
                    is_copy = second_code == first_code

                if is_copy:
                    disqualify.append(flagged)
                    

In [None]:
# List of candidate pairs caught doing plagiarism; each item is
# [username1, solution_link1, username2, solution_link2, problem_name]
disqualify=[]

# True selects the cosine similarity check (with the default threshold)
check_plagiarism(disqualify,True)

In [None]:
"""
    Converts the list of disqualified candidates to a file
    args:
        disqualify: List of candidates who have been detected for plagiarism (list)
        file: Path of the file (string)
"""

def convert_to_file(disqualify, file):
    """Write the list of disqualified candidates to a text file.

    Each row is written on its own line with its items separated by single
    spaces. The file is overwritten, and is left empty when the list is empty.

    args:
        disqualify: List of candidates who have been detected for plagiarism (list)
        file: Path of the file (string)
    """
    lines = [" ".join(map(str, row)) for row in disqualify]

    # Explicit encoding so usernames outside the platform's default code page
    # cannot make the write fail.
    with open(file, "w", encoding="utf-8") as output:
        output.writelines(line + "\n" for line in lines)
    

In [None]:
# Path of the directory in which the output file is written
file_path=""

# Name of the output file
file_name=""

# Write the flagged pairs to the file at file_path/file_name
convert_to_file(disqualify, os.path.join(file_path,file_name))