# Automated Essay Evaluation
# SMU, Data Science, Capstone
## Chris Roche, Nathan Deinlein, Darryl Dawkins

#### Brief notes on adding a new criterion

To add a criterion you need to do three main things:
    
1. Add the call to your model(s) inside a run_criteriaN function
   * this function should take in the essay and grade level as params, at a minimum
2. Update the output of your run_criteriaN function to be:
   * a string to be displayed to the student
   * a bool for whether the student needs help in this area (used by recommender)
3. Add an initial list of resources for help to the recommender csv file

In [1]:
import os
import subprocess
import sys

# Download the spaCy model with the same interpreter that runs this code.
# A bare `python` on PATH can belong to a different environment, in which
# case the model would be installed where the later `import en_core_web_md`
# cannot find it.  A failed download is only reported, not raised, because
# the model may already be installed (e.g. when working offline).
_spacy_download = subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_md"]
)
if _spacy_download.returncode != 0:
    print(f"spaCy model download exited with status {_spacy_download.returncode}")

Collecting en-core-web-md==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.3.0/en_core_web_md-3.3.0-py3-none-any.whl (33.5 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')


0

In [2]:
# Import libraries
import nltk
nltk.download('punkt')
nltk.download('stopwords')
import gradio as gr
import spacy
import en_core_web_md
from statistics import mean, median, mode
from TRUNAJOD import surface_proxies
import TRUNAJOD.ttr
import pytextrank

[nltk_data] Downloading package punkt to /Users/cmroche/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/cmroche/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Various functions used for evaluating criteria

In [3]:
def get_sent_bounds(doc, limit_phrases=4):
    """Record, for every sentence of ``doc``, which top phrases it contains.

    Args:
        doc: Parsed document exposing ``doc.sents`` (items with ``start`` and
            ``end``) and ``doc._.phrases`` (items with a ``chunks`` iterable
            whose elements also have ``start`` and ``end``).
        limit_phrases: How many leading entries of ``doc._.phrases`` to
            consider.  Defaults to 4; ``None`` means all phrases.

    Returns:
        A list with one ``[start, end, phrase_ids]`` entry per sentence, in
        sentence order.  ``phrase_ids`` is the set of positions (0-based, in
        ``doc._.phrases`` order) of the phrases that have at least one chunk
        lying entirely inside that sentence.
    """
    from itertools import islice

    sent_bounds = [[s.start, s.end, set()] for s in doc.sents]

    # Loop through the leading phrases of the document
    for phrase_id, phrase in enumerate(islice(doc._.phrases, limit_phrases)):
        # Loop through each occurrence (chunk) of the phrase
        for chunk in phrase.chunks:
            # A chunk is credited to the first sentence that fully encloses it
            for sent_start, sent_end, phrase_ids in sent_bounds:
                if sent_start <= chunk.start and chunk.end <= sent_end:
                    phrase_ids.add(phrase_id)
                    break

    return sent_bounds

def get_unit_vector(key_doc, limit_phrases=4):
    """Collect the ranks of the leading phrases of ``key_doc``.

    Args:
        key_doc: Parsed document exposing ``key_doc._.phrases``, whose items
            carry a ``rank`` attribute.
        limit_phrases: How many leading phrases to read.  Defaults to 4;
            ``None`` means all phrases.

    Returns:
        A list of ``rank`` values in phrase order, at most ``limit_phrases``
        long (shorter when the document has fewer phrases).
    """
    from itertools import islice

    return [phrase.rank for phrase in islice(key_doc._.phrases, limit_phrases)]

def normalize_unit_vector(unit_vector):
    """Scale the ranks so that they add up to 1.

    Args:
        unit_vector: Sequence of numeric ranks.

    Returns:
        A new list with every rank divided by the sum of all ranks.  When the
        sum is zero (for example an empty vector or all-zero ranks) there is
        nothing to scale by, so an unchanged copy is returned instead of
        raising ``ZeroDivisionError``.
    """
    # Sum ranks for normalization
    sum_ranks = sum(unit_vector)

    if not sum_ranks:
        return list(unit_vector)

    return [rank / sum_ranks for rank in unit_vector]


def sent_uv_rank(unit_vector, sent_bounds):
    """Score each sentence by the weight of the key phrases it is missing.

    For every entry of ``sent_bounds`` the squares of the ``unit_vector``
    weights whose position is absent from the sentence's phrase-id set are
    added up, and the square root of that total is stored.  A sentence that
    holds every phrase id scores 0.0.

    Args:
        unit_vector: Sequence of phrase weights; position ``i`` belongs to
            phrase id ``i``.
        sent_bounds: Sequence of ``(start, end, phrase_ids)`` triples.

    Returns:
        A dict mapping the sentence's position in ``sent_bounds`` to its score.
    """
    import math

    scores = {}

    for position, (_, _, present_ids) in enumerate(sent_bounds):
        missing_sq = 0.0

        for phrase_id, weight in enumerate(unit_vector):
            # Only phrases the sentence does NOT contain add to the score
            if phrase_id in present_ids:
                continue
            missing_sq += weight**2.0

        scores[position] = math.sqrt(missing_sq)

    return scores


def get_top_ranks(doc, sent_rank):
    """Summarize the (up to) five lowest-scoring sentences of ``doc``.

    Args:
        doc: Parsed document; ``doc.sents`` is enumerated to map sentence ids
            (0-based positions) to sentences.
        sent_rank: Dict mapping sentence id to a numeric score.

    Returns:
        A 5-tuple ``(rank_mean, rank_median, rank_mode, mean_word_count,
        mean_lex_div)`` computed over the selected sentences.

    Raises:
        statistics.StatisticsError: If ``sent_rank`` is empty, because the
            statistics are then taken over an empty list.
        KeyError: If ``sent_rank`` holds an id that is not a sentence position.
    """
    from itertools import islice
    from operator import itemgetter

    # limit for the number of top sentences to collect
    limit_sentences = 5

    # Create id for each sentence from the document
    sent_text = dict(enumerate(doc.sents))

    # Ascending sort: the smallest scores come first
    by_rank = sorted(sent_rank.items(), key=itemgetter(1))
    selected = list(islice(by_rank, limit_sentences))

    top_5_ranks = [rank for _, rank in selected]
    top_5_word_count = [
        surface_proxies.word_count(sent_text[sent_id]) for sent_id, _ in selected
    ]

    rank_avg = mean(top_5_ranks)
    rank_med = median(top_5_ranks)
    rank_mode = mode(top_5_ranks)

    mean_word_count = mean(top_5_word_count)

    # MTLD is measured on the whole document, so it is the same value for
    # every selected sentence; compute it once and repeat it so the mean is
    # still taken over one entry per sentence.
    # NOTE(review): if a per-sentence diversity was intended, the sentence
    # should be passed instead of `doc` - confirm.
    doc_lex_div = TRUNAJOD.ttr.lexical_diversity_mtld(doc)
    mean_lex_div = mean([doc_lex_div] * len(selected))

    return rank_avg, rank_med, rank_mode, mean_word_count, mean_lex_div

@spacy.registry.misc("articles_scrubber")
def articles_scrubber():
    """Build the scrubber registered in spaCy's misc registry as "articles_scrubber".

    Returns:
        A function that takes a span, drops its leading tokens whose ``pos_``
        is DET, PRON or ADJ, and returns the ``lemma_`` of what remains.
    """
    leading_pos = ("DET", "PRON", "ADJ")

    def scrubber_func(span: spacy.tokens.Span) -> str:
        # Count how many tokens at the front of the span should be removed
        skip = 0
        for token in span:
            if token.pos_ in leading_pos:
                skip += 1
            else:
                break

        trimmed = span[skip:] if skip else span
        return trimmed.lemma_

    return scrubber_func

#### Criteria 1: Lexical Diversity

In [4]:
########################################################
# Lexical Diversity Criteria1
########################################################

# Note: gradeLevel is a string (e.g., "10th Grade") but is unused in this proof of concept

def run_criteria1(essay, gradeLevel):
    """Evaluate vocabulary (lexical) diversity of an essay.

    Diversity is the number of unique alphabetic words divided by the total
    number of alphabetic words, compared case-insensitively.

    Args:
        essay: The essay text.
        gradeLevel: Grade level string (e.g., "10th Grade"); unused here.

    Returns:
        A ``(message, needsHelp)`` pair: the feedback string shown to the
        student and a bool that is True when diversity is below the model
        median.  When the essay has no alphabetic words nothing can be
        measured, so a short notice and ``False`` are returned.
    """
    # Calculated in the EDA python notebook
    modelMedianDiversity = 0.5481

    # Keep alphabetic tokens only, lower-cased so "The" and "the" match
    allWords = [
        token.lower()
        for token in nltk.tokenize.word_tokenize(essay)
        if token.isalpha()
    ]

    # Get basic statistics about the essay
    totalWords = len(allWords)
    if totalWords == 0:
        # Avoids dividing by zero for essays made only of digits/punctuation
        return "Your essay has no words I can evaluate, so vocabulary diversity was not measured.", False

    vocabWords = len(set(allWords))
    diversity = vocabWords / totalWords

    # If below average, recommend help
    needsHelp = diversity < modelMedianDiversity

    criteria1OutputStr = f"""Your essay has {totalWords} total words and {vocabWords} unique words, for a Diversity of {str(round(diversity*100, 2))}%. """

    if not needsHelp:
        return criteria1OutputStr + "Your vocabulary is in good shape! Keep up the good work!", needsHelp

    # Most common words, ignoring stopwords (a set makes the lookup cheap)
    stopwords = set(nltk.corpus.stopwords.words('english'))
    allWordExceptStopDist = nltk.FreqDist(w for w in allWords if w not in stopwords)
    mostCommon = [word for word, _count in allWordExceptStopDist.most_common(2)]

    # An essay may hold fewer than two non-stopword words, so build the
    # example from however many are available.
    if len(mostCommon) == 2:
        exampleStr = f"""For example, your two most common words are '{mostCommon[0]}' and '{mostCommon[1]}'. """
    elif len(mostCommon) == 1:
        exampleStr = f"""For example, your most common word is '{mostCommon[0]}'. """
    else:
        exampleStr = ""

    thesaurusesStr = (
        "I recommend you focus on expanding your vocabulary. "
        + exampleStr
        + "Try using alternatives from a thesaurus. "
    )

    return criteria1OutputStr + thesaurusesStr, needsHelp

#### Criteria 2: Extractive Summarization

In [5]:
########################################################
# Extractive Summarization Criteria2
########################################################

# Note: gradeLevel is a string (e.g., "10th Grade") but is unused in this proof of concept

def run_criteria2(essay, gradeLevel, essay_prompt):
    """Evaluate how well an essay stays on the topic given by a prompt.

    Args:
        essay: The essay text.
        gradeLevel: Grade level string (e.g., "10th Grade"); unused here.
        essay_prompt: Prompt or keywords describing the expected topic.

    Returns:
        A ``(message, needsHelp)`` pair: the feedback string shown to the
        student and a bool that is True when the essay is judged off topic.
        Without a prompt (missing, empty or only whitespace) the criterion is
        skipped and ``needsHelp`` is False.
    """
    # Skip before any model work: loading the pipeline and parsing the essay
    # are pointless when there is no prompt to compare against.
    if not essay_prompt or not str(essay_prompt).strip():
        return f"""Not prompt provided, skipping this criterion.""", False

    # Prep the spacy nlp pipeline
    nlp = spacy.load("en_core_web_md")
    nlp.add_pipe("textrank", config={"scrubber": {"@misc": "articles_scrubber"}})

    # Created by processing a string of text with the nlp object
    key_doc = nlp(essay_prompt)
    doc = nlp(essay)

    # NOTE(review): sent_bounds holds phrase ids taken from the essay's own
    # phrase list, while key_unit_vector holds ranks of the prompt's phrases,
    # so id i and weight i need not describe the same phrase - confirm this
    # pairing is intended.
    sent_bounds = get_sent_bounds(doc)
    key_unit_vector = normalize_unit_vector(get_unit_vector(key_doc))
    key_sent_rank = sent_uv_rank(key_unit_vector, sent_bounds)

    # Only the mean of the top-sentence scores is used for the decision
    key_rank_mean = get_top_ranks(doc, key_sent_rank)[0]

    # Do they need help?
    # NOTE(review): sent_uv_rank grows as more phrases are missing from a
    # sentence, yet a larger mean counts as on topic here; presumably the
    # threshold was calibrated in the EDA - confirm.
    if key_rank_mean > .35:
        return f"""Your essay appears to follow the topic well. Nice job!""", False

    return f"""Your essay seems to be a little off topic. I recommended working on this area. """, True

#### Criteria 3: Word/Sentence Count

In [6]:
########################################################
# Word/Sentence Count Criteria3
########################################################

# Note: gradeLevel is a string (e.g., "10th Grade") but is unused in this proof of concept

def run_criteria3(essay, gradeLevel, word_count_req, min_length, max_length):
    """Evaluate the essay's word count and average sentence length.

    Args:
        essay: The essay text.
        gradeLevel: Grade level string (e.g., "10th Grade"); unused here.
        word_count_req: Minimum number of words the essay must have.
        min_length: Lower bound for the average sentence length, in words.
        max_length: Upper bound for the average sentence length, in words.
            If the two bounds arrive reversed they are swapped.

    Returns:
        A ``(message, needsHelp)`` pair: the word-count feedback and the
        sentence-length feedback separated by a blank line, and a bool that
        is True when either check fails.
    """
    needsHelp = False

    # With min above max every average would be flagged as too long or too
    # short, so put reversed bounds back in order.
    if min_length > max_length:
        min_length, max_length = max_length, min_length

    # Prep the spacy nlp pipeline (textrank is not added: only word and
    # sentence counts are read from the parsed document here)
    nlp = spacy.load("en_core_web_md")

    # Created by processing a string of text with the nlp object
    doc = nlp(essay)

    word_count = surface_proxies.word_count(doc)
    average_sentence_length = surface_proxies.average_sentence_length(doc)

    # Check word count; exactly reaching the requirement meets it
    if word_count >= word_count_req:
        criteria3OutputStr_wc = f"""Word count meets the minimum requirement."""

    else:
        criteria3OutputStr_wc = f"""Your word count is {word_count}, which is below the word count requirement of {str(int(word_count_req))}."""
        needsHelp = True

    # Check sentence length
    if average_sentence_length > max_length:
        needsHelp = True
        criteria3OutputStr_sl = f"""Most of your sentences are pretty long for your grade level (avg. {str(int(average_sentence_length))} words), review your paper and double-check for run-on sentences. """

    elif average_sentence_length < min_length:
        needsHelp = True
        criteria3OutputStr_sl = f"""Most of your sentences are on the shorter side for your grade level (avg. {str(int(average_sentence_length))} words), review your paper and check for fragmented sentences. """

    else:
        criteria3OutputStr_sl = f"""Your sentences length looks good! Keep up the good work."""

    # Format final output string
    criteria3OutputStr = criteria3OutputStr_wc + "\n\n" + criteria3OutputStr_sl

    return criteria3OutputStr, needsHelp

#### Recommender Engine Link Generation

In [7]:
# The Recommender Engine (RE) is actually run from three different UIs, one per criterion. Below
#   are just the links to each of those three criterion pages. The RE itself runs in those other
#   UIs and is implemented in the Recommender directory of this repo

def run_recommender(recommender_links, needHelp):
    """Build the resource sentence for each criterion that needs help.

    Args:
        recommender_links: Unused by this function.
        needHelp: Sequence of three flags, one per criterion (vocabulary,
            focus, length), in that order.

    Returns:
        A 3-tuple of strings in criterion order.  An entry is an introductory
        sentence followed by a link when the matching flag is truthy, and an
        empty string otherwise.

    Raises:
        IndexError: If ``needHelp`` has fewer than three entries.
    """
    # (introduction, link to the criterion's individual RE UI)
    resources = (
        ("Here's a resource to help expand your vocabulary: ", "https://tinyurl.com/46t3j9s6"),  ## TODO
        ("Here's a resource to help you work on essay focus: ", "https://tinyurl.com/46t3j9s6"),  ## TODO
        ("Here's a resource to help you work to improve length: ", "https://tinyurl.com/46t3j9s6"),  ## TODO
    )

    return tuple(
        intro + url if needHelp[index] else ""
        for index, (intro, url) in enumerate(resources)
    )

In [8]:
# Just parses the checkboxes from the UI

def evaluate_criteria(criteria):
    """Translate the UI checkbox selection into three run/skip flags.

    Args:
        criteria: Container of the selected checkbox labels.

    Returns:
        A list of three bools telling whether 'Vocabulary', 'Focus' and
        'Length' (in that order) are present in ``criteria``.
    """
    checkbox_labels = ('Vocabulary', 'Focus', 'Length')
    return [label in criteria for label in checkbox_labels]

#### The function that gets called when the user clicks submit

In [9]:
######################################################################################
# This is the function called when you click submit on the UI

def run_model_with_feedback(essay, criteria, recommender, gradeLevel, essay_prompt, word_count, sent_min, sent_max):
    """Run the selected criteria on an essay and collect the feedback.

    Args:
        essay: The essay text.
        criteria: Selected checkbox labels ('Vocabulary', 'Focus', 'Length').
        recommender: Truthy to append resource links for criteria needing help.
        gradeLevel: Grade level string, passed through to each criterion.
        essay_prompt: Prompt or keywords, used by the focus criterion.
        word_count: Required word count, used by the length criterion.
        sent_min: Lower bound for average sentence length.
        sent_max: Upper bound for average sentence length.

    Returns:
        A 4-tuple of strings: a summary line, then the vocabulary, focus and
        length results.  For a missing or blank essay all four entries are
        the same warning.
    """
    # Check the essay field wasn't left empty (or blank) before running models.
    # The UI has four output boxes, so exactly four values are returned.
    if not essay or not essay.strip():
        ret = "Invalid/empty essay field, try again"
        return ret, ret, ret, ret

    def _with_link(feedback, link):
        # Keep feedback and link from running together ("...work.Here's a ...")
        return f"{feedback.rstrip()} {link}" if link else feedback

    ##########################################################
    # Placeholders for outputs and variables
    ##########################################################
    recommender_links = []        # append links to this

    # These get replaced with the results for the UI
    criteria1OutputStr = "Did not run evaluation on Vocabulary Diversity"
    criteria2OutputStr = "Did not run evaluation on Content/Essay focus"
    criteria3OutputStr = "Did not run evaluation on word count or sentence length"

    # Set these to true if the NLP models say the student needs help
    # Then recommender will make a list of resources based on these
    needHelp = [False, False, False]

    # Whether the user asked us to evaluate the criteria
    runCriteria = evaluate_criteria(criteria)

    ##########################################################
    # All processing gets done in the functions.
    # This just passes inputs from UI and gets the output
    ##########################################################

    ## Criteria1:
    if runCriteria[0]:
        criteria1OutputStr, needHelp[0] = run_criteria1(essay, gradeLevel)

    ## Criteria2:
    if runCriteria[1]:
        criteria2OutputStr, needHelp[1] = run_criteria2(essay, gradeLevel, essay_prompt)

    ## Criteria 3:
    if runCriteria[2]:
        criteria3OutputStr, needHelp[2] = run_criteria3(essay, gradeLevel, word_count, sent_min, sent_max)

    ##########################################################
    # Recommender links:
    ##########################################################
    if recommender:
        criteria1Link, criteria2Link, criteria3Link = run_recommender(recommender_links, needHelp)
        criteria1OutputStr = _with_link(criteria1OutputStr, criteria1Link)
        criteria2OutputStr = _with_link(criteria2OutputStr, criteria2Link)
        criteria3OutputStr = _with_link(criteria3OutputStr, criteria3Link)
    # Else: do nothing and don't append anything

    ##########################################################
    # Return the results
    ##########################################################
    return (f"""Evaluated student submission on {" and ".join(criteria)} with recommender turned {"on" if recommender else "off"}""",
            criteria1OutputStr,
            criteria2OutputStr,
            criteria3OutputStr)
    
    
    
# End function


#### The User Interface code

In [10]:
######################################################################################
# This is the actual interface code
iface = gr.Interface(

    # this is the function call with the UI inputs serving as the arguments
    run_model_with_feedback,

    ##########################################################
    # Inputs to the User Interface
    # (order matches the parameters of run_model_with_feedback)
    ##########################################################
    [
        # First argument passed in is the essay, as a string
        gr.inputs.Textbox(lines=10, placeholder="Copy the body of your essay here...", default="", label="Student Essay:"),

        # Second set of options are which rubric/criteria to check
        gr.inputs.CheckboxGroup(
                                ["Vocabulary", "Focus", "Length"],
                                default=["Vocabulary", "Focus", "Length"],
                                label="Evaluate on which criteria?"),

        # An option to turn on/off the recommender engine
        gr.inputs.Checkbox(label="Recommend videos for improvement?", default=True),

        # Grade Level: only 10th is used right now
        gr.inputs.Dropdown(["10th Grade", "N/A"], label="Level"),

        # Prompt for criteria 2
        gr.inputs.Textbox(lines=5, placeholder="Paste prompt here if applicable...", default="", label="Prompt or Keywords:"),

        # Counts for criteria 3.  The minimum must not exceed the maximum,
        # otherwise no average sentence length can fall inside the accepted band.
        gr.inputs.Number(label="word count", default=150),
        gr.inputs.Number(label="sentence length min threshoold", default=15),
        gr.inputs.Number(label="sentence length max threshoold", default=25),
    ],

    ##########################################################
    # these are the output components
    # (one per value returned by run_model_with_feedback)
    ##########################################################
    [
        gr.outputs.Textbox(type="str", label="Evaluation:"),
        gr.outputs.Textbox(type="str", label="Vocabulary Diversity Results:"),
        gr.outputs.Textbox(type="str", label="Focus/Content Results:"),
        gr.outputs.Textbox(type="str", label="Length Results:"),

    ],

    ##########################################################
    # examples the UI lets you select from .. these are optional
    # Each row: essay, criteria, recommender, level, prompt,
    #           word count, sentence length min, sentence length max
    ##########################################################
    examples=[
        ["Dear local newspaper, I think effects computers have on people are great learning skills/affects because they give us time to chat with friends/new people, helps us learn about the globe(astronomy) and keeps us out of troble! Thing about! Dont you think so? How would you feel if your teenager is always on the phone with friends! Do you ever time to chat with your friends or buisness partner about things. Well now - there's a new way to chat the computer, theirs plenty of sites on the internet to do so: @ORGANIZATION1, @ORGANIZATION2, @CAPS1, facebook, myspace ect. Just think now while your setting up meeting with your boss on the computer, your teenager is having fun on the phone not rushing to get off cause you want to use it. How did you learn about other countrys/states outside of yours? Well I have by computer/internet, it's a new way to learn about what going on in our time! You might think your child spends a lot of time on the computer, but ask them so question about the economy, sea floor spreading or even about the @DATE1's you'll be surprise at how much he/she knows. Believe it or not the computer is much interesting then in class all day reading out of books. If your child is home on your computer or at a local library, it's better than being out with friends being fresh, or being perpressured to doing something they know isnt right. You might not know where your child is, @CAPS2 forbidde in a hospital bed because of a drive-by. Rather than your child on the computer learning, chatting or just playing games, safe and sound in your home or community place. Now I hope you have reached a point to understand and agree with me, because computers can have great effects on you or child because it gives us time to chat with friends/new people, helps us learn about the globe and believe or not keeps us out of troble. Thank you for listening.",
           ["Vocabulary", "Focus", "Length"], True, "10th Grade", "Computers and their effect on people", 150, 15, 25],
        ["Dear local newspaper I think that usieng computers help people becuse if we did not have computers we would not now ehey thing about eneyone or eneything like all of the @CAPS1 I would not now eneything about them but with computers I know alot about them and there lives like @CAPS2 @CAPS3 @CAPS4 got shot in the back of the head and. @CAPS4 got shot to and I know alot about the @ORGANIZATION1 there white people that to fear in to black people and the same with the wars like world @NUM1 and world @NUM2 and the @CAPS5 and @PERSON1 and the @CAPS6 war there was like plain spy palin flying across @LOCATION1 and they shot him down becuse we were trying to see if thay had eney nuculer bombs offer there. And the same with google and yahoo with google you can type in eneything and you will get a answer and most liked a corect answer yahoo and google is great for some worke and products end studying becuse you then you do to and that I think that computer are good.",
           ["Vocabulary"], False, "10th Grade", "", 150, 15, 25],
        ["How @CAPS4 you feel if your favorite book was taken off the shelves of your school or public library? I, along with many other students, @CAPS4 find this discouraging and distastrous, so I do not believe that censorship should affect books that are on the shelves. Otherwise, a demolished love of reading, crushed individuality, and separated population @MONTH1 be born.     Like the beloved @PERSON2 @PERSON2 series by @PERSON1, many books and series are being taken out of libraries' collections due to people in society finding them offensive. In this case, the world of witchcraft in which this story blooms is against some religious beliefs; therefore, some individuals within a religion campaign to have these books banned. Fortunately, none of the libraries I visit, with their eclectic collections, had banned this series, or I @CAPS4 not have the strong thirst for literature as I do now. All books have the potential to pull a student into the wonderful world of reading, like @PERSON2 did for me. So how @CAPS4 you feel if your favorite book was gone from all libraries? Disrespected? That is how I @CAPS4 feel",
           ["Focus", "Length"], True, "10th Grade", "Censorship in the Libraries", 150, 15, 25],
    ],

    ##########################################################
    # Other settings for the UI
    ##########################################################
    allow_flagging="never",
    theme="default", #"default", "huggingface", "seafoam", "grass", "peach", "dark",
    title='Essay Evaluation and Feedback',

    description="This is an automated tool for student essay feedback. Unlike traditional Automated Essay Scoring \
    systems, this tool focuses on modularity and interpretability. The student inputs their essay, determines which \
    criteria to be evaluated on, and then receives instant feedback. Not only does the tool make a determination on \
    the selected criteria, it explains how it reached it's conclusion and then it recommends resources the student \
    can use to improve. The student can then score the resources they were assigned which allows the tool to \
    determine how useful the resources are and improve future recommendations. Currently, it only supports 10th \
    grade.",

    article="Authors: Chris Roche, Nathan Deinlein, Darryl Dawkins. \
             Developed for the Southern Methodist University, M.S. Data Science program"
)



# Lastly, launch the application
# adding share=True makes a link you can share for 72hrs
iface.launch(share=True)



# Documentation with examples:
# https://www.gradio.app/docs/



##########################################################
### END
##########################################################



Running on local URL:  http://127.0.0.1:7861/
Running on public URL: https://42786.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x7fb08437c2b0>,
 'http://127.0.0.1:7861/',
 'https://42786.gradio.app')