In [3]:
import numpy as np
import spacy
from spacy_syllables import SpacySyllables

In [8]:
# Grade label for each rounded Flesch-Kincaid level; the tuple index is the level.
# Levels 0 and 1 both map to 'first'.
_FK_GRADE_LABELS = (
    'first',
    'first',
    'second',
    'third',
    'fourth',
    'fifth',
    'sixth',
    'seventh',
    'eighth',
    'ninth',
    'tenth',
    'eleventh',
    'twelfth',
    'university freshman',
    'university sophomore',
    'university junior',
    'university senior',
    'post-graduate',
)

# Pipeline shared between calls so the large model is only loaded once per
# definition of this cell instead of on every call.
_fk_nlp = None


def _fk_pipeline():
    """Return the shared spaCy pipeline, loading and configuring it on first use."""
    global _fk_nlp
    if _fk_nlp is None:
        nlp = spacy.load('en_core_web_lg')
        # The 'syllables' component comes from spacy_syllables; presumably the
        # notebook-level `from spacy_syllables import SpacySyllables` is what
        # registers it, so no throwaway SpacySyllables instance is built here.
        if 'syllables' not in nlp.pipe_names:
            nlp.add_pipe('syllables', after='tagger')
        _fk_nlp = nlp
    return _fk_nlp


def flesch_kincaid(text):
    """Estimate the Flesch-Kincaid grade level of ``text``.

    The score is ``0.39 * (words / sentences) + 11.8 * (syllables / words)
    - 15.59``, rounded to the nearest integer and mapped to a grade label.
    Scores below 0 are reported as 'first' and scores above 17 as
    'post-graduate' rather than raising ``KeyError``.

    Parameters
    ----------
    text : str
        The passage to score.

    Returns
    -------
    str
        A label of the form ``"<grade> grade level"``, e.g.
        ``"ninth grade level"`` or ``"university senior grade level"``.

    Raises
    ------
    ValueError
        If ``text`` contains no sentences or no words, so the averages are
        undefined.
    """
    doc = _fk_pipeline()(text)

    sent_count = sum(1 for _ in doc.sents)
    # A word is any token that is neither punctuation nor pure whitespace
    # (e.g. the token produced by a double space must not count as a word).
    word_count = sum(
        1 for token in doc if not (token.is_punct or token.is_space)
    )
    # Tokens for which the syllables component produced nothing contribute 0.
    syll_count = sum(
        token._.syllables_count
        for token in doc
        if token._.syllables is not None
    )

    if sent_count == 0 or word_count == 0:
        raise ValueError(
            "text must contain at least one sentence and one word"
        )

    # Average words per sentence (WPS) and average syllables per word (SPW)
    wps = word_count / sent_count
    spw = syll_count / word_count
    level = 0.39*wps + 11.8*spw - 15.59

    # Clamp so very easy or very dense text still maps to a label.
    index = min(max(round(level), 0), len(_FK_GRADE_LABELS) - 1)
    return f"{_FK_GRADE_LABELS[index]} grade level"

In [9]:
# Short first-person passage in simple language, used as an easy-text check.
# Adjacent literals inside the parentheses are joined at compile time, so every
# fragment boundary needs its own space (the "frogs " / "with me." boundary
# previously lacked one and produced the single token "frogswith").
sample_text = (
    "Hi, my name is Harry Potter. I am a wizard, but I did "
    "not know I was a wizard until I was ten years old. I have a "
    "white owl named Hedwig. My two best friends are Ron and Hermione."
    " We go on lots of adventures. I met them my first year at "
    "Hogwarts.  We rode on the same train carriage. Hermione "
    "fixed my glasses with her wand, and Ron ate chocolate frogs "
    "with me."
)

flesch_kincaid(sample_text)

'first grade level'

In [10]:
# Dense scientific passage, used as a hard-text check.
# Adjacent literals inside the parentheses are joined at compile time, so every
# fragment boundary needs its own space (the "captures the " / "chemical"
# boundary previously lacked one and produced the single token "thechemical").
sample_text_2 = (
    "One objective of the degradation of foodstuffs is"
    " to convert the energy contained in chemical bonds into the "
    "energy-rich compound adenosine triphosphate (ATP), which captures the "
    "chemical energy obtained from the breakdown of food molecules and"
    " releases it to fuel other cellular processes. In eukaryotic cells "
    "(that is, any cells or organisms that possess a clearly defined "
    "nucleus and membrane-bound organelles) the enzymes that catalyze"
    " the individual steps involved in respiration and energy conservation"
    " are located in highly organized rod-shaped compartments called"
    " mitochondria. In microorganisms the enzymes occur as components of"
    " the cell membrane. A liver cell has about 1,000 mitochondria; large "
    "egg cells of some vertebrates have up to 200,000."
)

flesch_kincaid(sample_text_2)

'university senior grade level'

In [15]:
# Read the whole book into memory as one string. The context manager closes
# the file handle as soon as the read finishes instead of leaving it open for
# the lifetime of the kernel.
with open("first_book.txt", "r") as book_file:
    first_hp_book = book_file.read()

In [19]:
# Grade level of the first quarter of the book (by character count).
hp_quarter_len = round(len(first_hp_book) / 4)
flesch_kincaid(first_hp_book[:hp_quarter_len])

'second grade level'

In [20]:
# Grade level of the second quarter of the book (by character count).
# Both bounds are multiples of the same rounded quarter length, matching how
# the other quarter cells compute their bounds, so consecutive quarters can
# never leave a gap or overlap when the length is not divisible by 4.
hp_quarter_len = round(len(first_hp_book) / 4)
flesch_kincaid(first_hp_book[hp_quarter_len:2 * hp_quarter_len])

'second grade level'

In [21]:
# Grade level of the third quarter of the book (by character count).
hp_quarter_len = round(len(first_hp_book) / 4)
flesch_kincaid(first_hp_book[2 * hp_quarter_len:3 * hp_quarter_len])

'second grade level'

In [22]:
# Grade level of the final quarter of the book: everything from three
# rounded quarter-lengths in, through the end of the text.
hp_quarter_len = round(len(first_hp_book) / 4)
flesch_kincaid(first_hp_book[3 * hp_quarter_len:])

'first grade level'

In [16]:
# Total length of the book text, in characters.
len(first_hp_book)

471988