In [70]:
# Import necessary packages
from nltk.tokenize import sent_tokenize
import os
from textstat.textstat import textstatistics
import requests
from bs4 import BeautifulSoup
import contractions

In [77]:
### Helper functions
def getText(link, timeout=10):
    """
    Uses the requests library to retrieve HTML from a given webpage url and the BeautifulSoup
    library to parse the HTML, returning the text content from the url as a string.

    Only the text of <p> and <li> elements is collected. Pieces are separated by
    whitespace so that words and sentences from adjacent elements do not run together.

    Inputs: The url of the webpage to analyze, in string format.
            timeout: Seconds to wait for the server before giving up (default 10).
    Outputs: The text content of the webpage, in string format.
    Raises: requests.exceptions.RequestException if the request fails, times out,
            or the server answers with an HTTP error status.
    """
    r = requests.get(link, timeout=timeout)

    # Fail loudly on 4xx/5xx responses instead of scoring the text of an error page
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'html.parser')

    # A document without a <body> makes soup.body None; search the whole tree instead
    root = soup.body if soup.body is not None else soup

    # Get text from <p> tags, separated by spaces so that the last word of one
    # paragraph does not fuse with the first word of the next
    paragraph_text = " ".join(tag.text for tag in root.find_all('p'))

    # Get text from <li> tags; only include list items with length 2+, each
    # terminated with a period so it is treated as its own sentence
    list_text = " ".join(
        tag.text + "." for tag in root.find_all('li') if len(tag.text) >= 2
    )

    return (paragraph_text + ". " + list_text).strip()




def cleanText(str):
    """
    Cleans text to a standard format to facilitate readability calculations.

    The text is lowercased, passed through contractions.fix, and every character
    that is not a letter is replaced by a space before splitting into words.

    Inputs: The text to clean, in string format.
    Outputs: A list of individual words in the string.
    """
    # NOTE: the parameter name shadows the builtin `str`; it is kept unchanged so
    # that existing keyword-argument callers keep working.

    # Convert all text to lowercase, then deal with contractions
    text = contractions.fix(str.lower())

    # Remove all punctuation, numbers, etc. in a single pass
    letters_only = "".join(ch if ch.isalpha() else " " for ch in text)

    # Create list of words (split on whitespace)
    return letters_only.split()




def totalWords(text):
    """
    Counts the whitespace-separated words in the given text.

    Inputs: The text to analyze, in string format.
    Output: An integer giving the number of whitespace-delimited tokens in the text.
    """
    tokens = text.split()
    return len(tokens)




def totalSentences(text):
    """
    Counts the sentences in the given text, as split by sent_tokenize.

    Inputs: The text to analyze, in string format.
    Output: An integer representing the total number of sentences in the given input text.
    """
    sentences = sent_tokenize(text)
    return len(sentences)



    
def totalSyllables(text):
    """
    Calculates the total number of syllables in the given text.

    The text is first normalized with cleanText, and the syllable counts of the
    resulting words are summed.

    Inputs: The text to analyze, in string format.
    Output: The total number of syllables in the given input text (0 when the
            text contains no words).
    """
    # Build the syllable counter once instead of constructing a new one per word
    counter = textstatistics()

    return sum(counter.syllable_count(word) for word in cleanText(text))



    
def averageWordsPerSentence(text):
    """
    Calculates the average number of words per sentence in the given text.

    Inputs: The text to analyze, in string format.
    Output: A float representing the average number of words per sentence in the
            given input text, or 0 when the text contains no sentences.
    """
    # Tokenize into sentences only once; the count is needed for both the guard
    # and the division
    sentence_count = totalSentences(text)
    if sentence_count > 0:
        return totalWords(text) / sentence_count

    return 0



    
def averageSyllablesPerWord(text):
    """
    Calculates the average number of syllables per word in the given text.

    Inputs: The text to analyze, in string format.
    Output: A float representing the average number of syllables per word in the
            given input text, or 0 when the text contains no words.
    """
    # Count the words only once; the count is needed for both the guard and the
    # division
    word_count = totalWords(text)
    if word_count > 0:
        return totalSyllables(text) / word_count

    return 0

In [81]:
### Readability calculators
def FRE(text):
    """
    Calculates the Flesch Reading Ease score of the given text.

    The score is 206.835 - 1.015 * (average words per sentence)
    - 84.6 * (average syllables per word).

    Inputs: The text to analyze, in string format.
    Output: A float representing the FRE score of the given input text, or None
            when the text is empty or contains only whitespace.
    """
    # Whitespace-only text has no words to score; without this guard both
    # averages are 0 and the formula reports a misleading 206.835
    if not text.strip():
        return None

    return 206.835 - (1.015 * averageWordsPerSentence(text)) - (84.6 * averageSyllablesPerWord(text))




def FK(text):
    """
    Calculates the Flesch-Kincaid score of the given text.

    The score is 0.39 * (average words per sentence)
    + 11.8 * (average syllables per word) - 15.59.

    Inputs: The text to analyze, in string format.
    Output: A float representing the F-K score of the given input text, or None
            when the text is empty or contains only whitespace.
    """
    # Whitespace-only text has no words to score; without this guard both
    # averages are 0 and the formula reports a misleading -15.59
    if not text.strip():
        return None

    return (0.39 * averageWordsPerSentence(text)) + (11.8 * averageSyllablesPerWord(text)) - 15.59

In [82]:
# Page whose text content is fetched for the readability report
url = "https://policies.google.com/terms?hl=en-US"
text = getText(url)
# NOTE(review): this overwrites the text fetched above with an empty string, so
# the report below runs on empty input. Presumably left in to exercise the
# empty-text path -- confirm, and remove this line to score the fetched page.
text = ""

In [83]:
### Print full report
# Raw counts and per-unit averages for the analyzed text
print(f"Total number of words: {totalWords(text)}")
print(f"Total number of sentences: {totalSentences(text)}")
print(f"Total number of syllables: {totalSyllables(text)}")
print(f"Average words per sentence: {averageWordsPerSentence(text)}")
print(f"Average syllables per word: {averageSyllablesPerWord(text)}")
print("\n")
# Readability scores (None when there is no text to score)
print(f"FRE Score: {FRE(text)}")
print(f"F-K Score: {FK(text)}")

Total number of words: 0
Total number of sentences: 0
Total number of syllables: 0
Average words per sentence: 0
Average syllables per word: 0


FRE Score: None
F-K Score: None
