In [None]:
import spacy
import re
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
import numpy as np

In [None]:
# Load the spaCy `en_core_web_sm` pipeline once and keep it in the notebook-level
# name `model`; `get_hedging_frequency` calls `model(...)` to tokenize its input.
# The pipeline package must already be installed in the kernel's environment.
model = spacy.load('en_core_web_sm')

In [None]:
# Predefined hedging vocabulary (taken from Hyland's article), grouped by category.
# Each key is a category name and each value is a list of lowercase terms.
# The "phrases" category holds multi-word expressions; `get_hedging_frequency`
# looks them up with a regex over the text rather than token by token.
hedging_words = {
    "modal_verbs": ["might", "could", "may", "would", "should"],
    "epistemic_verbs": ["seem", "appear", "suggest", "indicate", "assume"],
    "adverbs": ["possibly", "probably", "apparently", "likely", "arguably"],
    "adjectives": ["possible", "probable", "uncertain", "hypothetical"],
    "phrases": ["it seems", "it is possible", "there is a chance"]
}

In [None]:
def get_hedging_frequency(text, hedging_words=hedging_words):
    """Count hedging expressions in ``text`` and express them per 100 words.

    Single-word hedges are matched case-insensitively against both the surface
    form and the spaCy lemma of every token, so "Might" and "seems" are counted
    for the entries "might" and "seem". Multi-word entries under the
    ``"phrases"`` key are matched case-insensitively, on whole-word boundaries,
    against the whitespace-normalised text. A token that belongs to a matched
    phrase is still counted in its own single-word category.

    Args:
        text: Raw text to analyse.
        hedging_words: Mapping of category name -> list of hedging terms. The
            optional ``"phrases"`` category is treated as multi-word
            expressions; every other category is treated as single tokens.

    Returns:
        A ``(hedge_counts, hedge_frequencies)`` tuple. ``hedge_counts`` is a
        ``Counter`` of hits per category (only categories with at least one
        token hit appear, plus ``"phrases"`` whenever phrases were searched).
        ``hedge_frequencies`` maps the same categories to hits per 100 word
        tokens; punctuation and whitespace tokens are excluded from the
        denominator. All frequencies are 0.0 when the text has no word tokens.
    """
    processed_text = re.sub(r'\s+', ' ', text)  # collapse runs of whitespace
    spacy_doc = model(processed_text)  # parse text with spaCy

    phrases = hedging_words.get("phrases", [])
    # Lower-cased lookup sets for the single-word categories, built once so the
    # per-token check is a cheap set intersection.
    single_word_terms = {
        category: {word.lower() for word in words}
        for category, words in hedging_words.items()
        if category != "phrases"
    }

    # Scan tokens and count single-word hedges (surface form or lemma).
    hedge_counts = Counter()
    for token in spacy_doc:
        forms = {token.text.lower(), token.lemma_.lower()}
        for category, terms in single_word_terms.items():
            if not forms.isdisjoint(terms):
                hedge_counts[category] += 1

    # Scan the normalised text for multi-word hedging phrases. The lookarounds
    # stop "it seems" from matching inside e.g. "commit seems".
    for phrase in phrases:
        pattern = r'(?<!\w)' + re.escape(phrase) + r'(?!\w)'
        hedge_counts["phrases"] += len(
            re.findall(pattern, processed_text, flags=re.IGNORECASE)
        )

    # Only real word tokens belong in the denominator of a per-100-words rate.
    total_words = sum(
        1 for token in spacy_doc if not (token.is_punct or token.is_space)
    )
    if total_words == 0:
        # Empty / punctuation-only input: avoid dividing by zero.
        hedge_frequencies = {category: 0.0 for category in hedge_counts}
    else:
        hedge_frequencies = {
            category: (count / total_words) * 100
            for category, count in hedge_counts.items()
        }

    return hedge_counts, hedge_frequencies


In [None]:
# Sample free-text response used to exercise get_hedging_frequency.
# The apostrophe in "I’ve" is U+2019; it had been corrupted into the mojibake
# sequence produced by decoding UTF-8 bytes as cp1252.
test_response = '''
A meaningful experience I’ve had recently might be the time I volunteered at a local food bank. It seems that such experiences can often provide a deeper perspective on community needs. I would say that interacting with both the volunteers and the people receiving assistance was possibly one of the most eye-opening aspects. It felt like a reminder of how small actions could potentially make a larger impact. Although I can't be entirely sure, I think this experience has encouraged me to be more involved in similar activities in the future.
'''

In [None]:
# Run the hedging analysis on the sample response using the default
# `hedging_words` vocabulary; unpack the per-category counts and rates.
hedge_counts, hedge_frequencies = get_hedging_frequency(test_response)

In [None]:
# Display the raw number of hedging hits per category.
hedge_counts

Counter({'modal_verbs': 3, 'adverbs': 1, 'phrases': 0})

In [None]:
# Display the per-category hedging rate (hits per 100 tokens).
hedge_frequencies

{'modal_verbs': 2.941176470588235,
 'adverbs': 0.9803921568627451,
 'phrases': 0.0}

In [None]:
# Temporary (placeholder) slang lexicon: lowercase slang token -> polarity weight.
# Positive weights mark positive slang, negative weights mark negative slang.
# Keys must be lowercase because the functions below lowercase the text
# before looking tokens up.
SLANG_DICTIONARY = {
    "lol": 1.0, "omg": 1.0, "idk": 0.5, "brb": 0.5,
    "damn": -0.5, "hell": -0.7, "crap": -1.0, "fuck": -2.0, "shit": -1.5
}

def get_slang_frequency(text, slang_dict):
    """Count slang tokens in ``text`` and their share of all tokens.

    The text is lowercased, every character that is neither a word character
    nor whitespace is deleted (so "don't" becomes "dont"), and the result is
    split on whitespace.

    Args:
        text: Raw text to analyse.
        slang_dict: Container of slang terms (e.g. a dict keyed by term). Keys
            are compared against lowercased tokens, so they should be lowercase.

    Returns:
        A ``(slang_counts, normalized_frequency)`` tuple: a ``Counter`` of
        occurrences per slang term, and the number of slang tokens divided by
        the total number of tokens. The frequency is 0.0 when the text
        contains no tokens.
    """
    # Lowercase and strip punctuation so "Damn," matches the "damn" entry.
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    tokens = text.split()
    total_words = len(tokens)

    # Count slang words in the processed text.
    slang_counts = Counter(word for word in tokens if word in slang_dict)

    # Empty or punctuation-only input has no tokens; avoid dividing by zero.
    if total_words == 0:
        return slang_counts, 0.0

    normalized_frequency = sum(slang_counts.values()) / total_words
    return slang_counts, normalized_frequency

def calculate_sentiment(text, slang_dict):
    """Sum the polarity weights of the slang tokens found in ``text``.

    The text is lowercased, every character that is neither a word character
    nor whitespace is deleted, and the result is split on whitespace; each
    token present in ``slang_dict`` contributes its weight once per occurrence.

    Args:
        text: Raw text to analyse.
        slang_dict: Mapping of lowercase slang term -> numeric polarity weight.

    Returns:
        A ``(total_polarity, slang_token_count)`` tuple. For text without any
        slang tokens this is ``(0, 0)``.
    """
    # Lowercase and strip punctuation so "Damn," matches the "damn" entry.
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    tokens = text.split()

    polarity_scores = [slang_dict[word] for word in tokens if word in slang_dict]
    return sum(polarity_scores), len(polarity_scores)

def intimacy_score(slang_frequency, sentiment_score, slang_count, total_words):
    """Combine slang usage and slang polarity into a single intimacy value.

    Computed as ``slang_frequency / total_words`` plus
    ``sentiment_score / (slang_count + 1)``; the ``+ 1`` keeps the second term
    defined when no slang was found.

    NOTE(review): ``slang_frequency`` is divided by ``total_words`` here. If a
    caller passes the already-normalised ratio returned by
    ``get_slang_frequency``, it gets normalised twice -- confirm whether a raw
    slang count is the intended input.

    Args:
        slang_frequency: Slang measure to be normalised by ``total_words``.
        sentiment_score: Summed polarity of the slang tokens.
        slang_count: Number of slang tokens that contributed to the sentiment.
        total_words: Total number of tokens in the analysed text.

    Returns:
        The intimacy score as a number. When ``total_words`` is 0 the frequency
        term is treated as 0.0 instead of raising ``ZeroDivisionError``.
    """
    # An empty text has no words to normalise by; treat its slang rate as zero.
    normalized_frequency = slang_frequency / total_words if total_words else 0.0
    intimacy = normalized_frequency + sentiment_score / (slang_count + 1)
    return intimacy


Slang Counts: Counter({'damn': 1, 'hell': 1, 'lol': 1, 'omg': 1, 'crap': 1, 'idk': 1})
Normalized Slang Frequency: 0.2857142857142857
Total Sentiment Score: 0.30000000000000004


In [None]:
# Sample text mixing capitalised slang and punctuation, to exercise the
# lowercasing / punctuation-stripping step. "BTW" and "SMH" are not keys in
# SLANG_DICTIONARY, so they are not counted as slang.
sample_text = """
Damn, I don't know what the hell is going on. LOL, omg, this is such crap. BTW, IDK why you're SMH.
"""

# Displays (per-term slang counts, slang tokens / total tokens).
get_slang_frequency(sample_text, SLANG_DICTIONARY)

(Counter({'damn': 1, 'hell': 1, 'lol': 1, 'omg': 1, 'crap': 1, 'idk': 1}),
 0.2857142857142857)

In [None]:
# Displays (summed polarity of the slang tokens, number of slang tokens).
calculate_sentiment(sample_text, SLANG_DICTIONARY)

(0.30000000000000004, 6)