In [None]:
import json
def compute_unigram_frequency(comment, moral_foundations_words):
    """Return the relative frequency of each vocabulary word in a comment.

    Args:
        comment: An already-tokenized comment, i.e. a sized sequence of
            tokens (``len()`` and iteration are both used on it).
        moral_foundations_words: Iterable of hashable words to look for.
            Duplicates collapse into a single key.

    Returns:
        A dict mapping every word of ``moral_foundations_words`` to the
        number of times it occurs in ``comment`` divided by
        ``len(comment)``. When ``comment`` is empty every frequency is 0.0.
    """
    # Every vocabulary word starts at zero so that absent words still
    # appear in the result.
    unigram_counts = dict.fromkeys(moral_foundations_words, 0)
    total_words = len(comment)

    for word in comment:
        # Test membership against the dict (hash lookup) rather than the
        # caller's collection, which may be a long list or a one-shot
        # iterator that has already been consumed above.
        if word in unigram_counts:
            unigram_counts[word] += 1

    # An empty comment has no denominator; report zero frequencies instead
    # of dividing by zero.
    if total_words == 0:
        return {word: 0.0 for word in unigram_counts}

    return {word: count / total_words for word, count in unigram_counts.items()}


def classify_sentence_with_profile(sentence, moral_foundations_dict):
    """Score a tokenized sentence against every entry of a foundations dict.

    Args:
        sentence: Tokenized sentence, passed unchanged to
            ``compute_unigram_frequency`` as the comment.
        moral_foundations_dict: Mapping whose values are passed to
            ``compute_unigram_frequency`` as the word collection.
            NOTE(review): this assumes the shape is foundation -> words;
            confirm against the JSON file actually loaded by the caller.

    Returns:
        A dict with the same keys as ``moral_foundations_dict``; each value
        is the sum of the per-word frequencies computed for that key.
    """
    return {
        name: sum(compute_unigram_frequency(sentence, vocabulary).values())
        for name, vocabulary in moral_foundations_dict.items()
    }



# Load the dictionary that is handed to classify_sentence_with_profile.
# The encoding is given explicitly so that decoding does not depend on the
# platform's default locale.
with open("lemmatized_moral_foundations_dictionary.json", "r", encoding="utf-8") as f:
    word_to_moral_foundation_expanded = json.load(f)

# One score profile (a dict) per entry of the tokenized-comment column.
classification_profiles = [
    classify_sentence_with_profile(comment, word_to_moral_foundation_expanded)
    for comment in full_df["tokenized_body_words_norm"]
]

# Convert classification_profiles to a DataFrame (one column per profile key)
classification_df = pd.DataFrame(classification_profiles)

# Append the score columns to full_df. Both indexes are reset first so the
# rows line up by position rather than by the original index labels.
# NOTE(review): re-running this cell appends the score columns again.
full_df = pd.concat(
    [full_df.reset_index(drop=True), classification_df.reset_index(drop=True)],
    axis=1,
)



In [None]:
from moralstrength import string_moral_values
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Restrict full_df to the rows whose 'subreddit' value is exactly 'climate'.
# full_df must already be defined when this statement runs; everything below
# in this cell works on the filtered frame.
full_df = full_df.loc[full_df['subreddit'] == 'climate']

def process_text_and_return_dict(text, model='unigram+freq'):
    """Run moralstrength's ``string_moral_values`` on ``text``.

    Args:
        text: The text to score; forwarded unchanged.
        model: Model name forwarded as the ``model`` keyword argument.

    Returns:
        Whatever ``string_moral_values`` returns (presumably a dict of
        scores, judging by this function's name -- confirm against the
        moralstrength documentation).
    """
    return string_moral_values(text, model=model)

# Score every entry of the 'body' column with the 'unigram+freq' model; the
# result is a Series holding one process_text_and_return_dict result per row.
morals_expanded = full_df['body'].apply(process_text_and_return_dict, args=('unigram+freq',))

# Build a DataFrame from the list of per-row results.
morals_df = pd.DataFrame(morals_expanded.tolist())

# Append the new columns to full_df. Both indexes are reset first so the rows
# line up by position rather than by the original index labels.
# NOTE(review): if full_df already has columns with the same names as the
# keys produced here, the result will contain duplicate column names -- verify.
full_df = pd.concat(
    [frame.reset_index(drop=True) for frame in (full_df, morals_df)],
    axis=1,
)
