#### Read in Moral Foundation Dictionary

In [None]:
# File path to the Moral Foundations Dictionary (.dic file)
dic_file_path = 'moral foundations dictionary.dic'

# Numeric category code as written in the file (first token of a header line)
# -> moral foundation name (second token of that line).
moral_foundations_dict = {}
# Dictionary word (first token of a word line) -> list of moral foundation names
# looked up from the codes that follow it on the same line.
word_to_moral_foundation = {}

# The category header is the run of lines between the first and the second line
# starting with '%'. Counting those delimiter lines, instead of assuming the
# header occupies a fixed number of physical lines, keeps the parser correct if
# the file gains or loses a blank line or a category.
delimiters_seen = 0
with open(dic_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        stripped = line.strip()
        if not stripped:
            continue  # blank lines carry no data
        if stripped.startswith('%'):
            delimiters_seen += 1
            continue

        parts = stripped.split()
        first_token, remaining_tokens = parts[0], parts[1:]

        if delimiters_seen == 1:
            # Header line: "<code> <foundation name>"
            moral_foundations_dict[first_token] = remaining_tokens[0]
        else:
            # Word line: "<word> <code> [<code> ...]"; an unknown code raises
            # KeyError on purpose so a malformed file is noticed immediately.
            word_to_moral_foundation[first_token] = [
                moral_foundations_dict[code] for code in remaining_tokens
            ]


In [None]:
# Inspect the category mapping parsed from the header of the .dic file.
moral_foundations_dict

In [None]:
# Inspect the word -> list-of-foundations mapping parsed from the .dic file.
word_to_moral_foundation

#### Expand Moral Foundation Dictionary with Word2Vec

Use a pre-trained GloVe embedding model trained on tweets (`glove-twitter-25`), loaded through gensim's downloader.

In [None]:
import gensim.downloader as api

# Load the pre-trained "glove-twitter-25" embeddings through gensim's downloader API.
model = api.load("glove-twitter-25")

In [None]:
# Expand the dictionary with embedding neighbours of each seed word.
#
# Rules applied here:
#   * A seed word keeps its own curated categories; being a neighbour of some
#     other seed word never overwrites them.
#   * A new word that is a close neighbour of several seed words receives the
#     union of their categories (first-seen order, no duplicates), instead of
#     only the categories of whichever seed word happened to be processed last.
#   * Every new entry gets its own list, so editing one entry later cannot
#     silently change another.
similarity_threshold = 0.85  # minimum similarity for a neighbour to inherit categories
top_n_neighbours = 100       # number of nearest neighbours requested per seed word

expanded_dictionary = {}  # new word -> merged list of categories

for word, categories in word_to_moral_foundation.items():
    # Seed words missing from the embedding vocabulary cannot be expanded.
    if word not in model.key_to_index:
        continue

    similar_words = model.most_similar(positive=[word], topn=top_n_neighbours)
    for similar_word, similarity_score in similar_words:
        if similarity_score < similarity_threshold:
            continue
        if similar_word in word_to_moral_foundation:
            continue  # never override a curated entry
        merged_categories = expanded_dictionary.setdefault(similar_word, [])
        for category in categories:
            if category not in merged_categories:
                merged_categories.append(category)

# Seed entries first, then the new words (the two key sets are disjoint).
word_to_moral_foundation_expanded = word_to_moral_foundation.copy()
word_to_moral_foundation_expanded.update(expanded_dictionary)

In [None]:
# Number of entries in the expanded dictionary.
len(word_to_moral_foundation_expanded)

In [None]:
# Inspect the expanded dictionary (note: this displays the whole mapping).
word_to_moral_foundation_expanded

#### Read in the Reddit Data

In [None]:
import pickle
!ls

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load pickle files that come from a source you trust.

# Directory holding the pickled Reddit data. It is an absolute, machine-specific
# path; it is defined once here so that pointing the notebook at another
# location is a single edit.
project_data_dir = '/Users/kathryn/Projects/Advanced ML/project/climate-conversations/project/data_collection/project_data'

# Reddit comments
with open(f'{project_data_dir}/climateCommentsDf.pickle', 'rb') as f:
    comments_df = pickle.load(f)

# Reddit submissions
with open(f'{project_data_dir}/climateSubmissionsDf.pickle', 'rb') as f:
    submissions_df = pickle.load(f)

In [None]:
# Preview the first rows of the loaded comments.
comments_df.head()


In [None]:
# Lower-case every comment body into a new 'processed_body' column; the raw
# 'body' column is left untouched.
comments_df['processed_body'] = comments_df['body'].str.lower()
# Show the frame's shape, which now includes the new column.
comments_df.shape

In [None]:
# Work on the first 1,000 comments only. The explicit .copy() makes the subset
# an independent DataFrame, so adding a column to it later does not trigger
# pandas' SettingWithCopyWarning or depend on whether the slice is a view.
subset_comments_df = comments_df.iloc[0:1000].copy()

In [None]:
from scipy.spatial.distance import cosine
import numpy as np

def calculate_similarity(comment_vector, foundation_vector):
    """Return the cosine similarity of two vectors, or -1 if it is undefined.

    Args:
        comment_vector: Vector for the comment (array-like), or None.
        foundation_vector: Vector for the moral foundation (array-like), or None.

    Returns:
        ``1 - scipy.spatial.distance.cosine(comment_vector, foundation_vector)``
        when both vectors are present and each has at least one non-zero
        component; the sentinel ``-1`` otherwise.
    """
    # A missing vector has no similarity to anything.
    if comment_vector is None or foundation_vector is None:
        return -1

    # Cosine similarity is undefined only for an all-zero (or empty) vector,
    # because its norm is zero. np.any tests exactly that; np.all would also
    # reject every valid vector that merely contains one zero component.
    if np.any(comment_vector) and np.any(foundation_vector):
        return 1 - cosine(comment_vector, foundation_vector)
    return -1

In [None]:
def assign_moral_foundations(comment, extended_dict):
    """Return the moral foundations whose dictionary entries occur in a comment.

    The comment is split on whitespace and each token is compared with the keys
    of ``extended_dict``: a key ending in ``*`` matches every token that starts
    with the text before the ``*``; any other key must equal the token exactly.

    Args:
        comment: Text to label. A value that is not a string (for example the
            NaN pandas uses for a missing body) yields an empty list.
        extended_dict: Mapping of dictionary word -> iterable of foundation labels.

    Returns:
        Sorted list of the distinct foundation labels that matched. Sorting
        makes the result identical from run to run, which the iteration order
        of a set of strings does not guarantee.
    """
    if not isinstance(comment, str):
        return []

    # Collect the wildcard prefixes once per call instead of re-scanning the
    # whole dictionary for every token of the comment.
    wildcard_entries = [
        (key[:-1], values)
        for key, values in extended_dict.items()
        if key.endswith('*')
    ]

    foundations = set()
    for word in comment.split():
        # Exact match: a direct dictionary lookup.
        if word in extended_dict:
            foundations.update(extended_dict[word])
        # Wildcard match: the token starts with the entry's prefix.
        for prefix, values in wildcard_entries:
            if word.startswith(prefix):
                foundations.update(values)
    return sorted(foundations)
# Label every comment in the subset with the foundations matched in its
# lower-cased body, using the expanded dictionary.
subset_comments_df['moral_foundations'] = subset_comments_df['processed_body'].apply(assign_moral_foundations, extended_dict=word_to_moral_foundation_expanded)

In [None]:
# Show the foundations assigned to each comment in the subset.
display(subset_comments_df['moral_foundations'])