This notebook extracts LIWC features and applies simple, previously trained classifiers (SVC) to them in order to predict the text annotations (moral sentiment) in the MFRC.

## Load Packages

In [15]:
import os
import pickle as pkl
import subprocess

import numpy as np
import pandas as pd
# Set the TF_CPP_MIN_LOG_LEVEL environment variable to "3".
# NOTE(review): no TensorFlow import is visible in this notebook -- confirm this is still needed.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Label sets, keyed first by corpus name and then by labelling scheme.
foundations = {
    "mfrc": {
        "complete": [
            "care",
            "harm",
            "equality",
            "proportionality",
            "loyalty",
            "betrayal",
            "authority",
            "subversion",
            "purity",
            "degradation",
            "thin morality",
            "non-moral",
        ],
        "binding": [
            "individual",
            "binding",
            "proportionality",
            "thin morality",
            "non-moral",
        ],
        "moral": [
            "moral",
            "thin morality",
            "non-moral",
        ],
        "full": [
            "care",
            "proportionality",
            "loyalty",
            "authority",
            "purity",
            "equality",
            "thin morality",
            "non-moral",
        ],
    },
}

## General Parameters

In [17]:
# Run configuration; these three values are also used to build the name of the prediction output file.
corp, mode, training = "mfrc", "full", "normal"

# Paths to the per-label model files and to the test data (original sample and its LIWC features).
# Note: the label list is always foundations["mfrc"]["full"]; only the file-name suffix follows `mode`.
model_files = [
    f"../models/liwc_{label}_{mode}.sav"
    for label in foundations["mfrc"]["full"]
]
sample_file = "../data/preprocessed/mfrc_sample_full.csv"
sample_features = "../data/preprocessed/mfrc_sample_full_liwc.csv"

## Functions for Predictions

In [22]:
def predict(input_file, sample_file, model_files):
    """Predict one label column per classifier and save the annotated sample.

    Reads the LIWC feature table from ``input_file``, applies every pickled
    model in ``model_files`` to it, stores the predictions in the sample
    table read from ``sample_file`` (one column per label) and writes the
    result as CSV into ``../results/predictions/``.

    Parameters
    ----------
    input_file : str
        CSV with the LIWC features. Its first column is used as the index
        and its "Segment" column is dropped before prediction.
    sample_file : str
        CSV with the original sample/test data.
    model_files : list of str
        Paths to the pickled models, in the same order as the labels in
        ``foundations["mfrc"]["full"]``.

    Raises
    ------
    ValueError
        If the number of model files differs from the number of labels.

    Notes
    -----
    Relies on the notebook-level variables ``foundations``, ``corp``,
    ``training`` and ``mode``. Returns nothing; the result is written to disk.
    """
    X_test = pd.read_csv(input_file, index_col=0).drop(["Segment"], axis=1)  # load liwc vectors
    df_sample = pd.read_csv(sample_file)  # get original sample/test file

    cols = foundations["mfrc"]["full"]
    # Fail early with a clear message instead of mislabelling or truncating predictions.
    if len(model_files) != len(cols):
        raise ValueError(
            "Expected " + str(len(cols)) + " model files (one per label), got " + str(len(model_files))
        )

    # loop over N classes, load classifier for each and predict
    pred_dict = {}
    for c, model_file in zip(cols, model_files):
        # NOTE: unpickling can execute arbitrary code; only load model files from a trusted source.
        with open(model_file, "rb") as fh:  # context manager closes the handle after loading
            model = pkl.load(fh)
        print("Predicting: " + c)
        pred_dict[c] = model.predict(X_test)
    df_sample[cols] = pd.DataFrame(pred_dict, index=df_sample.index)

    print("Saving the prediction")
    out_dir = "../results/predictions/"
    # Create the target folder if needed so the run does not fail after all predictions are done.
    os.makedirs(out_dir, exist_ok=True)
    df_sample.to_csv(out_dir + corp + "_labels_" + training + "_" + mode + "_liwc.csv", index=False)
    
def create_liwc(input_file, output_file):
    """Extract LIWC features by running the ``LIWC-22-cli`` command-line tool.

    Invokes ``LIWC-22-cli`` with ``--mode wc`` and ``--column-indices 1`` on
    ``input_file`` and passes ``output_file`` as ``--output``. The executable
    must be available on the PATH.

    Parameters
    ----------
    input_file : str
        Path of the file handed to the tool as ``--input``.
    output_file : str
        Path handed to the tool as ``--output``.

    Returns
    -------
    int
        Exit status of the ``LIWC-22-cli`` process, so callers can detect a
        failed extraction.
    """
    cmd_to_execute = [
        "LIWC-22-cli",
        "--mode", "wc",
        "--input", input_file,
        "--column-indices", "1",
        "--output", output_file,
    ]

    # Arguments are passed as a list (no shell), so paths with spaces are handled safely.
    return subprocess.call(cmd_to_execute)

## Get Test Data

In [19]:
# Optional: uncomment to extract the LIWC features via LIWC-22-cli (or create them with the LIWC client instead).
# create_liwc(sample_file, sample_features)

## Make predictions

In [21]:
# get annotations of texts: apply every classifier to the LIWC features and save the predictions
predict(sample_features, sample_file, model_files)

Predicting: care
Predicting: proportionality
Predicting: loyalty
Predicting: authority
Predicting: purity
Predicting: equality
Predicting: thin morality
Predicting: non-moral
Saving the prediction
