# Inconsistency checker and sentiment analysis

This ipynb file contain the code for creating an inconsistency checker program and a sentiment analysis model.

## Inconsistency checker

By running the following cell, all valid Horn clause formulas form the file ``encoding-results.tsv`` will be converted to ASP facts and rules. All rules and facts from the NL dataset are saved in ``results/horn_inconsistency_checker.lp``. The function find_min_set will isolate all conflicting norms. 

In [87]:
import pandas as pd
import re
import copy
from itertools import permutations
import json
import clingo


df = pd.read_csv('results/encoding-results.tsv', header=0, sep='\t')
valid_fol_df = df[df["horn-eval"]==1]
edit = valid_fol_df[valid_fol_df.num != 1107] # remove juvenile case
valid_horn = edit["horn"]

def strip_parenthesis(s):
    try:
        while s[0] == '(':
            s = s[1:]
        if s.count('(') == s.count(')'):
            return s
        while s[-2] == ')':
            s = s[:-1]
        return s
    except IndexError:
        return s
    

def pred_eval(horn):
    
    predsplit = (re.split("∨|∧", horn))
    eval = ""
    remove = ""
    for i in range(len(predsplit)):
        horn_edit = strip_parenthesis(predsplit[i].strip())
        if "evaluation(BAD)" in horn_edit:
            eval = "bad"
            remove = predsplit[i]
        elif "evaluation(GOOD)" in horn_edit:
            eval = "good"
            remove = predsplit[i]
        else:
            predsplit[i] = (horn_edit.strip())
    predsplit.remove(remove)
    return [eval, predsplit]


def change_syntax(horn):

    p_norm = pred_eval(horn)
    p_rule = copy.deepcopy(p_norm)
    for i in range(len(p_rule[1])):

        p_rule[1][i] = p_rule[1][i].replace("¬", "-")
        p_rule[1][i] = p_rule[1][i].lower()
        p_rule[1][i] = re.sub(r'(v)(\d+)', r'V\2', p_rule[1][i])

    
    for i in range(len(p_norm[1])):
        p_norm[1][i] = p_norm[1][i].replace("¬", "-")
        p_norm[1][i] = p_norm[1][i].lower()

    return p_rule, p_norm

prolog_dict = {}

def create_norm(p_norm):
    perm = list(permutations(p_norm[1]))

    norm = str(len(perm))+"{"
    for p in perm:
        if len(perm) == 1:
            p = str(p)
            p = p[:-2] + p[-1:]
        part = "rule("+str(p_norm[0])+","+str(p)
        norm += part.replace("'", "")+"); "
    return norm[:-2]+"}"+str(len(perm))+"."

def create_rule(p_rule):
    rule = ":- rule(good,("
    for p in p_rule[1]:
        rule += str(p)+", "
    rule = rule[:-2]
    rule += ")), rule(bad,("
    for p in p_rule[1]:
        r = re.sub(r'(V)(\d+)', r'W\2', p)
        rule += str(r)+", "
    return rule[:-2] + "))."

prolog_dict = {}
valid_horn = list(valid_horn)

for horn in valid_horn:
    if "∧" in horn: # don't know how to solve this yet
        continue
    p_rule, p_norm = change_syntax(horn)
    norm = (create_norm(p_norm))
    rule = (create_rule(p_rule))
    prolog_dict[horn] = [norm, rule]
    
# Load the JSON data into a Python dictionary
#data = json.loads(prolog_dict)

# Prepare the content for the clingo file
clingo_content = ""

for norm, rule in prolog_dict.items():
    clingo_content += f"% {norm}\n" + "\n".join(rule) + "\n%---CLAUSE---\n"

# Show the prepared content
# print(clingo_content)

# # Save the content to a clingo file
# with open("horn_inconsistency_checker.lp", "w") as file:
#     file.write(clingo_content)

In [88]:
min_set = ""

def find_min_set(content):
    global min_set
    rest = ""
    clt = clingo.Control()
    clt.add("base", [], content)
    clt.ground([("base", [])])
    result = clt.solve()
    while result.satisfiable == False:
        current = content[:content.find('%---CLAUSE---')]
        content = content[content.find('%---CLAUSE---')+14:]
        clt = clingo.Control()
        clt.add("base", [], content)
        clt.ground([("base", [])])
        res = clt.solve()
        if res.satisfiable == True:
            min_set += '---------------------------\n'+current+'%---CLAUSE---\n'
            clt = clingo.Control()
            clt.add("base", [], content)
            clt.ground([("base", [])])
            result = clt.solve()
            clt = clingo.Control()
            clt.add("base", [], current)
            clt.ground([("base", [])])
            single = clt.solve()
            if single.satisfiable == True:
                for clause in content.split('%---CLAUSE---'):
                    test = current+ '\n'+clause
                    clt = clingo.Control()
                    clt.add("base", [], test)
                    clt.ground([("base", [])])
                    res1 = clt.solve()
                    if res1.satisfiable == False:
                        min_set += clause+'%---CLAUSE---\n'
                        content = content.replace(clause,'')
                min_set+='---------------------------'
        else:
            rest += current+'%---CLAUSE---\n'
    rest += content
    clt = clingo.Control()
    clt.add("base", [], rest)
    clt.ground([("base", [])])
    final = clt.solve()   
    if final.satisfiable == False:
        find_min_set(rest)
    return min_set



sett = find_min_set(clingo_content)

<block>:2778:13-16: info: operation undefined:
  (-co)

<block>:2778:105-108: info: operation undefined:
  (-co)

<block>:2778:221-224: info: operation undefined:
  (-co)

<block>:2778:343-346: info: operation undefined:
  (-co)

<block>:2778:411-414: info: operation undefined:
  (-co)

<block>:2778:527-530: info: operation undefined:
  (-co)

<block>:2779:15-18: info: operation undefined:
  (-co)

<block>:3:4-56: info: atom does not occur in any rule head:
  rule(good,((-familyof(V4,V3)),(-askstoomuch(V3,V4))))

<block>:7:74-141: info: atom does not occur in any rule head:
  rule(bad,((-goesshopping(W5)),(-people(W6)),(-new(W6)),(-meet(W5,W6))))

<block>:11:80-153: info: atom does not occur in any rule head:
  rule(bad,((-firstpaycheck(W8)),(-belongsto(W8,W7)),(-excitedabout(W7,W8))))

<block>:15:71-135: info: atom does not occur in any rule head:
  rule(bad,((-safetoeat(W11)),(-servesto(W9,W10,W11)),(-food(W11))))

<block>:19:4-73: info: atom does not occur in any rule head:
  rule(g

In [89]:
print(str(sett))

---------------------------
% (¬Co-workerOf(v180, v179) ∨ ¬WeddingOf(v181, v179) ∨ ¬InvitesTo(v179, v180, v181) ∨ evaluation(BAD))
6{rule(bad,(-co-workerof(v180, v179), -weddingof(v181, v179), -invitesto(v179, v180, v181))); rule(bad,(-co-workerof(v180, v179), -invitesto(v179, v180, v181), -weddingof(v181, v179))); rule(bad,(-weddingof(v181, v179), -co-workerof(v180, v179), -invitesto(v179, v180, v181))); rule(bad,(-weddingof(v181, v179), -invitesto(v179, v180, v181), -co-workerof(v180, v179))); rule(bad,(-invitesto(v179, v180, v181), -co-workerof(v180, v179), -weddingof(v181, v179))); rule(bad,(-invitesto(v179, v180, v181), -weddingof(v181, v179), -co-workerof(v180, v179)))}6.
:- rule(good,(-co-workerof(V180, V179), -weddingof(V181, V179), -invitesto(V179, V180, V181))), rule(bad,(-co-workerof(W180, W179), -weddingof(W181, W179), -invitesto(W179, W180, W181))).
%---CLAUSE---
---------------------------
% (¬Partner(v184, v185) ∨ ¬LargePurchase(v186) ∨ ¬InvolvedInProcessOf(v184, v186) ∨

## Sentiment analysis

The following cell will first extract the labels of each FOL from the manually encoded FOLs from the file ``annotated-encodings.tsv``. The labeles are found based on whether or not the manual encoding use the word "GOOD" or "BAD" in the evaluation part.

Using the same technique, we extract the labels of each FOL from the automatically encoded FOLs fromthe same file. 

We then apply a fine-tuned DistilBERT sentiment analysis model on the norms from the same file. The SA labels are compared to the correct labels from the manual encoding in a cross table to give an incidcation of how well the SA model is able to predict the labels. The labels from the automatic encoding is also compared to the correct labels in a cross table to give an indication of how well the encoding system is able to predict labels.

In [None]:
from transformers import pipeline

df = pd.read_csv('results/annotated-encodings.tsv', sep='\t', header=0)

trans = []
adjust = []
for index, row in df.iterrows():
    i = (row['manual_fol_encoding'].find('→'))
    if 'GOOD' in row['manual_fol_encoding'][i:]:
        trans.append(1)
    elif 'BAD' in row['manual_fol_encoding'][i:]:
        trans.append(0)
    else:
        trans.append(-1)
        
df = df.assign(manual_fol_encoding_sentiment=trans)

for index, row in df.iterrows():
    i = (row['automated_fol_encoding'].find('→'))
    if 'GOOD' in row['automated_fol_encoding'][i:]:
        adjust.append(1)
    elif 'BAD' in row['automated_fol_encoding'][i:]:
        adjust.append(0)
    else:
        adjust.append(-1)
    
df = df.assign(automated_fol_encoding_sentiment=adjust)

sdf = df.copy()
sdf = sdf.filter(['num', 'input_sequence', 'manual_fol_encoding', 'automated_fol_encoding', 'automated_fol_encoding-eval', 'manual_fol_encoding_sentiment', 'automated_fol_encoding_sentiment'])
sentences = sdf['input_sequence'].tolist()
sentiment_df = sdf.filter(['num', 'manual_fol_encoding_sentiment', 'automated_fol_encoding_sentiment'])

sentiment_pipeline = pipeline(model="emmabjor/finetuning-sentiment-model")

data = sentences
finetuned = sentiment_pipeline(data)

sentiment = []
for el in finetuned:
    if el['label'] == "LABEL_1":
        sentiment.append(1)
    elif el['label'] == "LABEL_0":
        sentiment.append(0)
    else:
        sentiment.append(-1)
        
sentiment_df = sentiment_df.assign(finetuned_sentiment = sentiment)

translation_sentiment_array = sentiment_df['manual_fol_encoding_sentiment'].to_numpy()
adjustment_sentiment_array = sentiment_df['automated_fol_encoding_sentiment'].to_numpy()
sentiment_model_array = sentiment_df['finetuned_sentiment'].to_numpy()
finetuned_ct = pd.crosstab(translation_sentiment_array, sentiment_model_array, rownames=['Manual encoding Sentiment'], colnames=['Fine Tuned Sentiment'], normalize='index')
print(finetuned_ct)
print()
finetuned_ct_adjusted = pd.crosstab(translation_sentiment_array, adjustment_sentiment_array, rownames=['Manual encoding Sentiment'], colnames=['Automatic encoding Sentiment'], normalize='index')
print(finetuned_ct_adjusted)