## Notebook for template-based explanation

#### Load modules

In [None]:
import os
import sys
import json
import pandas as pd
from tqdm import tqdm
import importlib
from openai import OpenAI

    # Paths of neuro-explain modules
from main.verbalizer.utilsFunctions import *
sys.path.append(os.path.abspath('main/preprocessor'))
sys.path.append(os.path.abspath('main/verbalizer'))
sys.path.append(os.path.abspath('main'))

import FilePreprocessor
import ChaseGraphVerbalizer
import TemplatesGenerator
import CorpusPreprocessor
import AggregateVerbalizer
import TemplatesGenerator


In [None]:
# Name of the program; it is also the sub-folder used in every path below
program_type = 'stress_test'
# Names of the CSV files passed to CorpusPreprocessor.get_list_output_facts
csv_file_names = ['default']

# Generic path to folder
path_g = "Knowledge_Graph_Applications/"

# NOTE: os.path.join only does something useful when it receives the path
# components separately; joining one pre-concatenated string is a no-op.

# Path to the chase graph
path_chase = os.path.join(path_g, program_type, 'chase_graph.json')
# Path to the dependency graph
path_plan = os.path.join(path_g, program_type, 'dependency_graph.json')
# Path to the predicates
path_predicates = os.path.join('Domain_Glossary', program_type, 'predicates.json')
# Path to the folder where results will be stored. The empty last component
# keeps the trailing separator, which later cells rely on when they append
# file names with `path_output + '<name>'`.
path_output = os.path.join(path_g, program_type, '')
# Path to the CSV files generated by Vadalog (same folder as the results)
path_csv_output = os.path.join(path_g, program_type, '')

#### Deterministic Explanation via Chase Graph

In [None]:
## Locations of the files referenced below, both inside the output folder
path_num_chase = os.path.join(path_output, 'num_chase_graph.json')
path_verb_chase = os.path.join(path_output, 'verb_chase_graph.json')

## Step 1: pre-process the chase graph. The atoms that contributed to
## aggregations are integrated first, then each step is numbered.
# importlib.reload(FilePreprocessor)
FilePreprocessor.FilePreprocessor().integrate_previous_contributors_to_aggregations(
    path_chase,
    path_output,
)
FilePreprocessor.FilePreprocessor().number_chase_graph(
    path_output + 'aggr_chase_graph.json',
    path_output,
)

## Step 2: verbalize the entire numbered chase graph to obtain the
## deterministic explanations.
# importlib.reload(ChaseGraphVerbalizer)
chasegraph_verbalizer = ChaseGraphVerbalizer.ChaseGraphVerbalizer()
chasegraph_verbalizer.verbalize_chase_graph(
    path_num_chase,
    path_predicates,
    path_output,
)

#### Template Generation

In [None]:
# Both generator calls take the output folder and the predicates file last.
_generator_io = (path_output, path_predicates)

# Templates derived from the dependency graph. Later cells index this value
# (`templates[2]`) and extend it with a tuple.
templates = TemplatesGenerator.TemplatesGenerator().get_program_paths(
    path_plan, *_generator_io
)

# Recursive variant, built from the templates obtained above.
templates_rec = TemplatesGenerator.TemplatesGenerator().get_recursive_template(
    templates, *_generator_io
)

In [None]:
# Get API key
# The key is read from the OPENAI_API_KEY environment variable so the real
# secret never has to be written into (and committed with) the notebook.
# The placeholder is only a fallback: set the variable or replace the
# placeholder before running the paraphrasing cell.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "YOURAPIKEY"))


In [None]:
import re

# Word-boundary patterns that turn conditional wording into causal wording.
# A plain str.replace('if', 'since') also rewrites the inside of words such
# as "specified" or "different", which corrupts the paraphrased text.
_CONDITIONAL_REWRITES = (
    (re.compile(r'\bIf\b'), 'Since'),
    (re.compile(r'\bif\b'), 'since'),
    (re.compile(r'\bprovided that\b'), 'since'),
    (re.compile(r'\bAssuming\b'), 'Since'),
    (re.compile(r'\bassuming\b'), 'since'),
)


def _to_causal_wording(text):
    """Return *text* stripped, with conditional connectives replaced by
    'since'/'Since' (whole words only) and all double quotes removed."""
    text = text.strip()
    for pattern, replacement in _CONDITIONAL_REWRITES:
        text = pattern.sub(replacement, text)
    return text.replace('"', '')


# One plain-text verbalization per template: the tokens of each entry of
# templates[2] joined with single spaces.
tt = [' '.join(tokens) for tokens in templates[2]]

# Ask the model to rephrase every verbalization, then normalise the wording.
paraphrased_templates = []
for explanation in tt:
    prompt = f'Rephrase the following text: "{explanation}" '
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        # model="gpt-4-1106-preview",
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
        temperature=1,
        max_tokens=1024,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    paraphrased_templates.append(
        _to_causal_wording(response.choices[0].message.content)
    )

# Append the paraphrases as one extra element of the templates tuple.
templates_full = templates + (paraphrased_templates,)

# Side-by-side view of the original and the paraphrased verbalizations.
# The paraphrases are used directly instead of templates_full[3], so the table
# does not depend on how many elements `templates` holds.
df = pd.DataFrame(
    list(zip(tt, paraphrased_templates)),
    columns=['Original Verbalization', 'Paraphrased Verbalization'],
)
# 'pre-wrap' keeps long explanations readable by wrapping them in the cell.
display(df.style.set_properties(**{'white-space': 'pre-wrap'}))

Save the generated templates

In [None]:
# Persist the templates (original entries plus paraphrases) as JSON.
# os.path.join is used because path_output already ends with a separator:
# `path_output + '/templates.json'` produced a doubled separator and differed
# from the path the loading cell reads (`path_output + 'templates.json'`).
with open(os.path.join(path_output, 'templates.json'), "w") as f:
    json.dump(templates_full, f)

#### Load templates

In [None]:
# Read the stored templates back from the output folder; this rebinds
# templates_full to the content of the JSON file.
templates_path = path_output + 'templates.json'
with open(templates_path, 'r') as templates_file:
    templates_full = json.load(templates_file)

#### Load facts to explain

In [None]:
# Collect the output facts to explain from the listed CSV files.
corpus_preprocessor = CorpusPreprocessor.CorpusPreprocessor()
facts_to_explain = corpus_preprocessor.get_list_output_facts(
    csv_file_names,
    path_output,
    path_csv_output,
)

In [None]:
# For every fact to explain, fetch its entry from the numbered chase graph.
# A fresh VerbalizationFinder is created per fact; tqdm shows the progress.
chase_fact = [
    AggregateVerbalizer.VerbalizationFinder().get_chase_fact(path_num_chase, fact)
    for fact in tqdm(facts_to_explain)
]
# chase_fact

#### Generate the template-based explanations

In [None]:
importlib.reload(TemplatesGenerator)

# Map every fact onto its template. The per-fact results are collected in a
# list and concatenated once at the end; calling pd.concat inside the loop
# copies the growing frame on every iteration.
explanation_frames = []
for i, fact in enumerate(tqdm(facts_to_explain)):
    try:
        # NOTE(review): templates_full is passed twice, exactly as before;
        # confirm against the signature of mapping_to_template.
        explanation_frames.append(
            TemplatesGenerator.TemplatesGenerator().mapping_to_template(
                chase_fact[i][0],
                chase_fact[i][1],
                templates_full,
                templates_full,
                path_output,
                fact,
                path_verb_chase,
            )
        )
    except Exception as err:
        # Best effort: report the failing fact together with the reason and
        # continue. `Exception` (rather than a bare `except:`) lets
        # KeyboardInterrupt/SystemExit stop the cell, and the f-string also
        # copes with facts that are not plain strings.
        print(f'Failed at mapping fact: {fact} ({type(err).__name__}: {err})')

# The empty frame fixes the expected columns even when every mapping failed.
df = pd.concat(
    [
        pd.DataFrame(columns=['DeterministicVerbalization', 'TemplateApproach']),
        *explanation_frames,
    ]
)
df = df.reset_index(drop=True)

# Show the last ten explanations, wrapping long text inside the cells.
display(df.iloc[-10:].style.set_properties(**{'white-space': 'pre-wrap'}))