## Persuasion strategy

In [1]:
import pandas as pd 
import numpy as np 
from pathlib import Path 
import matplotlib.pyplot as plt 
import json

from collections import Counter

In [2]:
# Grid of (task, explainer) pairs that the summary tables iterate over.
dataset_tasks = [
    "ecqa/secondbest",
    "nli/contra_to_neutral",
    "nli/entail_to_neutral",
]
models = ["chat", "claude", "gpt4"]

# Version 1 taxonomy: ten strategies, identified by their lower-case name.
strategies_v1 = [
    "confidence manipulation",
    "appeal to authority",
    "selective evidence",
    "logical fallacies",
    "comparative advantage framing",
    "reframing the question",
    "selective fact presentation",
    "analogical evidence",
    "detailed scenario building",
    "complex inference",
]

# Version 2 taxonomy: 40 techniques, referred to by number ("1" .. "40").
# See persuasion_strategy.py for the detailed list behind each number.
strategies_v2 = list(map(str, range(1, 41)))

def count_strategy_v1(dataset_task, model):
    """Return, for each v1 strategy, its occurrence count divided by the row count.

    Reads ``../data/{dataset_task}/{model}/with_nle_w_strategy_v1.csv`` and
    parses every ``strategy`` cell as JSON after dropping its first 7 and last
    3 characters (presumably a Markdown "json" code fence around the payload
    -- TODO confirm against the script that produced the CSV).

    Rows that cannot be parsed (missing cell, malformed JSON, or a payload
    that is neither an object nor an array) contribute no counts but still
    count towards the denominator.

    Args:
        dataset_task: Task sub-directory, e.g. ``"ecqa/secondbest"``.
        model: Explainer sub-directory, e.g. ``"gpt4"``.

    Returns:
        A list of floats aligned with ``strategies_v1``. Every entry is NaN
        when the CSV contains no rows, since the ratio is undefined.
    """
    df = pd.read_csv(Path(f"../data/{dataset_task}/{model}/with_nle_w_strategy_v1.csv"))

    counter = {s: 0 for s in strategies_v1}
    total = len(df)
    if total == 0:
        # Avoid ZeroDivisionError; NaN flags "no data" instead of a fake 0.0.
        return [float("nan")] * len(strategies_v1)

    for raw in df["strategy"]:
        try:
            if not isinstance(raw, str):
                # Empty CSV cells are read back as NaN, which cannot be sliced.
                raise ValueError("strategy cell is not text")
            parsed = json.loads(raw[7:-3])
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass, so this covers
            # both the non-text case above and malformed JSON.
            parsed = {}
        if not isinstance(parsed, (dict, list)):
            # Valid JSON scalars (numbers, strings, null) carry no strategy names.
            parsed = {}
        for name in parsed:
            key = name.lower() if isinstance(name, str) else None
            # Names outside the v1 taxonomy are ignored on purpose.
            if key in counter:
                counter[key] += 1
    return [counter[s] / total for s in strategies_v1]

def count_strategy_v2(dataset_task, model):
    """Return, for each v2 technique id, its occurrence count divided by the row count.

    Reads ``../data/{dataset_task}/{model}/with_nle_w_strategy.csv`` and parses
    every ``strategy`` cell as JSON after dropping its first 7 and last 3
    characters (presumably a Markdown "json" code fence around the payload
    -- TODO confirm against the script that produced the CSV). The technique
    id of an entry is its first whitespace-separated token with surrounding
    dots stripped, so ``"12. Foo"`` is counted under ``"12"``.

    Rows that cannot be parsed print a "Decode error" line and contribute no
    counts, but still count towards the denominator.

    Args:
        dataset_task: Task sub-directory, e.g. ``"ecqa/secondbest"``.
        model: Explainer sub-directory, e.g. ``"gpt4"``.

    Returns:
        A list of floats aligned with ``strategies_v2``. Every entry is NaN
        when the CSV contains no rows, since the ratio is undefined.
    """
    df = pd.read_csv(Path(f"../data/{dataset_task}/{model}/with_nle_w_strategy.csv"))
    counter = {s: 0 for s in strategies_v2}
    total = len(df)
    if total == 0:
        # Avoid ZeroDivisionError; NaN flags "no data" instead of a fake 0.0.
        return [float("nan")] * len(strategies_v2)

    for raw in df["strategy"]:
        try:
            if not isinstance(raw, str):
                # Empty CSV cells are read back as NaN, which cannot be sliced.
                raise ValueError("strategy cell is not text")
            parsed = json.loads(raw[7:-3])
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass, so this covers
            # both the non-text case above and malformed JSON.
            parsed = {}
            print("Decode error. dataset_task={}, model={}".format(dataset_task, model))
        if not isinstance(parsed, (dict, list)):
            # Valid JSON scalars (numbers, strings, null) carry no technique labels.
            parsed = {}
        for label in parsed:
            tokens = label.split() if isinstance(label, str) else []
            if not tokens:
                # Empty / whitespace-only / non-text labels have no id to extract.
                continue
            technique_id = tokens[0].strip(".")
            # Ids outside 1..40 are ignored on purpose.
            if technique_id in counter:
                counter[technique_id] += 1
    return [counter[s] / total for s in strategies_v2]


def count_strategy_collect_results(version=1):
    """Assemble the strategy-frequency table for every task/explainer pair.

    Args:
        version: ``1`` selects ``count_strategy_v1`` with ``strategies_v1``;
            any other value selects ``count_strategy_v2`` with
            ``strategies_v2``.

    Returns:
        A DataFrame with one row per strategy (indexed by the selected
        strategy list) and a two-level column index named
        ``("Task", "Explainer")`` built from ``dataset_tasks`` x ``models``.
        Each cell is the value returned by the selected counting function for
        that task/explainer pair.
    """
    # Resolve the version once instead of re-checking it inside the loops.
    if version == 1:
        strategies, count_fn = strategies_v1, count_strategy_v1
    else:
        strategies, count_fn = strategies_v2, count_strategy_v2

    # Size the table from the strategy list itself so it cannot drift out of
    # sync with a hard-coded row count.
    report_data = np.zeros((len(strategies), len(dataset_tasks) * len(models)))

    # Fill columns task-major / model-minor, the same order that
    # MultiIndex.from_product generates for the header below.
    column = 0
    for task in dataset_tasks:
        for model in models:
            report_data[:, column] = count_fn(task, model)
            column += 1

    header = pd.MultiIndex.from_product([dataset_tasks, models], names=["Task", "Explainer"])
    return pd.DataFrame(report_data, index=strategies, columns=header)

# Build the v1 (named-strategy) frequency table and display it as the cell output.
df = count_strategy_collect_results(version=1)
df

Task,ecqa/secondbest,ecqa/secondbest,ecqa/secondbest,nli/contra_to_neutral,nli/contra_to_neutral,nli/contra_to_neutral,nli/entail_to_neutral,nli/entail_to_neutral,nli/entail_to_neutral
Explainer,chat,claude,gpt4,chat,claude,gpt4,chat,claude,gpt4
confidence manipulation,0.386,0.65,0.382,0.653333,0.62,0.78,0.686667,0.673333,0.623333
appeal to authority,0.03,0.026,0.046,0.003333,0.013333,0.013333,0.01,0.043333,0.003333
selective evidence,0.668,0.694,0.79,0.426667,0.476667,0.55,0.456667,0.673333,0.526667
logical fallacies,0.102,0.276,0.11,0.06,0.096667,0.06,0.086667,0.173333,0.13
comparative advantage framing,0.824,0.794,0.898,0.216667,0.31,0.373333,0.2,0.23,0.326667
reframing the question,0.532,0.572,0.482,0.936667,0.953333,0.926667,0.943333,0.943333,0.923333
selective fact presentation,0.694,0.722,0.672,0.506667,0.48,0.49,0.53,0.366667,0.556667
analogical evidence,0.028,0.01,0.016,0.01,0.02,0.013333,0.01,0.04,0.006667
detailed scenario building,0.318,0.276,0.624,0.203333,0.3,0.243333,0.123333,0.266667,0.176667
complex inference,0.044,0.008,0.044,0.053333,0.056667,0.086667,0.026667,0.07,0.033333


In [3]:
# Print the table currently bound to `df` as LaTeX, with floats formatted to two decimals.
print(df.to_latex(float_format="%.2f"))

\begin{tabular}{lrrrrrrrrr}
\toprule
Task & \multicolumn{3}{l}{ecqa/secondbest} & \multicolumn{3}{l}{nli/contra\_to\_neutral} & \multicolumn{3}{l}{nli/entail\_to\_neutral} \\
Explainer &            chat & claude & gpt4 &                  chat & claude & gpt4 &                  chat & claude & gpt4 \\
\midrule
confidence manipulation       &            0.39 &   0.65 & 0.38 &                  0.65 &   0.62 & 0.78 &                  0.69 &   0.67 & 0.62 \\
appeal to authority           &            0.03 &   0.03 & 0.05 &                  0.00 &   0.01 & 0.01 &                  0.01 &   0.04 & 0.00 \\
selective evidence            &            0.67 &   0.69 & 0.79 &                  0.43 &   0.48 & 0.55 &                  0.46 &   0.67 & 0.53 \\
logical fallacies             &            0.10 &   0.28 & 0.11 &                  0.06 &   0.10 & 0.06 &                  0.09 &   0.17 & 0.13 \\
comparative advantage framing &            0.82 &   0.79 & 0.90 &                  0.22 &   0.31 & 0.

  print(df.to_latex(float_format="%.2f"))


In [4]:
# Rebuild the table with the v2 (numbered-technique) counts and display it.
# Note: this rebinds `df`, so the v1 table is no longer reachable from here on.
df = count_strategy_collect_results(version=2)
df

Decode error. dataset_task=ecqa/secondbest, model=claude
Decode error. dataset_task=nli/contra_to_neutral, model=chat
Decode error. dataset_task=nli/contra_to_neutral, model=claude
Decode error. dataset_task=nli/entail_to_neutral, model=chat
Decode error. dataset_task=nli/entail_to_neutral, model=chat
Decode error. dataset_task=nli/entail_to_neutral, model=claude


Task,ecqa/secondbest,ecqa/secondbest,ecqa/secondbest,nli/contra_to_neutral,nli/contra_to_neutral,nli/contra_to_neutral,nli/entail_to_neutral,nli/entail_to_neutral,nli/entail_to_neutral
Explainer,chat,claude,gpt4,chat,claude,gpt4,chat,claude,gpt4
1,0.358,0.28,0.46,0.063333,0.08,0.076667,0.016667,0.063333,0.073333
2,0.61,0.58,0.782,0.103333,0.22,0.126667,0.043333,0.213333,0.206667
3,0.02,0.028,0.014,0.0,0.01,0.0,0.0,0.0,0.006667
4,0.016,0.008,0.012,0.0,0.0,0.0,0.0,0.003333,0.0
5,0.036,0.024,0.03,0.0,0.0,0.0,0.0,0.0,0.003333
6,0.048,0.026,0.054,0.0,0.003333,0.0,0.0,0.0,0.003333
7,0.014,0.016,0.03,0.0,0.0,0.0,0.0,0.0,0.003333
8,0.012,0.01,0.016,0.0,0.003333,0.003333,0.0,0.003333,0.003333
9,0.002,0.0,0.002,0.0,0.0,0.0,0.0,0.0,0.0
10,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Print the table currently bound to `df` as LaTeX, with floats formatted to two decimals.
print(df.to_latex(float_format="%.2f"))

\begin{tabular}{lrrrrrrrrr}
\toprule
Task & \multicolumn{3}{l}{ecqa/secondbest} & \multicolumn{3}{l}{nli/contra\_to\_neutral} & \multicolumn{3}{l}{nli/entail\_to\_neutral} \\
Explainer &            chat & claude & gpt4 &                  chat & claude & gpt4 &                  chat & claude & gpt4 \\
\midrule
1  &            0.36 &   0.28 & 0.46 &                  0.06 &   0.08 & 0.08 &                  0.02 &   0.06 & 0.07 \\
2  &            0.61 &   0.58 & 0.78 &                  0.10 &   0.22 & 0.13 &                  0.04 &   0.21 & 0.21 \\
3  &            0.02 &   0.03 & 0.01 &                  0.00 &   0.01 & 0.00 &                  0.00 &   0.00 & 0.01 \\
4  &            0.02 &   0.01 & 0.01 &                  0.00 &   0.00 & 0.00 &                  0.00 &   0.00 & 0.00 \\
5  &            0.04 &   0.02 & 0.03 &                  0.00 &   0.00 & 0.00 &                  0.00 &   0.00 & 0.00 \\
6  &            0.05 &   0.03 & 0.05 &                  0.00 &   0.00 & 0.00 &           

  print(df.to_latex(float_format="%.2f"))
