# Human Benchmark for Fallacy Identification

In [12]:
%load_ext autoreload
%autoreload 2

from src.experiment import get_fallacy_df

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Create Dataset

In [13]:
# CSV path for the human-benchmark questionnaire data. The same path is written
# to by the final cell of this notebook.
filename = 'data/fallacies_e11_human_empty.csv'
# get_fallacy_df is a project helper (src.experiment). The log line below shows it
# created a new dataframe on this run; presumably it loads from `filename` instead
# when the file already exists -- TODO confirm.
# NOTE(review): if so, re-running this notebook after the final cell has saved the
# stripped frame may load data without the 'fallacy'/'label' columns used below.
df_fallacies = get_fallacy_df(filename)


[2024-11-07 15:52:49] Created new fallacy identification dataframe.


In [14]:
# Shuffle every row once, with a fixed seed so the order is reproducible.
# The original index labels travel with the rows.
shuffle_seed = 42
df_fallacies = df_fallacies.sample(frac=1.0, random_state=shuffle_seed)

In [15]:
# Give each row a 1-based running number within its (fallacy, label) group.
# Picking the first k set numbers then yields a balanced sample: every fallacy
# type is present and correct/incorrect reasoning steps are split 50/50.
steps_by_fallacy_and_label = df_fallacies.groupby(['fallacy', 'label'], observed=True)
df_fallacies['set_number'] = steps_by_fallacy_and_label.cumcount().add(1)

In [16]:
# Verify that each set contains the same number of 1 and 0 labels.
label_counts_per_set = (
    df_fallacies.groupby(['set_number'], observed=True)['label'].value_counts()
)

# Actually enforce the invariant instead of relying on a visual check of the
# table: one row per set, one column per observed label value, with a label
# that is missing from a set counted as 0.
label_balance = (
    df_fallacies.groupby(['set_number', 'label'], observed=True)
    .size()
    .unstack(fill_value=0)
)
unbalanced_sets = label_balance.index[label_balance.nunique(axis=1) > 1].tolist()
assert not unbalanced_sets, f"Sets with unequal label counts: {unbalanced_sets}"

# Last expression: still display the per-set counts for the reader.
label_counts_per_set

set_number  label
1           1        232
            0        232
2           1        232
            0        232
3           1        232
            0        232
4           1        232
            0        232
5           1        232
            0        232
6           1        232
            0        232
7           1        232
            0        232
8           1        232
            0        232
9           1        232
            0        232
10          1        232
            0        232
Name: count, dtype: int64

In [17]:
# Verify that each set contains all fallacy types.
fallacies_per_set = df_fallacies.groupby(['set_number'], observed=True)['fallacy'].nunique()

# Enforce the invariant rather than only displaying it: every set must cover as
# many distinct fallacies as the dataset as a whole.
total_fallacies = df_fallacies['fallacy'].nunique()
incomplete_sets = fallacies_per_set.index[fallacies_per_set != total_fallacies].tolist()
assert not incomplete_sets, (
    f"Sets missing some of the {total_fallacies} fallacy types: {incomplete_sets}"
)

# Last expression: still display the per-set counts for the reader.
fallacies_per_set

set_number
1     232
2     232
3     232
4     232
5     232
6     232
7     232
8     232
9     232
10    232
Name: fallacy, dtype: int64

In [18]:
# Count the distinct entities in each set; because the rows were shuffled before
# numbering, each set should draw on a wide range of entities.
entities_by_set = df_fallacies.groupby(['set_number'], observed=True)['entity']
entities_by_set.nunique()

set_number
1     436
2     450
3     446
4     433
5     440
6     435
7     442
8     440
9     439
10    442
Name: entity, dtype: int64

In [19]:
# Give each row a 1-based running number within its (subcategory, label) group,
# so that every subcategory contributes the same number of correct and incorrect
# samples to a questionnaire: 5/5 for q90 and 10/10 for q180.
steps_by_subcategory_and_label = df_fallacies.groupby(['subcategory', 'label'], observed=True)
df_fallacies['subcategory_set'] = steps_by_subcategory_and_label.cumcount().add(1)

# 0/1 membership flags, stored as int64.
# Short questionnaire: 90 questions
df_fallacies['q90'] = df_fallacies['subcategory_set'].le(5).astype('int64')
# Long questionnaire: 180 questions
df_fallacies['q180'] = df_fallacies['subcategory_set'].le(10).astype('int64')

In [20]:
# Row subsets for the short (q90) and long (q180) questionnaires.
in_short_questionnaire = df_fallacies['q90'].eq(1)
in_long_questionnaire = df_fallacies['q180'].eq(1)
df_q90 = df_fallacies.loc[in_short_questionnaire]
df_q180 = df_fallacies.loc[in_long_questionnaire]

In [21]:
# Label counts per subcategory in the long questionnaire.
q180_labels_by_subcategory = df_q180.groupby('subcategory', observed=True)['label']
q180_labels_by_subcategory.value_counts()

subcategory                label
ambiguity                  1        10
                           0        10
inappropriate presumption  1        10
                           0        10
inconsistency              1        10
                           0        10
insufficiency              1        10
                           0        10
irrelevance                1        10
                           0        10
probabilistic              1        10
                           0        10
propositional              1        10
                           0        10
quantificational           1        10
                           0        10
syllogistic                1        10
                           0        10
Name: count, dtype: int64

In [22]:
# How many distinct fallacies each subcategory contributes to the long questionnaire.
q180_fallacies_by_subcategory = df_q180.groupby('subcategory', observed=True)['fallacy']
q180_fallacies_by_subcategory.nunique()

subcategory
ambiguity                    12
inappropriate presumption    17
inconsistency                 3
insufficiency                16
irrelevance                  19
probabilistic                 4
propositional                 6
quantificational              6
syllogistic                   7
Name: fallacy, dtype: int64

In [23]:
# Strip the answer-revealing and helper columns so the saved sheet is a blind test.
# The frame is modified in place.
columns_hidden_from_raters = ['entity', 'fallacy', 'label', 'subcategory_set']
df_fallacies.drop(columns=columns_hidden_from_raters, inplace=True)

In [24]:
# Write the index out as a column named 'index' so the responses can later be
# joined back to the original dataset.
df_fallacies.to_csv(path_or_buf=filename, index_label='index', index=True)