In [1]:
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
from src.llms import get_llms, init_langchain, LLM
from src.experiment import get_fallacy_df, save_fallacy_df, run_experiment, get_classification_prompt_template

# Load environment variables from the local .env file into the process environment.
# NOTE(review): presumably these are the API keys the LLM clients need — confirm against src.llms.
load_dotenv()

# Project helper from src.llms; its effects are not visible in this notebook.
# NOTE(review): presumably one-time LangChain configuration — confirm what it sets up.
init_langchain()

# Fallacy Experiments

## Fallacy Identification

### Experiment 1: Fallacy Identification with zero-shot Prompt

In [2]:
# CSV for experiment 1; the displayed table shows the reasoning steps plus one
# "<model>_response" column per model.
e1_filename = 'data/fallacies_e1.csv'
# Per the log line in the output, this loads the existing dataframe when the file is present.
# NOTE(review): behaviour when the file is missing is defined in src.experiment — confirm.
df_fallacies_e1 = get_fallacy_df(e1_filename)
# Bare last expression so the notebook renders the first rows as a table.
df_fallacies_e1.head()

[2024-10-21 09:34:18] Loaded existing fallacy dataframe from data/fallacies_e1.csv.


Unnamed: 0,step,entity,fallacy,label,category,type,gpt_4o_response,gpt_4_response,gpt_4o_mini_response,claude_3_5_sonnet_response,claude_3_opus_response,claude_3_haiku_response,gemini_1_5_pro_response,gemini_1_5_flash_response,gemini_1_5_flash_8b_response
0,Since John asked Maria if she used the last of...,tepas,Argument from Silence,1,informal,insufficiency,No.,No,No.,No,No.,No.,No,No,No
1,Since Alice asked if Bob knew what an 'ossia' ...,ossia,Argument from Silence,1,informal,insufficiency,No.,No,No.,No,No.,No.,No,No,No
2,Since Alice claims that the Hausdorff contents...,hausdorff contents,Argument from Silence,1,informal,insufficiency,No.,No,No.,No,No.,No.,No,No,No
3,"Since Tom, a seasoned tugboater, said that ice...",tugboaters,Argument from Silence,1,informal,insufficiency,No.,No,No.,No,No.,No.,Yes,No,No
4,Since John accuses Mary of being terrified of ...,beewolf,Argument from Silence,1,informal,insufficiency,No.,No,No.,No,No.,No.,No,No,No


In [3]:
# Zero-shot prompt: the bare question followed by the step placeholder.
# NOTE(review): "[step]" is presumably substituted per row by run_experiment — confirm in src.experiment.
prompt_template_e1 = """Is the following reasoning step correct? You can only answer "Yes" or "No".
[step]"""

# Models to query in this run.
llms = get_llms(
    [
        LLM.GEMINI_1_5_FLASH_8B,
    ]
)

# Run the experiment with the selected models, with no pause configured.
run_experiment(
    df_fallacies_e1,
    e1_filename,
    prompt_template_e1,
    llms,
    sleep_seconds=0,
)

# Hand the dataframe to save_fallacy_df with the same path it was loaded from.
save_fallacy_df(
    df_fallacies_e1,
    e1_filename,
)

### Experiment 2: Fallacy Identification with few-shot Prompt

In [4]:
# CSV for experiment 2 (few-shot); the displayed table shows the reasoning steps plus one
# "<model>_response" column per model.
e2_filename = 'data/fallacies_e2.csv'
# Per the log line in the output, this loads the existing dataframe when the file is present.
# NOTE(review): behaviour when the file is missing is defined in src.experiment — confirm.
df_fallacies_e2 = get_fallacy_df(e2_filename)
# Bare last expression so the notebook renders the first rows as a table.
df_fallacies_e2.head()

[2024-10-21 09:34:19] Loaded existing fallacy dataframe from data/fallacies_e2.csv.


Unnamed: 0,step,entity,fallacy,label,category,type,gpt_4o_response,claude_3_5_sonnet_response,gemini_1_5_pro_response,gpt_4o_mini_response,claude_3_haiku_response,gemini_1_5_flash_response,gemini_1_5_flash_8b_response
0,Since John asked Maria if she used the last of...,tepas,Argument from Silence,1,informal,insufficiency,No.,No.,No.,No.,No.,No,No
1,Since Alice asked if Bob knew what an 'ossia' ...,ossia,Argument from Silence,1,informal,insufficiency,No.,No.,No.,No.,No.,No,No
2,Since Alice claims that the Hausdorff contents...,hausdorff contents,Argument from Silence,1,informal,insufficiency,No.,No.,No.,No.,No.,No,No
3,"Since Tom, a seasoned tugboater, said that ice...",tugboaters,Argument from Silence,1,informal,insufficiency,No.,No.,Yes.,No.,No.,No,No
4,Since John accuses Mary of being terrified of ...,beewolf,Argument from Silence,1,informal,insufficiency,No.,No.,No.,No.,No.,No,No


In [5]:
# Few-shot prompt: the zero-shot question, then four worked examples (answers: Yes, No, No, Yes),
# then the step placeholder.
# NOTE(review): "[step]" is presumably substituted per row by run_experiment — confirm in src.experiment.
prompt_template_e2 = """Is the following reasoning step correct? You can only answer "Yes" or "No".
Since if it's raining then the streets are wet and it's raining now, therefore, the streets are wet.
Yes.
Since I found a shell on the beach and this shell was beautifully shaped and colored, therefore, all shells are beautifully shaped and colored.
No.
Since I am at home or I am in the city and I am at home, therefore, I am not in the city.
No.
Since heavy snowfall often leads to traffic jams and traffic jams cause delays, therefore, heavy snowfall can lead to delays.
Yes.
[step]"""

# Models to query in this run.
# Alternative selection, currently disabled (their response columns already appear in the loaded table):
#   get_llms([LLM.GPT_4O, LLM.CLAUDE_3_5_SONNET, LLM.GEMINI_1_5_PRO])
llms = get_llms(
    [
        LLM.GPT_4O_MINI,
        LLM.CLAUDE_3_HAIKU,
        LLM.GEMINI_1_5_FLASH,
        LLM.GEMINI_1_5_FLASH_8B,
    ]
)

# Run the experiment with the selected models, with no pause configured.
run_experiment(
    df_fallacies_e2,
    e2_filename,
    prompt_template_e2,
    llms,
    sleep_seconds=0,
)

# Hand the dataframe to save_fallacy_df with the same path it was loaded from.
save_fallacy_df(
    df_fallacies_e2,
    e2_filename,
)


### Experiment 3: Fallacy Identification with chain-of-thought Prompt

In [6]:
# CSV for experiment 3 (chain-of-thought); the displayed table shows the reasoning steps plus one
# "<model>_response" column per model, here holding free-text explanations.
e3_filename = 'data/fallacies_e3.csv'
# Per the log line in the output, this loads the existing dataframe when the file is present.
# NOTE(review): behaviour when the file is missing is defined in src.experiment — confirm.
df_fallacies_e3 = get_fallacy_df(e3_filename)
# Bare last expression so the notebook renders the first rows as a table.
df_fallacies_e3.head()

[2024-10-21 09:34:19] Loaded existing fallacy dataframe from data/fallacies_e3.csv.


Unnamed: 0,step,entity,fallacy,label,category,type,gpt_4o_response,claude_3_5_sonnet_response,gemini_1_5_pro_response,gpt_4o_mini_response,claude_3_haiku_response,gemini_1_5_flash_response,gemini_1_5_flash_8b_response
0,Since John asked Maria if she used the last of...,tepas,Argument from Silence,1,informal,insufficiency,No. The reasoning step is not necessarily corr...,Let's think through this step-by-step: 1. Joh...,No. Silence is not necessarily an admission o...,No.,Let's analyze this step-by-step: 1. John aske...,No. Silence doesn't necessarily mean guilt or...,No. Silence doesn't necessarily mean agreemen...
1,Since Alice asked if Bob knew what an 'ossia' ...,ossia,Argument from Silence,1,informal,insufficiency,No. The reasoning step is not correct. Bob's s...,Let's think through this step by step: 1. Ali...,No. Silence could indicate a lack of knowledg...,No.,Let's analyze this step-by-step: 1. Alice ask...,No. Silence doesn't imply knowledge. Bob's s...,No. Silence does not equate to knowledge. Bo...
2,Since Alice claims that the Hausdorff contents...,hausdorff contents,Argument from Silence,1,informal,insufficiency,No. The reasoning step is not correct. The con...,Let's think through this step-by-step: 1. Ali...,No. Bob's silence doesn't confirm Alice's cla...,No.,Let's analyze this step-by-step: 1. Alice cla...,No. Bob's silence doesn't provide evidence su...,"No. Alice's claim, even if true, and Bob's si..."
3,"Since Tom, a seasoned tugboater, said that ice...",tugboaters,Argument from Silence,1,informal,insufficiency,No. The reasoning step is not necessarily corr...,Let's think through this step-by-step: 1. The...,"No. While Tom's experience is valuable, the a...",No.,"Let's analyze this step-by-step: 1. Tom, a se...",No. The reasoning commits an appeal to author...,No. Just because one person says something an...
4,Since John accuses Mary of being terrified of ...,beewolf,Argument from Silence,1,informal,insufficiency,No. The reasoning step is not correct. The fac...,Let's think through this step-by-step: 1. Joh...,No. Silence does not imply guilt or fear. Ma...,No.,Let's analyze this step-by-step: 1. John accu...,No. Silence does not equal guilt or fear. Ma...,No. Silence in the face of an accusation does...


In [7]:
# Chain-of-thought prompt: asks the model to reason step by step before giving "Yes" or "No".
# NOTE(review): "[step]" is presumably substituted per row by run_experiment — confirm in src.experiment.
prompt_template_e3 = """Is the following reasoning step correct?
Let's think step by step and then answer "Yes" or "No".
[step]"""

# Models to query in this run.
# Alternative selection, currently disabled:
#   get_llms([LLM.GPT_4O, LLM.CLAUDE_3_5_SONNET, LLM.GEMINI_1_5_PRO])
llms = get_llms(
    [
        LLM.CLAUDE_3_5_SONNET,
        LLM.GPT_4O_MINI,
        LLM.CLAUDE_3_HAIKU,
        LLM.GEMINI_1_5_FLASH,
        LLM.GEMINI_1_5_FLASH_8B,
    ]
)

# Run the experiment with the selected models, with no pause configured.
run_experiment(
    df_fallacies_e3,
    e3_filename,
    prompt_template_e3,
    llms,
    sleep_seconds=0,
)

# Hand the dataframe to save_fallacy_df with the same path it was loaded from.
save_fallacy_df(
    df_fallacies_e3,
    e3_filename,
)


## Fallacy Classification

### Experiment 4: Fallacy Classification with zero-shot Prompt

In [16]:
# CSV for experiment 4 (classification); the displayed table shows the reasoning steps, the
# expected fallacy name, and one "<model>_response" column per model.
e4_filename = 'data/fallacies_e4.csv'
# Per the log line in the output, this loads the existing dataframe when the file is present.
# NOTE(review): only_incorrect=True presumably restricts the frame to fallacious steps, matching
# the classification prompt's "Given an incorrect reasoning step" — confirm in src.experiment.
df_fallacies_e4 = get_fallacy_df(e4_filename, only_incorrect=True)

# Bare last expression so the notebook renders the first rows as a table.
df_fallacies_e4.head()

[2024-10-24 09:34:22] Loaded existing fallacy dataframe from data/fallacies_e4.csv.


Unnamed: 0,step,entity,fallacy,label,category,subcategory,gpt_4o_response,gpt_4_response,gpt_4o_mini_response,claude_3_5_sonnet_response,claude_3_haiku_response,gemini_1_5_pro_response,gemini_1_5_flash_8b_response,o1_preview_response
0,Since John asked Maria if she used the last of...,tepas,Argument from Silence,1,informal,insufficiency,Argument from Silence,Argument from Silence,Affirming the Consequent,Argument from Silence,Denying the Antecedent,Argument from Silence,Argument from Silence,Argument from Silence
1,Since Alice asked if Bob knew what an 'ossia' ...,ossia,Argument from Silence,1,informal,insufficiency,Argument from Silence,Argument from Silence,Affirming the Consequent,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence
2,Since Alice claims that the Hausdorff contents...,hausdorff contents,Argument from Silence,1,informal,insufficiency,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence
3,"Since Tom, a seasoned tugboater, said that ice...",tugboaters,Argument from Silence,1,informal,insufficiency,Argument from Silence,(79) Appeal to Authority,Appeal to Authority,Appeal to Authority,Appeal to Authority,Appeal to Authority,Appeal to Authority,Argument from Silence
4,Since John accuses Mary of being terrified of ...,beewolf,Argument from Silence,1,informal,insufficiency,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence,Argument from Silence


In [17]:
# Build the classification prompt; the printed output shows that it instructs the model to act as
# a fallacy classifier and enumerates the numbered fallacy names to choose from.
prompt_template_e4 = get_classification_prompt_template()
# print() rather than a bare expression so line breaks are rendered instead of shown as escaped "\n".
print(prompt_template_e4)

You are a logical fallacy classifier. Given an incorrect reasoning step, your task is to identify its type of fallacy.
Answer by choosing one of these fallacies:
(1) Affirming the Consequent
(2) Denying the Antecedent
(3) Negating Antecedent and Consequent
(4) Commutation of Conditionals
(5) Affirming a Disjunct
(6) Denying a Conjunct
(7) Fallacy of the Undistributed Middle
(8) Exclusive Premises
(9) Fallacy of Four Terms
(10) Illicit Substitution of Identicals
(11) Illicit Minor
(12) Illicit Major
(13) Negative Conclusion from Affirmative Premises
(14) Affirmative Conclusion from a Negative Premise
(15) False Conversion
(16) Unwarranted Contrast
(17) Quantifier Shift Fallacy
(18) Existential Fallacy
(19) Fallacy of Every and All
(20) Illicit Contraposition
(21) Gamblers Fallacy
(22) Hot Hand Fallacy
(23) Conjunction Fallacy
(24) Disjunction Fallacy
(25) Argument of the Beard
(26) Appeal to Extremes
(27) Type Token Fallacy
(28) Use Mention Error
(29) Reification
(30) Fake Precision
(31

In [18]:
# Model to query in this run.
llms = get_llms(
    [
        LLM.GEMINI_1_5_PRO,
    ]
)

# Run the classification experiment with the prompt built from get_classification_prompt_template().
# NOTE(review): sleep_seconds=0.2 is presumably a pause between requests — confirm in src.experiment.
run_experiment(
    df_fallacies_e4,
    e4_filename,
    prompt_template_e4,
    llms,
    sleep_seconds=0.2,
)

# Hand the dataframe to save_fallacy_df with the same path it was loaded from.
save_fallacy_df(
    df_fallacies_e4,
    e4_filename,
)

[2024-10-24 09:34:24] Response from LLM gemini_1_5_pro (index=2055): content='None\n' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run-93f798dd-d614-42f4-8c52-c90451d766f3-0' usage_metadata={'input_tokens': 2243, 'output_tokens': 2, 'total_tokens': 2245}
