In [1]:
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
from src.llms import get_llms, init_langchain, LLM
from src.experiment import get_fallacy_df, save_fallacy_df, run_experiment

# Load environment variables from the .env file before any client setup.
load_dotenv()

# Project helper from src.llms; presumably configures LangChain from the
# environment loaded above, so it should run after load_dotenv() — TODO confirm.
init_langchain()

# Fallacy Identification Experiments

## Experiment 1: Fallacy Identification with zero-shot Prompt

In [None]:
# CSV path for Experiment 1 (zero-shot); the same path is passed to
# run_experiment and save_fallacy_df in the following cell.
e1_filename = 'data/fallacies_e1.csv'
# Obtain the fallacy dataframe for this experiment via the project helper
# (presumably loads the CSV when it exists, otherwise builds a new frame —
# confirm in src.experiment).
df_fallacies_e1 = get_fallacy_df(e1_filename)
# Bare last expression: the first rows are rendered as the cell output.
df_fallacies_e1.head()

In [None]:
# Zero-shot prompt: the bare question followed by the [step] placeholder
# (presumably replaced with each row's reasoning step by run_experiment — confirm).
prompt_template_e1 = """Is the following reasoning step correct? You can only answer "Yes" or "No".
[step]"""

# Only Claude 3 Haiku is queried in this experiment.
llms = get_llms([LLM.CLAUDE_3_HAIKU])

# Run the prompt against every selected LLM. sleep_seconds=0 presumably means
# no pause between requests — confirm in src.experiment.
run_experiment(
    df_fallacies_e1,
    e1_filename,
    prompt_template_e1,
    llms,
    sleep_seconds=0,
)

# Persist the dataframe under the experiment's CSV path.
save_fallacy_df(df_fallacies_e1, e1_filename)

## Experiment 2: Fallacy Identification with few-shot Prompt

In [2]:
# CSV path for Experiment 2 (few-shot); the same path is passed to
# run_experiment and save_fallacy_df in the following cell.
e2_filename = 'data/fallacies_e2.csv'
# Obtain the fallacy dataframe for this experiment via the project helper.
# In the recorded run it reported loading an existing dataframe from this path.
df_fallacies_e2 = get_fallacy_df(e2_filename)
# Bare last expression: the first rows are rendered as the cell output.
df_fallacies_e2.head()

[2024-10-18 18:06:45] Loaded existing fallacy dataframe from data/fallacies_e2.csv.


Unnamed: 0,step,entity,fallacy,label,category,type,gpt_4o_response,claude_3_5_sonnet_response,gemini_1_5_pro_response
0,Since John asked Maria if she used the last of...,tepas,Argument from Silence,1,informal,insufficiency,No.,No.,No.
1,Since Alice asked if Bob knew what an 'ossia' ...,ossia,Argument from Silence,1,informal,insufficiency,No.,No.,No.
2,Since Alice claims that the Hausdorff contents...,hausdorff contents,Argument from Silence,1,informal,insufficiency,No.,No.,No.
3,"Since Tom, a seasoned tugboater, said that ice...",tugboaters,Argument from Silence,1,informal,insufficiency,No.,No.,Yes.
4,Since John accuses Mary of being terrified of ...,beewolf,Argument from Silence,1,informal,insufficiency,No.,No.,No.


In [3]:
# Few-shot prompt: the zero-shot question, then four worked examples
# (two answered "Yes.", two answered "No."), then the [step] placeholder
# (presumably replaced with each row's reasoning step by run_experiment — confirm).
prompt_template_e2 = """Is the following reasoning step correct? You can only answer "Yes" or "No".
Since if it's raining then the streets are wet and it's raining now, therefore, the streets are wet.
Yes.
Since I found a shell on the beach and this shell was beautifully shaped and colored, therefore, all shells are beautifully shaped and colored.
No.
Since I am at home or I am in the city and I am at home, therefore, I am not in the city.
No.
Since heavy snowfall often leads to traffic jams and traffic jams cause delays, therefore, heavy snowfall can lead to delays.
Yes.
[step]"""

# NOTE: only Gemini 1.5 Pro is queried here. The full model set for this
# experiment is LLM.GPT_4O, LLM.CLAUDE_3_5_SONNET and LLM.GEMINI_1_5_PRO;
# widen the list below to query all three.
llms = get_llms([LLM.GEMINI_1_5_PRO])

# Run the prompt against every selected LLM. sleep_seconds=0 presumably means
# no pause between requests — confirm in src.experiment.
run_experiment(
    df_fallacies_e2,
    e2_filename,
    prompt_template_e2,
    llms,
    sleep_seconds=0,
)

# Persist the dataframe under the experiment's CSV path.
save_fallacy_df(df_fallacies_e2, e2_filename)


[2024-10-18 18:09:36] Processed 100 responses for LLM gemini_1_5_pro (index=1379).
[2024-10-18 18:12:07] Processed 200 responses for LLM gemini_1_5_pro (index=1479).
[2024-10-18 18:13:16] Processed 300 responses for LLM gemini_1_5_pro (index=1579).
[2024-10-18 18:16:18] Processed 400 responses for LLM gemini_1_5_pro (index=1679).
[2024-10-18 18:20:58] Processed 500 responses for LLM gemini_1_5_pro (index=1779).
[2024-10-18 18:23:11] Processed 600 responses for LLM gemini_1_5_pro (index=1879).
[2024-10-18 18:25:52] Processed 700 responses for LLM gemini_1_5_pro (index=1979).
[2024-10-18 18:27:48] Processed 800 responses for LLM gemini_1_5_pro (index=2079).
[2024-10-18 18:30:36] Processed 900 responses for LLM gemini_1_5_pro (index=2179).
[2024-10-18 18:36:03] Processed 1000 responses for LLM gemini_1_5_pro (index=2279).
[2024-10-18 18:38:13] Processed 1100 responses for LLM gemini_1_5_pro (index=2379).
[2024-10-18 18:42:46] Processed 1200 responses for LLM gemini_1_5_pro (index=2479).
[

## Experiment 3: Fallacy Identification with chain-of-thought Prompt

In [2]:
# CSV path for Experiment 3 (chain-of-thought); the same path is passed to
# run_experiment and save_fallacy_df in the following cell.
e3_filename = 'data/fallacies_e3.csv'
# Obtain the fallacy dataframe for this experiment via the project helper.
# In the recorded run it reported creating a new dataframe.
df_fallacies_e3 = get_fallacy_df(e3_filename)
# Bare last expression: the first rows are rendered as the cell output.
df_fallacies_e3.head()

[2024-10-18 15:02:10] Created new fallacy identification dataframe.


Unnamed: 0,step,entity,fallacy,label,category,type
0,Since John asked Maria if she used the last of...,tepas,Argument from Silence,1,informal,insufficiency
1,Since Alice asked if Bob knew what an 'ossia' ...,ossia,Argument from Silence,1,informal,insufficiency
2,Since Alice claims that the Hausdorff contents...,hausdorff contents,Argument from Silence,1,informal,insufficiency
3,"Since Tom, a seasoned tugboater, said that ice...",tugboaters,Argument from Silence,1,informal,insufficiency
4,Since John accuses Mary of being terrified of ...,beewolf,Argument from Silence,1,informal,insufficiency


In [None]:
# Chain-of-thought prompt: asks the model to think step by step before giving
# its "Yes"/"No" answer, followed by the [step] placeholder
# (presumably replaced with each row's reasoning step by run_experiment — confirm).
prompt_template_e3 = """Is the following reasoning step correct?
Let's think step by step and then answer "Yes" or "No".
[step]"""

# Query all three models in this experiment.
llms = get_llms([LLM.GPT_4O, LLM.CLAUDE_3_5_SONNET, LLM.GEMINI_1_5_PRO])

# Run the prompt against every selected LLM. sleep_seconds=0 presumably means
# no pause between requests — confirm in src.experiment.
run_experiment(
    df_fallacies_e3,
    e3_filename,
    prompt_template_e3,
    llms,
    sleep_seconds=0,
)

# Persist the dataframe under the experiment's CSV path.
save_fallacy_df(df_fallacies_e3, e3_filename)


[2024-10-18 15:04:00] Processed 100 responses for LLM gpt_4o (index=99).
[2024-10-18 15:04:45] Processed 200 responses for LLM gpt_4o (index=199).
[2024-10-18 15:05:32] Processed 300 responses for LLM gpt_4o (index=299).
[2024-10-18 15:06:21] Processed 400 responses for LLM gpt_4o (index=399).
[2024-10-18 15:07:10] Processed 500 responses for LLM gpt_4o (index=499).
[2024-10-18 15:08:15] Processed 600 responses for LLM gpt_4o (index=599).
[2024-10-18 15:09:02] Processed 700 responses for LLM gpt_4o (index=699).
[2024-10-18 15:09:49] Processed 800 responses for LLM gpt_4o (index=799).
[2024-10-18 15:10:38] Processed 900 responses for LLM gpt_4o (index=899).
[2024-10-18 15:11:24] Processed 1000 responses for LLM gpt_4o (index=999).
[2024-10-18 15:12:09] Processed 1100 responses for LLM gpt_4o (index=1099).
