In [1]:
import pandas as pd
import difflib

df = pd.read_csv('outputs/all.csv')
df['Label'] = df['Label'].str.lower()
df

Unnamed: 0,fact_checked_segment,comment_by_fact-checker,Dataset,Label,debunk
0,there has been no systematic increase in the f...,It’s not clear how you judge “systematic incre...,Dev,cherry picking,"Title: ""Extreme Weather and Climate Change""\nF..."
1,"“Earlier this month, NASA scientists provided ...",The point in the second sentence may be true f...,Train,cherry picking,"Title: ""NASA Visualization of Climate Change a..."
2,“If our climate conversation managed to includ...,"True, but the author should set the example an...",Train,cherry picking,"Title: ""A Balanced Climate Conversation""\nFact..."
3,“Ice cap is disappearing far more rapidly than...,"Again, “ice sheet” is more accurate than “ice ...",Train,vagueness,"Title: ""Disappearance of Ice Caps""\nFact: Rece..."
4,More than 100 climate models over the past 30 ...,"Rather than take Plimer’s word for this, a qui...",Test,no fallacy,"Title: ""Climate Models and Carbon Dioxide""\nFa..."
...,...,...,...,...,...
678,“Sea ice thickness also substantially declined...,This is an important change to emphasize becau...,Train,no fallacy,"Title: ""Declining Sea Ice Thickness""\nFact: Se..."
679,“Global human emissions are only 3 per cent of...,Global human emissions are indeed only a small...,Test,red herring,"Title: ""Human Emissions and Climate Change""\nF..."
680,as continents rise after the overlying ice has...,Glacial Isostatic Adjustment (GIA) also causes...,Train,false cause,"Title: ""Post-Ice Age Continents""\nFact: During..."
681,“There is now less sea ice on Earth than at an...,One has to bear in mind that the seasonal deve...,Train,hasty generalization,"Title: ""Record Low Sea Ice Levels""\nFact: Scie..."


In [2]:
def expand_response(response):
    d = {}
    try:
        answer = response.split('\n')
        for component in answer:
            title, content = component.split(':')
            d[title] = content.strip()
    except:
        return response
    return d

def prediction(sentence):
    # Convert the sentence to lowercase for case-insensitive matching
    # Split the sentence into individual words
    sentence = sentence.lower().split()

    # Transform 'no fallacy' into 'nofallacy' within the sentence
    sentence = [word.replace('no', 'nofallacy') if word == 'no' else word for word in sentence]
    # Remove the word "fallacy" from the sentence if present
    sentence = [word for word in sentence if word != 'fallacy']
    
    closest_label = None
    closest_ratio = 0

    labels = ['cherry picking', 'vagueness', 'nofallacy', 'false authority', 'evading the burden of proof', 'red herring', 'false analogy', 'false cause', 'strawman', 'hasty generalization', 'post hoc', 'causal oversimplification']
    # Iterate over the labels and find the closest match within the words
    for label in labels:
        label = label.lower()
        matches = difflib.get_close_matches(label, sentence, n=1)
        if matches:
            ratio = difflib.SequenceMatcher(None, label, matches[0]).ratio()
            if ratio > closest_ratio:
                closest_label = label
                closest_ratio = ratio

    return closest_label

In [3]:
df['debunk'] = df['debunk'].apply(expand_response)
df['dict'] = df['debunk'].apply(lambda x: isinstance(x, dict))

In [4]:
preds = []
incorrect_structure = []
for i in df.index:
    if df['dict'].loc[i] == True:
        label = df['Label'].loc[i]
        try:
            sentence = df['debunk'].loc[i]['Fallacy']
            preds.append(prediction(sentence))
        except:
            incorrect_structure.append(i)
            preds.append(False)
    else:
        preds.append(False)

df['predictions'] = preds
df['predictions'] = df['predictions'].replace('nofallacy', 'no fallacy')

# Changed results from original "no fallacy":
df.loc[30, 'predictions'] = 'cherry picking'
df.loc[34, 'predictions'] = 'strawman'
df.loc[316, 'predictions'] = 'false analogy'
df.loc[459, 'predictions'] = 'causal oversimplification'
df.loc[539, 'predictions'] = 'cherry picking'
df.loc[552, 'predictions'] = 'false analogy'
df.loc[621, 'predictions'] = 'cherry picking'
df.loc[634, 'predictions'] = 'false cause'

# assumes None == no fallacy, False == no fallacy
df['predictions'] = df['predictions'].fillna('no fallacy')
df['predictions'] = df['predictions'].replace(False, 'no fallacy')

# map false cause and post hoc to causal oversimplification:
df['Label'] = df['Label'].replace('false cause', 'causal oversimplification')
df['Label'] = df['Label'].replace('post hoc', 'causal oversimplification')
df['predictions'] = df['predictions'].replace('false cause', 'causal oversimplification')
df['predictions'] = df['predictions'].replace('post hoc', 'causal oversimplification')

# check correct results
df['corrects'] = df['Label'] == df['predictions']

df


Unnamed: 0,fact_checked_segment,comment_by_fact-checker,Dataset,Label,debunk,dict,predictions,corrects
0,there has been no systematic increase in the f...,It’s not clear how you judge “systematic incre...,Dev,cherry picking,"{'Title': '""Extreme Weather and Climate Change...",True,cherry picking,True
1,"“Earlier this month, NASA scientists provided ...",The point in the second sentence may be true f...,Train,cherry picking,"{'Title': '""NASA Visualization of Climate Chan...",True,false analogy,False
2,“If our climate conversation managed to includ...,"True, but the author should set the example an...",Train,cherry picking,"{'Title': '""A Balanced Climate Conversation""',...",True,no fallacy,False
3,“Ice cap is disappearing far more rapidly than...,"Again, “ice sheet” is more accurate than “ice ...",Train,vagueness,"{'Title': '""Disappearance of Ice Caps""', 'Fact...",True,no fallacy,False
4,More than 100 climate models over the past 30 ...,"Rather than take Plimer’s word for this, a qui...",Test,no fallacy,"{'Title': '""Climate Models and Carbon Dioxide""...",True,causal oversimplification,False
...,...,...,...,...,...,...,...,...
678,“Sea ice thickness also substantially declined...,This is an important change to emphasize becau...,Train,no fallacy,"{'Title': '""Declining Sea Ice Thickness""', 'Fa...",True,no fallacy,True
679,“Global human emissions are only 3 per cent of...,Global human emissions are indeed only a small...,Test,red herring,"{'Title': '""Human Emissions and Climate Change...",True,causal oversimplification,False
680,as continents rise after the overlying ice has...,Glacial Isostatic Adjustment (GIA) also causes...,Train,causal oversimplification,"{'Title': '""Post-Ice Age Continents""', 'Fact':...",True,causal oversimplification,True
681,“There is now less sea ice on Earth than at an...,One has to bear in mind that the seasonal deve...,Train,hasty generalization,"{'Title': '""Record Low Sea Ice Levels""', 'Fact...",True,no fallacy,False


In [12]:
df['corrects'].value_counts()

False    555
True     128
Name: corrects, dtype: int64

In [11]:
df[(df['dict'] == True) & (~df.index.isin(incorrect_structure))]['corrects'].value_counts()

False    505
True     104
Name: corrects, dtype: int64

In [20]:
for truelabel in df.Label.unique():
    print(truelabel + ':')
    print(df[df['Label'] == truelabel]['corrects'].value_counts())
    print("-"*10)

cherry picking:
False    74
True     31
Name: corrects, dtype: int64
----------
vagueness:
False    74
Name: corrects, dtype: int64
----------
no fallacy:
False    142
True      64
Name: corrects, dtype: int64
----------
false authority:
False    46
True      4
Name: corrects, dtype: int64
----------
evading the burden of proof:
False    48
Name: corrects, dtype: int64
----------
red herring:
False    68
True      1
Name: corrects, dtype: int64
----------
false analogy:
False    25
True      2
Name: corrects, dtype: int64
----------
causal oversimplification:
False    34
True     26
Name: corrects, dtype: int64
----------
strawman:
False    36
Name: corrects, dtype: int64
----------
hasty generalization:
False    8
Name: corrects, dtype: int64
----------


In [35]:
df['fact_checked_segment'][df['Label'] == 'cherry picking'].sample(5)

223    “‘With levels of carbon dioxide in the atmosph...
78     “This means the global temperature trend has n...
578    But the heads of small island nations, fearful...
206    If warming temperatures brought on by greenhou...
458    “Sea level rise is global. But due to a variet...
Name: fact_checked_segment, dtype: object

In [38]:
df['fact_checked_segment'].loc[223]

'“‘With levels of carbon dioxide in the atmosphere consistently breaking new records, the influence of human activities on the climate system has become more and more evident,’ said Taalas.”'

In [30]:
df.Label.unique()

array(['cherry picking', 'vagueness', 'no fallacy', 'false authority',
       'evading the burden of proof', 'red herring', 'false analogy',
       'causal oversimplification', 'strawman', 'hasty generalization'],
      dtype=object)