## Load and align dataset from all three sources

In [1]:
import pandas as pd
import json
import csv
import os

In [2]:
# Snapshot directory that holds every preprocessed dataset used below.
root = '../data_2023_06_02'

# One input directory per source, all located under `root`.
climate_in, pubhealth_in, fever_in = (
    os.path.join(root, subdir)
    for subdir in (
        'preprocessed/CLIMATE-FEVER',
        'preprocessed/PUBHEALTH',
        'preprocessed/FEVER',
    )
)

In [3]:
def read_json(fp):
    """Read a JSON Lines file and return its records as a list.

    Every non-blank line of the file is parsed as one JSON document.

    Args:
        fp: Path of the JSON Lines file to read; it is opened as UTF-8 text.

    Returns:
        list: The parsed value of each non-blank line, in file order.
        An empty (or all-blank) file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(fp, "r", encoding="utf-8") as f:
        # Iterate the file lazily; readlines() would hold every raw line in
        # memory at once in addition to the parsed records.
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing empty line at end of file) carry no
            # record; json.loads("") would raise, so skip them.
            if not line:
                continue
            data.append(json.loads(line))
    return data

## Load FEVER Data

In [4]:
# Load the three FEVER splits (test, dev, train — in that order) from their
# preprocessed JSONL files.
fever_test_ds, fever_dev_ds, fever_train_ds = [
    read_json(os.path.join(fever_in, fname))
    for fname in (
        'test_preprocessed.ns.rand.jsonl',
        'dev_preprocessed.ns.rand.jsonl',
        'train_preprocessed.ns.rand.jsonl',
    )
]

In [5]:
# Record counts for the FEVER test, dev and train splits, one per line.
print(
    len(fever_test_ds),
    len(fever_dev_ds),
    len(fever_train_ds),
    sep='\n',
)

9999
9999
145449


## Load CLIMATE-FEVER Data

In [6]:
# Only a single CLIMATE-FEVER file is loaded here (no train/dev/test files).
climate_ds = read_json(os.path.join(climate_in, 'climate-fever.jsonl'))

In [7]:
# Number of CLIMATE-FEVER records loaded.
print(len(climate_ds))

1381


## Load PubHealth Data

In [8]:
# Load the three PUBHEALTH splits (train, dev, test — in that order) from
# their JSONL files.
pubhealth_train_ds, pubhealth_dev_ds, pubhealth_test_ds = [
    read_json(os.path.join(pubhealth_in, fname))
    for fname in ('train.jsonl', 'dev.jsonl', 'test.jsonl')
]

In [9]:
# Record counts for the PUBHEALTH train, dev and test splits, one per line.
print(
    len(pubhealth_train_ds),
    len(pubhealth_dev_ds),
    len(pubhealth_test_ds),
    sep='\n',
)

9806
1217
1235


## Investigate Data

In [10]:
# Show the first FEVER test record to see which fields a record carries.
fever_test_ds[0]

{'id': 113501,
 'verifiable': 'NOT VERIFIABLE',
 'label': 'NOT ENOUGH INFO',
 'claim': 'Grease had bad reviews.',
 'evidence': [[[133128, None, 'Ponizovsky_House', -1]]],
 'evidence_text': ['Period photographs show that the building had a flattened yet prominent dome above the main entrance and a smaller curvilinear gable above Povarskaya street facade ; overall styling was reserved , with clear vertical lines of windows cut through a tiled wall .']}

In [13]:
# Show one FEVER dev record (index 324 is just a hand-picked sample).
fever_dev_ds[324]

{'id': 205648,
 'verifiable': 'VERIFIABLE',
 'label': 'REFUTES',
 'claim': 'St. Anger was released on June 3, 2003.',
 'evidence': [[[242995, 244279, 'St._Anger', 0]]],
 'evidence_text': ['St. Anger is the eighth studio album by American heavy metal band Metallica , released on June 5 , 2003 , by Elektra Records .']}

In [14]:
# Show one FEVER train record (index 224 is just a hand-picked sample).
fever_train_ds[224]

{'id': 208197,
 'verifiable': 'VERIFIABLE',
 'label': 'SUPPORTS',
 'claim': 'Eminem has an album.',
 'evidence': [[[247276, 247592, 'Eminem', 2]],
  [[247276, 247593, 'Eminem', 3]],
  [[247276, 247594, 'Eminem', 4]],
  [[247276, 247595, 'Eminem', 8]],
  [[247276, 247596, 'Eminem', 9]],
  [[247276, 247597, 'Eminem', 12]],
  [[247276, 247598, 'Eminem', 13]],
  [[247276, 247599, 'Eminem', 11],
   [247276, 247599, 'Relapse_-LRB-Eminem_album-RRB-', 0]],
  [[247276, 247600, 'Eminem', 10],
   [247276, 247600, 'Encore_-LRB-Eminem_album-RRB-', 0]]],
 'evidence_text': ['Encore -LRB- stylized as ƎNCORE -RRB- is the fifth studio album by American rapper Eminem .',
  'Throughout his career , he has had 10 number-one albums on the Billboard 200 and five number-one singles on the Billboard Hot 100 .',
  'Eminem went on hiatus after touring in 2005 , releasing Relapse in 2009 and Recovery in 2010 .',
  'Relapse is the sixth studio album by American rapper Eminem .',
  'Both won Grammy Awards and Recov

In [27]:
# Show the first CLIMATE-FEVER record to compare its fields with FEVER's.
climate_ds[0]

{'claim_id': '0',
 'claim': 'Global warming is driving polar bears toward extinction',
 'claim_label': 'SUPPORTS',
 'evidences': [{'evidence_id': 'Extinction risk from global warming:170',
   'evidence_label': 'NOT_ENOUGH_INFO',
   'article': 'Extinction risk from global warming',
   'evidence': '"Recent Research Shows Human Activity Driving Earth Towards Global Extinction Event".',
   'entropy': 0.6931471806,
   'votes': ['SUPPORTS', 'NOT_ENOUGH_INFO', None, None, None]},
  {'evidence_id': 'Global warming:14',
   'evidence_label': 'SUPPORTS',
   'article': 'Global warming',
   'evidence': 'Environmental impacts include the extinction or relocation of many species as their ecosystems change, most immediately the environments of coral reefs, mountains, and the Arctic.',
   'entropy': 0.0,
   'votes': ['SUPPORTS', 'SUPPORTS', None, None, None]},
  {'evidence_id': 'Global warming:178',
   'evidence_label': 'NOT_ENOUGH_INFO',
   'article': 'Global warming',
   'evidence': 'Rising temperatu

In [28]:
# Show the first PUBHEALTH train record to compare its fields with FEVER's.
pubhealth_train_ds[0]

{'claim_id': '15661',
 'claim': '"The money the Clinton Foundation took from from foreign governments while Hillary Clinton was secretary of state ""is clearly illegal. … The Constitution says you can’t take this stuff."',
 'date_published': 'April 26, 2015',
 'explanation': '"Gingrich said the Clinton Foundation ""took money from from foreign governments while (Hillary Clinton) was secretary of state. It is clearly illegal. … The Constitution says you can’t take this stuff."" A clause in the Constitution does prohibit U.S. officials such as former Secretary of State Hillary Clinton from receiving gifts, or emoluments, from foreign governments. But the gifts in this case were donations from foreign governments that went to the Clinton Foundation, not Hillary Clinton. She was not part of the foundation her husband founded while she was secretary of state. Does that violate the Constitution? Some libertarian-minded constitutional law experts say it very well could. Others are skeptical. 