In [2]:
import os

os.environ['AWS_REGION_NAME'] = 'us-west-2'

In [3]:
from refchecker.extractor import ClaudeExtractor
from refchecker.claim_utils import Claim, Response, Sentence


text = \
    "The longest river in the world is the Nile River, located in northeastern Africa. " + \
    "It stretches approximately 6,650 kilometers (4,130 miles) in length. " + \
    "The Nile has been historically significant, playing a crucial role in the " + \
    "development of ancient Egyptian civilization and supporting life and agriculture along its banks."

reference = \
    "The Nile is a major north-flowing river in northeastern Africa. " + \
    "It flows into the Mediterranean Sea. The Nile is the longest river " + \
    "in Africa and has historically been considered the longest river in " + \
    "the world,[3][4] though this has been contested by research suggesting " + \
    "that the Amazon River is slightly longer.[5][6] Of the world's " + \
    "major rivers, the Nile is one of the smallest, as measured by annual " + \
    "flow in cubic metres of water.[7] About 6,650 km (4,130 mi)[a] long, its " + \
    "drainage basin covers eleven countries: the Democratic Republic of the Congo, " + \
    "Tanzania, Burundi, Rwanda, Uganda, Kenya, Ethiopia, Eritrea, South Sudan, Sudan, " + \
    "and Egypt.[9] In particular, the Nile is the primary water source of Egypt, Sudan " + \
    "and South Sudan.[10] Additionally, the Nile is an important economic river, supporting agriculture and fishing."

# claim extraction
extractor = ClaudeExtractor()

# each element in claims is an instance of Claim
claims = extractor.extract_subsentence_claims(
    response=text,
    max_new_tokens=1000
)
response = Response(text)
print('### Text')
print(response.get_indexed_response(condense_newlines=False))
print('### Claims')
for c in claims:
    print(c)

### Text
[1] The longest river in the world is the Nile River, located in northeastern Africa. [2] It stretches approximately 6,650 kilometers (4,130 miles) in length. [3] The Nile has been historically significant, playing a crucial role in the development of ancient Egyptian civilization and supporting life and agriculture along its banks.
### Claims
The longest river in the world is the Nile River. [1]
The Nile River is located in northeastern Africa. [1]
The Nile River stretches approximately 6,650 kilometers (4,130 miles) in length. [2]
The Nile River has been historically significant. [3]
The Nile River played a crucial role in the development of ancient Egyptian civilization. [3]
The Nile River supported life and agriculture along its banks. [3]


In [4]:
# get attributed sentences
for c in claims:
    print(f'### Claim\n{c.text}')
    print('### Attributed Sentences')
    for sent_id in c.attributed_sent_ids:
        print(response.get_sentence_by_id(sent_id).text.strip())
    print()

### Claim
The longest river in the world is the Nile River.
### Attributed Sentences
The longest river in the world is the Nile River, located in northeastern Africa.

### Claim
The Nile River is located in northeastern Africa.
### Attributed Sentences
The longest river in the world is the Nile River, located in northeastern Africa.

### Claim
The Nile River stretches approximately 6,650 kilometers (4,130 miles) in length.
### Attributed Sentences
It stretches approximately 6,650 kilometers (4,130 miles) in length.

### Claim
The Nile River has been historically significant.
### Attributed Sentences
The Nile has been historically significant, playing a crucial role in the development of ancient Egyptian civilization and supporting life and agriculture along its banks.

### Claim
The Nile River played a crucial role in the development of ancient Egyptian civilization.
### Attributed Sentences
The Nile has been historically significant, playing a crucial role in the development of ancien

In [5]:
from refchecker.checker import LLMChecker

print(len(claims))

checker = LLMChecker(model='claude3')
results = checker.check(
    claim=[c.text for c in claims],
    reference=[reference],
    max_reference_segment_length=0
)
print(results)
print(len(results[0]))

6


100%|██████████| 4/4 [00:21<00:00,  5.38s/it]

[['Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral']]
49



