This is the repo for our paper: HaluEval-Wild: Evaluating Hallucinations of Language Models in the Wild, in which we introduce HaluEval-Wild, the first benchmark specifically designed to evaluate LLM hallucinations in the wild.
HaluEval-Wild is released under the MIT License.
@misc{HaluEval-Wild,
  author        = {Zhu, Zhiying and Yang, Yiming and Sun, Zhiqing},
  title         = {{HaluEval-Wild}: Evaluating Hallucinations of Language Models in the Wild},
  year          = {2024},
  eprint        = {2403.04307},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2403.04307},
}