Attentive Fusion is a layer designed to fuse the outputs of multimodal data for classification. The layer learns from the input data and passes on the most useful information. It can be used alongside Multi-Head Attention, as it has a lower computational cost.
@misc{mandal2024attentive,
  author        = {Mandal, Atanu and Roy, Gargi and Barman, Amit and Dutta, Indranil and Naskar, Sudip Kumar},
  title         = {Attentive Fusion: A Transformer-based Approach to Multimodal Hate Speech Detection},
  year          = {2024},
  eprint        = {2401.10653},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
Updated on 31 January 2024