Data can be downloaded here
conda create -n vlgrammar python=3.7 pytorch=1.7.1 torchvision -c pytorch
conda activate vlgrammar
pip install -r requirements.txt
git clone --branch infer_pos_tag https://github.com/zhaoyanpeng/pytorch-struct.git
cd pytorch-struct
pip install -e .
cd SCAN
python simclr.py --config_env configs/env.yml --config_exp configs/pretext/simclr_partit_chair.yml
python scan.py --config_env configs/env.yml --config_exp configs/scan/scan_partit_chair.yml
or use our pretrained model
cd VLGrammar
python train.py    # or: python train.py --type chair
Model checkpoints can be downloaded here
@inproceedings{hong2021vlgrammar,
title={VLGrammar: Grounded Grammar Induction of Vision and Language},
author={Yining Hong and Qing Li and Song-Chun Zhu and Siyuan Huang},
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
year={2021},
}