This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

xcit integration (#187)
Summary:
Integration of XCiT into VISSL :)

Pull Request resolved: fairinternal/ssl_scaling#187

Reviewed By: iseessel

Differential Revision: D31378757

Pulled By: prigoyal

fbshipit-source-id: d383390a3ae4def585eaf7bef6d743e9c5c059e8
Mathilde Caron authored and facebook-github-bot committed Oct 6, 2021
1 parent af8e879 commit 056e695
Showing 4 changed files with 561 additions and 1 deletion.
140 changes: 140 additions & 0 deletions configs/config/pretrain/dino/dino_16gpus_xcit_small_12_p16.yaml
@@ -0,0 +1,140 @@
# @package _global_
config:
  VERBOSE: False
  LOG_FREQUENCY: 10
  TEST_ONLY: False
  TEST_MODEL: False
  SEED_VALUE: 0
  MULTI_PROCESSING_METHOD: forkserver
  HOOKS:
    PERF_STATS:
      MONITOR_PERF_STATS: True
      PERF_STAT_FREQUENCY: 40
      ROLLING_BTIME_FREQ: 5
  DATA:
    NUM_DATALOADER_WORKERS: 10
    TRAIN:
      DATA_SOURCES: [disk_folder]
      DATASET_NAMES: [imagenet1k_folder]
      BATCHSIZE_PER_REPLICA: 64
      LABEL_TYPE: sample_index # just an implementation detail. Label isn't used
      TRANSFORMS:
        - name: ImgPilToMultiCrop
          total_num_crops: 10
          size_crops: [224, 96]
          num_crops: [2, 8]
          crop_scales: [[0.3, 1], [0.05, 0.3]]
        - name: RandomHorizontalFlip
          p: 0.5
        - name: ImgPilColorDistortion
          strength: 0.5
        - name: ImgPilMultiCropRandomApply
          transforms: [{"name": "ImgPilGaussianBlur", "p": 1., "radius_min": 0.1, "radius_max": 2.0}]
          prob: [1., 0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
        - name: ImgPilMultiCropRandomApply
          transforms: [{"name": "ImgPilRandomSolarize", "p": 1.}]
          prob: [0., 0.2, 0., 0., 0, 0, 0, 0, 0, 0]
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      COLLATE_FUNCTION: multicrop_collator
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/imagenet1k/
      DROP_LAST: True
  TRAINER:
    TRAIN_STEP_NAME: standard_train_step
  METERS:
    name: ""
  MODEL:
    TRUNK:
      NAME: xcit
      XCIT:
        IMAGE_SIZE: 224
        PATCH_SIZE: 16
        HIDDEN_DIM: 384
        NUM_LAYERS: 12
        NUM_HEADS: 8
        DROPOUT_RATE: 0
        ATTENTION_DROPOUT_RATE: 0
        DROP_PATH_RATE: 0.05
        ETA: 1
        TOKENS_NORM: True
        QKV_BIAS: True
        QK_SCALE: False
    HEAD:
      PARAMS: [
        ["swav_head", {"use_weight_norm_prototypes": True, "dims": [384, 2048, 2048, 256], "use_bn": False, "return_embeddings": False, "activation_name": "GELU", "num_clusters": [65536]}],
      ]
    TEMP_FROZEN_PARAMS_ITER_MAP: [
      ['module.heads.0.prototypes0.weight_v', 1251],
      ['module.heads.0.prototypes0.weight_g', 1251],
    ]
    AMP_PARAMS:
      AMP_TYPE: pytorch
      USE_AMP: True
  LOSS:
    name: dino_loss
    dino_loss:
      momentum: 0.996
      teacher_temp_warmup_iters: 37530 # 30 epochs
      teacher_temp_min: 0.04
      teacher_temp_max: 0.07
      ema_center: 0.9
      normalize_last_layer: false
  OPTIMIZER:
    name: adamw
    momentum: 0.9
    nesterov: False
    num_epochs: 300
    regularize_bn: False
    regularize_bias: False
    param_schedulers:
      lr_head:
        name: composite
        schedulers:
          - name: linear
            start_value: 0.00001
            end_value: 0.002
          - name: cosine
            start_value: 0.002
            end_value: 0.00001
        update_interval: epoch
        interval_scaling: [rescaled, fixed]
        lengths: [0.0333, 0.9667]
      lr:
        name: composite
        schedulers:
          - name: linear
            start_value: 0.00001
            end_value: 0.002
          - name: cosine
            start_value: 0.002
            end_value: 0.00001
        update_interval: epoch
        interval_scaling: [rescaled, fixed]
        lengths: [0.0333, 0.9667]
      weight_decay:
        name: cosine
        start_value: 0.04
        end_value: 0.4
        update_interval: epoch
      weight_decay_head:
        name: cosine
        start_value: 0.04
        end_value: 0.4
        update_interval: epoch
  DISTRIBUTED:
    BACKEND: nccl
    NUM_NODES: 2
    NUM_PROC_PER_NODE: 8
    INIT_METHOD: tcp
    RUN_ID: auto
  MACHINE:
    DEVICE: gpu
  CHECKPOINT:
    DIR: "."
    AUTO_RESUME: True
    CHECKPOINT_FREQUENCY: 5
    OVERWRITE_EXISTING: true
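
The iteration counts hard-coded above follow from the global batch size: 2 nodes x 8 GPUs x 64 images per replica gives 1024 images per step, so one ImageNet-1k epoch is roughly 1,281,167 / 1024 ≈ 1251 iterations. That appears to be why the prototype weights are frozen for 1251 iterations (about one epoch) and the teacher temperature is warmed up over 30 x 1251 ≈ 37,530 iterations. As a hypothetical illustration (not part of this commit), a single-node run would halve the global batch size and roughly double these iteration-based values; the override keys below mirror the config above:

# @package _global_
# Hypothetical single-node (8 GPU) variant -- illustrative sketch only, not in this commit.
# Global batch = 8 x 64 = 512, so ~1,281,167 / 512 ≈ 2502 iterations per epoch.
config:
  DISTRIBUTED:
    NUM_NODES: 1
  MODEL:
    TEMP_FROZEN_PARAMS_ITER_MAP: [
      ['module.heads.0.prototypes0.weight_v', 2502],  # ~1 epoch at global batch 512
      ['module.heads.0.prototypes0.weight_g', 2502],
    ]
  LOSS:
    dino_loss:
      teacher_temp_warmup_iters: 75060  # ~30 epochs at global batch 512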
19 changes: 19 additions & 0 deletions vissl/config/defaults.yaml
@@ -622,6 +622,25 @@ config:
        QKV_BIAS: False # Bias for QKV in attention layers.
        QK_SCALE: False # Scale

      # ------------------------------------------------------------- #
      # XCiT (https://arxiv.org/abs/2106.09681)
      # default config is that of xcit_small_12_p16
      # ------------------------------------------------------------- #
      XCIT:
        name:
        IMAGE_SIZE: 224
        PATCH_SIZE: 16
        HIDDEN_DIM: 384
        NUM_LAYERS: 12
        NUM_HEADS: 8
        DROPOUT_RATE: 0
        ATTENTION_DROPOUT_RATE: 0
        DROP_PATH_RATE: 0.05
        ETA: 1
        TOKENS_NORM: True
        QKV_BIAS: True # Bias for QKV in attention layers.
        QK_SCALE: False # Scale

      # ------------------------------------------------------------- #
      # Parameters unique to the ConViT and not used for standard vision
      # transformers
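
Because defaults.yaml now carries the full xcit_small_12_p16 parameter set, a user config only needs to select the trunk and override whatever deviates from these defaults. A hypothetical sketch (not part of this commit; the values only loosely follow the larger xcit_medium_24_p16 variant described in the XCiT paper):

# Hypothetical override -- illustrative only; pick the xcit trunk and change
# just the keys that deviate from the defaults above.
config:
  MODEL:
    TRUNK:
      NAME: xcit
      XCIT:
        HIDDEN_DIM: 512  # assumed medium-variant width, for illustration
        NUM_LAYERS: 24   # assumed medium-variant depth, for illustration

Note that in the pretraining config above, the first swav_head dimension (384) matches HIDDEN_DIM, so a wider trunk would also need the head's input dimension adjusted.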
