Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
RoB distillation + JEPA evaluations (#284)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: fairinternal/ssl_scaling#284

Reviewed By: odelalleau

Differential Revision: D42220017

Pulled By: QuentinDuval

fbshipit-source-id: 742419aa859fdbe4bc80f1f9e9f4771fee0f41a2
  • Loading branch information
QuentinDuval authored and facebook-github-bot committed Dec 28, 2022
1 parent 346114a commit 04788de
Show file tree
Hide file tree
Showing 259 changed files with 13,408 additions and 791 deletions.
11 changes: 6 additions & 5 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ install_classy_vision: &install_classy_vision
working_directory: ~/
command: |
pip uninstall -y classy_vision
pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/main
pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/4785d5ee19d3bcedd5b28c1eb51ea1f59188b54d
setup_venv: &setup_venv
- run:
Expand Down Expand Up @@ -151,7 +152,7 @@ jobs:
# Cache the vissl_venv directory that contains dependencies
- restore_cache:
keys:
- v8-cpu-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }}
- v9-cpu-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }}

- <<: *install_vissl_dep
- <<: *install_augly
Expand All @@ -163,7 +164,7 @@ jobs:
- save_cache:
paths:
- ~/vissl_venv
key: v8-cpu-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }}
key: v9-cpu-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }}

- <<: *install_vissl

Expand Down Expand Up @@ -195,7 +196,7 @@ jobs:
# Download and cache dependencies
- restore_cache:
keys:
- v8-gpu-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }}-{{ checksum "docker/common/install_apex.sh" }}
- v9-gpu-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }}-{{ checksum "docker/common/install_apex.sh" }}

- <<: *install_vissl_dep
- <<: *install_classy_vision
Expand All @@ -210,7 +211,7 @@ jobs:
- save_cache:
paths:
- ~/vissl_venv
key: v8-gpu-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }}-{{ checksum "docker/common/install_apex.sh" }}
key: v9-gpu-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }}-{{ checksum "docker/common/install_apex.sh" }}

- <<: *install_vissl

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@ config:
TRAINER:
TRAIN_STEP_NAME: standard_train_step
MODEL:
FEATURE_EVAL_SETTINGS:
EVAL_MODE_ON: True
EVAL_TRUNK_AND_HEAD: False
TRUNK:
NAME: resnet
RESNETS:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Model preset: `mobilenetv3_timm` trunk (variant `mobilenetv3_large_100`) with a
# single `mobilenet_v3_head_timm` head producing 1000 classes.
# NOTE(review): nesting was reconstructed after indentation was lost in
# extraction — confirm the placement of TRUNK_ONLY and OPTIMIZER against the repo.
config:
  MODEL:
    TRUNK:
      NAME: mobilenetv3_timm
      MOBILE_NET:
        NAME: mobilenetv3_large_100
        TRUNK_ONLY: True
    HEAD:
      PARAMS: [
        ["mobilenet_v3_head_timm", {"num_classes": 1000}],
      ]
  OPTIMIZER:
    regularize_bn: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# @package _global_
# Model preset: `mobilenetv3_tv` trunk (variant `mobilenetv3_large_100`, TIMM_BN
# disabled) with a single `mobilenet_v3_head` head producing 1000 classes.
# NOTE(review): nesting was reconstructed after indentation was lost in
# extraction — confirm against the repo.
config:
  MODEL:
    TRUNK:
      NAME: mobilenetv3_tv
      MOBILE_NET:
        NAME: mobilenetv3_large_100
        TIMM_BN: False
    HEAD:
      PARAMS: [
        ["mobilenet_v3_head", {"num_classes": 1000}],
      ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# @package _global_
# Model preset: ResNet trunk of depth 18 with one `eval_mlp` head mapping
# 512 input channels to 1000 outputs.
config:
  MODEL:
    TRUNK:
      NAME: resnet
      RESNETS:
        DEPTH: 18
    HEAD:
      PARAMS: [['eval_mlp', {'in_channels': 512, 'dims': [512, 1000]}]]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# @package _global_
# Model preset: ResNet trunk of depth 34 with one `eval_mlp` head mapping
# 512 input channels to 1000 outputs.
config:
  MODEL:
    TRUNK:
      NAME: resnet
      RESNETS:
        DEPTH: 34
    HEAD:
      PARAMS: [['eval_mlp', {'in_channels': 512, 'dims': [512, 1000]}]]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# @package _global_
# Model preset: ResNet trunk of depth 50 with one `eval_mlp` head mapping
# 2048 input channels to 1000 outputs.
config:
  MODEL:
    TRUNK:
      NAME: resnet
      RESNETS:
        DEPTH: 50
    HEAD:
      PARAMS: [['eval_mlp', {'in_channels': 2048, 'dims': [2048, 1000]}]]
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# @package _global_
# Evaluation preset: vision transformer trunk (12 layers, 3 heads, hidden
# size 192) in eval mode with trunk and head frozen, plus one `eval_mlp` head
# with 1000 outputs.
# NOTE(review): nesting was reconstructed after indentation was lost in
# extraction — confirm against the repo.
config:
  MODEL:
    FEATURE_EVAL_SETTINGS:
      EVAL_MODE_ON: True
      FREEZE_TRUNK_AND_HEAD: True
      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
        ["concatCLS4", ["Identity", []] ],
      ]
    TRUNK:  # Tiny
      NAME: vision_transformer
      VISION_TRANSFORMERS:
        IMAGE_SIZE: 224
        PATCH_SIZE: 16
        NUM_LAYERS: 12
        NUM_HEADS: 3
        HIDDEN_DIM: 192
        MLP_DIM: 768
        CLASSIFIER: token
        DROPOUT_RATE: 0
        ATTENTION_DROPOUT_RATE: 0
        QKV_BIAS: True
        DROP_PATH_RATE: 0.0
    HEAD:
      # in_channels 768 = 4 x HIDDEN_DIM (192); presumably "concatCLS4"
      # concatenates four class tokens — confirm against the trunk code.
      PARAMS: [
        ["eval_mlp", {"in_channels": 768, "dims": [768, 1000]}],
      ]
Original file line number Diff line number Diff line change
@@ -1,29 +1,5 @@
# @package _global_
config:
DATA:
TRAIN:
BATCHSIZE_PER_REPLICA: 32
TRANSFORMS:
- name: RandomResizedCrop
size: 224
interpolation: 3
- name: RandomHorizontalFlip
- name: ToTensor
- name: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
TEST:
BATCHSIZE_PER_REPLICA: 32
TRANSFORMS:
- name: Resize
size: 256
interpolation: 3
- name: CenterCrop
size: 224
- name: ToTensor
- name: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
MODEL:
FEATURE_EVAL_SETTINGS:
LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
Expand Down Expand Up @@ -54,9 +30,5 @@ config:
["eval_mlp", {"in_channels": 4096, "dims": [4096, 100]}],
["eval_mlp", {"in_channels": 1024, "dims": [1024, 100]}],
]
WEIGHTS_INIT:
PARAMS_FILE: "manifold://ssl_framework/tree/gfsai-bistro2-east/ai-group/users/prigoyal/vissl/oss_beit_large_patch16_224_pt22k.pth"
APPEND_PREFIX: trunk.base_model.
STATE_DICT_KEY_NAME: 'model'
OPTIMIZER:
regularize_bn: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_
# Linear-evaluation preset: `mobilenetv3_timm` trunk (not pretrained) whose
# "flatten" feature is fed to two heads with 100 outputs each — one `eval_mlp`
# and one `mlp`, both taking 1280 inputs.
# NOTE(review): nesting was reconstructed after indentation was lost in
# extraction — confirm against the repo.
config:
  MODEL:
    FEATURE_EVAL_SETTINGS:
      # One entry per head listed in HEAD.PARAMS below, in the same order.
      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
        ["flatten", ["Identity", []] ],
        ["flatten", ["Identity", []] ],
      ]
    TRUNK:
      NAME: mobilenetv3_timm
      MOBILE_NET:
        NAME: mobilenetv3_large_100
        PRETRAINED: False
    HEAD:
      PARAMS: [
        ["eval_mlp", {"in_channels": 1280, "dims": [1280, 100]}],
        ["mlp", {"dims": [1280, 100]}],
      ]
  OPTIMIZER:
    regularize_bn: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# @package _global_
# Linear-evaluation sweep on a `mobilenetv3_tv` trunk (not pretrained), with
# the trunk frozen and features flattened. Fifteen heads are trained side by
# side; the three lists below (LINEAR_EVAL_FEAT_POOL_OPS_MAP, HEAD.PARAMS and
# OPTIMIZER.linear_eval_heads) each have fifteen entries in the same order, so
# entry i of each list describes the same head. Keep them aligned when editing.
# NOTE(review): nesting was reconstructed after indentation was lost in
# extraction — confirm against the repo.
config:
  MODEL:
    FEATURE_EVAL_SETTINGS:
      EVAL_MODE_ON: True
      FREEZE_TRUNK_ONLY: True
      SHOULD_FLATTEN_FEATS: True
      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
        # Linear heads on top of normalized or not representations
        ["trunk_pool", ["Identity", []] ],
        ["trunk_pool", ["Identity", []] ],
        ["trunk_pool", ["Identity", []] ],

        # MobileNet head on top of normalized or not representations
        ["trunk_pool", ["Identity", []] ],
        ["trunk_pool", ["Identity", []] ],
        ["trunk_pool", ["Identity", []] ],
        # ["trunk_pool", ["Identity", []] ],
        # ["trunk_pool", ["Identity", []] ],

        # Exploring a two-layer head
        ["trunk_pool", ["Identity", []] ],
        ["trunk_pool", ["Identity", []] ],
        ["trunk_pool", ["Identity", []] ],

        # Combining several levels of representations
        ["trunk", ["AdaptiveAvgPool2d", [[2, 1]]]],
        ["trunk", ["AdaptiveAvgPool2d", [[2, 1]]]],
        ["trunk", ["AdaptiveAvgPool2d", [[2, 1]]]],
        ["trunk", ["AdaptiveAvgPool2d", [[2, 2]]]],
        ["trunk", ["AdaptiveAvgPool2d", [[2, 2]]]],
        ["trunk", ["AdaptiveAvgPool2d", [[2, 2]]]],
      ]
    TRUNK:
      NAME: mobilenetv3_tv
      MOBILE_NET:
        NAME: mobilenetv3_large_100
        PRETRAINED: False
    HEAD:
      PARAMS: [
        # Linear heads on top of normalized or not representations
        ["eval_mlp", {"in_channels": 960, "dims": [960, 100]}],
        ["eval_mlp", {"in_channels": 960, "dims": [960, 100]}],
        ["eval_mlp", {"in_channels": 960, "dims": [960, 100]}],

        # MobileNet head on top of normalized or not representations
        ["mobilenet_v3_head", {"with_bn": True, "num_classes": 100}],
        ["mobilenet_v3_head", {"with_bn": True, "num_classes": 100}],
        ["mobilenet_v3_head", {"with_bn": True, "num_classes": 100}],
        # ["mobilenet_v3_head", {"with_bn": True, "drop_out": 0.1, "num_classes": 100}],
        # ["mobilenet_v3_head", {"with_bn": True, "drop_out": 0.0, "num_classes": 100}],

        # Exploring a two-layer head
        ["eval_mlp", {"in_channels": 960, "dims": [960, 1280, 100]}],
        ["eval_mlp", {"in_channels": 960, "dims": [960, 1280, 100]}],
        ["eval_mlp", {"in_channels": 960, "dims": [960, 1280, 100]}],

        # Combining several levels of representations
        # (1920 = 960 x 2 for the [2, 1] pooling, 3840 = 960 x 4 for [2, 2])
        ["eval_mlp", {"in_channels": 1920, "dims": [1920, 100]}],
        ["eval_mlp", {"in_channels": 1920, "dims": [1920, 100]}],
        ["eval_mlp", {"in_channels": 1920, "dims": [1920, 100]}],
        ["eval_mlp", {"in_channels": 3840, "dims": [3840, 100]}],
        ["eval_mlp", {"in_channels": 3840, "dims": [3840, 100]}],
        ["eval_mlp", {"in_channels": 3840, "dims": [3840, 100]}],
      ]
  OPTIMIZER:
    name: sgd
    # In the OSS Caffe2 benchmark, RN50 models use 1e-4 and AlexNet models 5e-4
    weight_decay: 0.0005
    momentum: 0.9
    num_epochs: 28
    nesterov: True
    regularize_bn: True
    regularize_bias: True
    param_schedulers:
      lr:
        auto_lr_scaling:
          auto_scale: true
          base_value: 0.01
          base_lr_batch_size: 256
        name: multistep
        # Four values for three milestones: one value per interval.
        values: [0.01, 0.001, 0.0001, 0.00001]
        milestones: [8, 16, 24]
        update_interval: epoch
    param_group_constructor: linear_eval_heads
    # Per-head optimizer overrides, one entry per head in HEAD.PARAMS order.
    linear_eval_heads:
      # Linear heads on top of normalized or not representations
      - {"lr": 1.0, "weight_decay": 0.0005, "regularize_bn": True}
      - {"lr": 1.0, "weight_decay": 0.0005, "regularize_bn": False}
      - {"lr": 1.0, "weight_decay": 0.0}
      # MobileNet head on top of normalized or not representations
      - {"lr": 1.0, "weight_decay": 0.0005, "regularize_bn": True}
      - {"lr": 1.0, "weight_decay": 0.0005, "regularize_bn": False}
      - {"lr": 1.0, "weight_decay": 0.0}
      # Exploring a two-layer head
      - {"lr": 1.0, "weight_decay": 0.0005}
      - {"lr": 1.0, "weight_decay": 0.0001}
      - {"lr": 1.0, "weight_decay": 0.0}
      # Combining several levels of representations
      - {"lr": 1.0, "weight_decay": 0.0005, "regularize_bn": True}
      - {"lr": 1.0, "weight_decay": 0.0005, "regularize_bn": False}
      - {"lr": 1.0, "weight_decay": 0.0}
      - {"lr": 1.0, "weight_decay": 0.0005, "regularize_bn": True}
      - {"lr": 1.0, "weight_decay": 0.0005, "regularize_bn": False}
      - {"lr": 1.0, "weight_decay": 0.0}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# @package _global_
# Linear-evaluation preset: train/test image transforms plus a 40-layer vision
# transformer trunk (hidden size 1408) evaluated through four heads with 100
# outputs each. Entry i of LINEAR_EVAL_FEAT_POOL_OPS_MAP pairs with entry i of
# HEAD.PARAMS.
# NOTE(review): nesting was reconstructed after indentation was lost in
# extraction — confirm against the repo.
config:
  DATA:
    TRAIN:
      TRANSFORMS:
        - name: RandomResizedCrop
          size: 224
          interpolation: 3
        - name: RandomHorizontalFlip
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
    TEST:
      TRANSFORMS:
        - name: Resize
          size: 256
          interpolation: 3
        - name: CenterCrop
          size: 224
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
  MODEL:
    FEATURE_EVAL_SETTINGS:
      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
        ["concatPOOL4", ["Identity", []] ],
        ["lastPOOL", ["Identity", []] ],
        ["concatPOOL4", ["Identity", []] ],
        ["lastPOOL", ["Identity", []] ],
      ]
    # 40 layers, hidden 1408, MLP 6144, patch 16.
    # NOTE(review): this was labelled "L-16", but the sizes below look like a
    # giant-scale ViT rather than ViT-L — confirm the intended label.
    TRUNK:
      NAME: vision_transformer
      VISION_TRANSFORMERS:
        IMAGE_SIZE: 224
        PATCH_SIZE: 16
        NUM_LAYERS: 40
        NUM_HEADS: 16
        HIDDEN_DIM: 1408
        MLP_DIM: 6144
        DROPOUT_RATE: 0.0
        ATTENTION_DROPOUT_RATE: 0.0
        CLASSIFIER: token
        QKV_BIAS: True
        DROP_PATH_RATE: 0.0
        USE_CLASS_TOKEN: False
    HEAD:
      # 5632 = 4 x HIDDEN_DIM (1408) for the "concatPOOL4" features;
      # 1408 = HIDDEN_DIM for the "lastPOOL" features.
      PARAMS: [
        ["eval_mlp", {"in_channels": 5632, "dims": [5632, 100]}],
        ["eval_mlp", {"in_channels": 1408, "dims": [1408, 100]}],
        ["mlp", {"dims": [5632, 100]}],
        ["mlp", {"dims": [1408, 100]}],
      ]
Loading

0 comments on commit 04788de

Please sign in to comment.