This repository has been archived by the owner on Mar 19, 2024. It is now read-only.
/
eval_resnet_8gpu_transfer_openimages_linear.yaml
131 lines (131 loc) · 3.79 KB
/
eval_resnet_8gpu_transfer_openimages_linear.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# @package _global_
# Linear-evaluation transfer config: a ResNet-50 trunk is evaluated on
# openimages_v6 with a single linear ("mlp") head on the res5avg features.
# All settings live under the top-level `config` key.
config:
  VERBOSE: True
  LOG_FREQUENCY: 10
  # Train the head and run the test phase after every epoch.
  TEST_ONLY: False
  TEST_EVERY_NUM_EPOCH: 1
  TEST_MODEL: True
  SEED_VALUE: 1
  MULTI_PROCESSING_METHOD: forkserver
  HOOKS:
    PERF_STATS:
      MONITOR_PERF_STATS: True
  DATA:
    NUM_DATALOADER_WORKERS: 2
    TRAIN:
      DATA_SOURCES: [everstore]
      LABEL_SOURCES: [disk_filelist]
      DATASET_NAMES: [openimages_v6]
      BATCHSIZE_PER_REPLICA: 16
      # Random-crop / flip augmentation followed by tensor normalization.
      TRANSFORMS:
        - name: RandomResizedCrop
          size: 224
        - name: RandomHorizontalFlip
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/openimages_v6/
      # num_classes here matches METERS.mean_ap_list_meter.num_classes and
      # the output width of the head (9605).
      COLLATE_FUNCTION: targets_one_hot_default_collator
      COLLATE_FUNCTION_PARAMS: {"num_classes": 9605}
    TEST:
      DATA_SOURCES: [everstore]
      LABEL_SOURCES: [disk_filelist]
      DATASET_NAMES: [openimages_v6]
      BATCHSIZE_PER_REPLICA: 16
      # Deterministic resize + center crop for evaluation.
      TRANSFORMS:
        - name: Resize
          size: 256
        - name: CenterCrop
          size: 224
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/openimages_v6/
      COLLATE_FUNCTION: targets_one_hot_default_collator
      COLLATE_FUNCTION_PARAMS: {"num_classes": 9605}
  METERS:
    name: mean_ap_list_meter
    # The training set has 5.6M samples, so the training meter is disabled.
    enable_training_meter: False
    mean_ap_list_meter:
      num_classes: 9605
      # One meter, matching the single feature/head pair enabled under MODEL.
      num_meters: 1
  TRAINER:
    TRAIN_STEP_NAME: standard_train_step
  MODEL:
    FEATURE_EVAL_SETTINGS:
      EVAL_MODE_ON: True
      FREEZE_TRUNK_ONLY: True
      SHOULD_FLATTEN_FEATS: False
      # Only the res5avg feature is active; the commented entries are the
      # alternative intermediate layers and pair with the commented
      # "eval_mlp" heads under HEAD.PARAMS.
      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
        # ["conv1", ["AvgPool2d", [[10, 10], 10, 4]]],
        # ["res2", ["AvgPool2d", [[16, 16], 8, 0]]],
        # ["res3", ["AvgPool2d", [[13, 13], 5, 0]]],
        # ["res4", ["AvgPool2d", [[8, 8], 3, 0]]],
        # ["res5", ["AvgPool2d", [[6, 6], 1, 0]]],
        ["res5avg", ["Identity", []]],
      ]
    TRUNK:
      NAME: resnet
      TRUNK_PARAMS:
        RESNETS:
          DEPTH: 50
    HEAD:
      # Single linear layer: 2048 input features -> 9605 outputs.
      PARAMS: [
        # ["eval_mlp", {"in_channels": 64, "dims": [9216, 9605]}],
        # ["eval_mlp", {"in_channels": 256, "dims": [9216, 9605]}],
        # ["eval_mlp", {"in_channels": 512, "dims": [8192, 9605]}],
        # ["eval_mlp", {"in_channels": 1024, "dims": [9216, 9605]}],
        # ["eval_mlp", {"in_channels": 2048, "dims": [8192, 9605]}],
        ["mlp", {"dims": [2048, 9605]}],
      ]
    WEIGHTS_INIT:
      # Placeholder value: replace with the path to the weights to evaluate.
      PARAMS_FILE: "specify the model weights"
      STATE_DICT_KEY_NAME: classy_state_dict
      # STATE_DICT_KEY_NAME: model_state_dict
    SYNC_BN_CONFIG:
      CONVERT_BN_TO_SYNC_BN: True
      SYNC_BN_TYPE: apex
      # Equals DISTRIBUTED.NUM_PROC_PER_NODE (8); presumably one sync group
      # per node -- TODO confirm.
      GROUP_SIZE: 8
  LOSS:
    name: bce_logits_multiple_output_single_target
    bce_logits_multiple_output_single_target:
      reduction: "none"
      # 64 = DISTRIBUTED.NUM_NODES (8) x DISTRIBUTED.NUM_PROC_PER_NODE (8);
      # presumably must be updated together with those values -- verify.
      world_size: 64
  OPTIMIZER:
    name: sgd
    # In the OSS Caffe2 benchmark, RN50 models use 1e-4 and AlexNet models 5e-4
    weight_decay: 0.0001
    momentum: 0.9
    num_epochs: 16
    nesterov: True
    regularize_bn: True
    regularize_bias: True
    param_schedulers:
      lr:
        auto_lr_scaling:
          auto_scale: false
          base_value: 0.003
          base_lr_batch_size: 32
        # Step schedule: four LR values separated by three epoch milestones.
        name: multistep
        values: [0.003, 0.0003, 0.00003, 0.000003]
        milestones: [2, 4, 6]
        update_interval: epoch
  DISTRIBUTED:
    BACKEND: nccl
    # NOTE(review): 8 nodes x 8 processes = 64 workers, although the file
    # name says "8gpu" -- confirm the intended scale before launching.
    NUM_NODES: 8
    NUM_PROC_PER_NODE: 8
    INIT_METHOD: tcp
    RUN_ID: auto
  MACHINE:
    DEVICE: gpu
  CHECKPOINT:
    DIR: "."
    AUTO_RESUME: True
    CHECKPOINT_FREQUENCY: 1