train_imagenette.py
from fastai.script import *
from fastai.vision import *
from fastai.callbacks import *
from fastai.distributed import *
from fastprogress import fastprogress
from torchvision.models import *
from fastai.vision.models.xresnet import *
from fastai.vision.models.presnet import *
torch.backends.cudnn.benchmark = True  # let cuDNN auto-tune conv algorithms for fixed input sizes
fastprogress.MAX_COLS = 80             # keep progress bars narrow enough for plain-text logs
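
# Note (not in the original script): the `learn.to_distributed` call below requires
# launching through fastai's launcher, e.g. `python -m fastai.launch train_imagenette.py`;
# running the script directly gives single-process training instead. Exact launcher
# flags depend on your fastai v1 version.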

def get_data(size, woof, bs, workers=None):
    # Pick the smallest stored dataset resolution that still exceeds the training size.
    if   size<=128: path = URLs.IMAGEWOOF_160 if woof else URLs.IMAGENETTE_160
    elif size<=192: path = URLs.IMAGEWOOF_320 if woof else URLs.IMAGENETTE_320
    else          : path = URLs.IMAGEWOOF     if woof else URLs.IMAGENETTE
    path = untar_data(path)

    # Divide the available CPU workers among the distributed processes.
    n_gpus = num_distrib() or 1
    if workers is None: workers = min(8, num_cpus()//n_gpus)

    return (ImageList.from_folder(path).split_by_folder(valid='val')
            .label_from_folder().transform(([flip_lr(p=0.5)], []), size=size)
            .databunch(bs=bs, num_workers=workers)
            .presize(size, scale=(0.35,1))
            .normalize(imagenet_stats))
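
# Illustrative direct call (hypothetical values, not from the script):
#   data = get_data(size=128, woof=False, bs=64)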

@call_parse
def main(
        gpu: Param("GPU to run on", str)=None,
        woof: Param("Use imagewoof (otherwise imagenette)", bool)=False,
        lr: Param("Learning rate", float)=1e-3,
        size: Param("Size (px: 128,192,224)", int)=128,
        alpha: Param("Alpha", float)=0.99,
        mom: Param("Momentum", float)=0.9,
        eps: Param("Epsilon", float)=1e-6,
        epochs: Param("Number of epochs", int)=5,
        bs: Param("Batch size", int)=256,
        mixup: Param("Mixup", float)=0.,
        opt: Param("Optimizer (adam,rms,sgd)", str)='adam',
        arch: Param("Architecture (xresnet34, xresnet50, presnet34, presnet50)", str)='xresnet50',
        dump: Param("Print model; don't train", bool)=False,
        ):
    "Distributed training of Imagenette."
    gpu = setup_distrib(gpu)
    # In single-process mode, DataParallel splits each batch across all visible GPUs.
    if gpu is None: bs *= torch.cuda.device_count()
    if   opt=='adam': opt_func = partial(optim.Adam, betas=(mom,alpha), eps=eps)
    elif opt=='rms' : opt_func = partial(optim.RMSprop, alpha=alpha, eps=eps)
    elif opt=='sgd' : opt_func = partial(optim.SGD, momentum=mom)
    data = get_data(size, woof, bs)

    # Scale the learning rate linearly with the effective batch size (reference bs=256).
    bs_rat = bs/256
    if gpu is not None: bs_rat *= num_distrib()
    if not gpu: print(f'lr: {lr}; eff_lr: {lr*bs_rat}; size: {size}; alpha: {alpha}; mom: {mom}; eps: {eps}')
    lr *= bs_rat
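    # Worked example (illustrative): four distributed processes with the default
    # bs=256 give bs_rat = (256/256)*4 = 4, so lr=1e-3 trains at an effective 4e-3.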
    m = globals()[arch]  # look up the architecture constructor by name
    learn = (Learner(data, m(c_out=10), wd=1e-2, opt_func=opt_func,
             metrics=[accuracy,top_k_accuracy],
             bn_wd=False, true_wd=True,
             loss_func = LabelSmoothingCrossEntropy()))
    if dump: print(learn.model); exit()
    if mixup: learn = learn.mixup(alpha=mixup)
    learn = learn.to_fp16(dynamic=True)  # mixed-precision training with dynamic loss scaling
    if gpu is None:       learn.to_parallel()
    elif num_distrib()>1: learn.to_distributed(gpu)  # requires `python -m fastai.launch`
    learn.fit_one_cycle(epochs, lr, div_factor=10, pct_start=0.3)
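
# Example single-GPU run (hypothetical flag values; the flag names come from the
# Param definitions above): python train_imagenette.py --gpu 0 --size 192 --mixup 0.2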