In [1]:
# Notebook setup: (re)load the autoreload extension in mode 2 so edited modules are
# re-imported before code runs, and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import functools
import traceback
def get_ref_free_exc_info():
    """Return the exception currently being handled, with its traceback frames cleared.

    Clearing the frames drops the local variables they hold, which avoids the
    circular references that would stop gc.collect() from reclaiming memory.

    Returns:
        The ``(type, value, traceback)`` tuple from ``sys.exc_info()``. All three
        items are ``None`` when no exception is being handled.
    """
    # Imported locally: the cell that defines this helper only imports `functools`
    # and `traceback`, so a global `sys` is not guaranteed to exist when it runs.
    import sys

    # Names chosen so the builtin `type` is not shadowed.
    exc_type, exc_val, exc_tb = sys.exc_info()
    # Safe with a None traceback; frames that are still executing are skipped.
    traceback.clear_frames(exc_tb)
    return (exc_type, exc_val, exc_tb)

def gpu_mem_restore(func):
    """Decorator that clears traceback frames when `func` raises or is interrupted.

    Intended to let GPU RAM be reclaimed after a CUDA out-of-memory error or an
    interrupted run: the frames referenced by the traceback are cleared (via
    `get_ref_free_exc_info`) before the exception is re-raised, so the objects
    they held are no longer kept alive by the stored traceback.

    Args:
        func: the callable to wrap.

    Returns:
        A wrapper with the same signature and metadata as `func`. It returns
        whatever `func` returns.

    Raises:
        The original exception instance raised by `func`, with exception chaining
        suppressed (`from None`).
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except BaseException:
            # BaseException on purpose: interruptions (KeyboardInterrupt) must be handled too.
            _, exc_val, exc_tb = get_ref_free_exc_info() # must!
            # Re-raise the original instance. Rebuilding it as `type(val)` breaks for
            # exception classes whose constructor needs more than one argument and
            # discards any attributes set on the instance.
            raise exc_val.with_traceback(exc_tb) from None
    return wrapper

In [3]:
from fastai.vision import *
from fastai.metrics import error_rate

In [4]:
# Dataset root. Defaults to the original author's local directory; set the
# CIFAR100_DIR environment variable to point the notebook at another copy.
import os
path = Path(os.environ.get('CIFAR100_DIR', '/home/init27/Downloads/cifar100/'))

In [5]:
# Sub-folders of the dataset root; `train` is passed to the DataBunch factory below.
train, test = path/'train', path/'test'

In [8]:
# Batch size for the first DataBunch built below; the later per-model sections
# pass their own literal `bs=` values instead of this variable.
bs = 108

In [6]:
# Show the contents of the dataset root (the output lists models/, train/ and test/).
path.ls()

[PosixPath('/home/init27/Downloads/cifar100/models'),
 PosixPath('/home/init27/Downloads/cifar100/train'),
 PosixPath('/home/init27/Downloads/cifar100/test')]

In [9]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# 224px inputs, default augmentations, batch size from `bs`, normalised with `cifar_stats`.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=bs,
).normalize(cifar_stats)

In [10]:
class gpu_mem_restore_ctx():
    """Context manager that clears traceback frames when its body raises or is interrupted.

    Intended to let GPU RAM be reclaimed after a CUDA out-of-memory error or an
    interrupted run: the frames referenced by the traceback are cleared before the
    exception propagates, so the objects they held are no longer kept alive by it.
    Exceptions are never suppressed.
    """
    def __enter__(self): return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        # Clean exit: nothing to clear. Test identity rather than truthiness so an
        # exception instance that happens to be falsy is not silently swallowed.
        if exc_val is None: return False
        traceback.clear_frames(exc_tb)
        # Re-raise the original instance. Rebuilding it as `exc_type(exc_val)` breaks
        # for exception classes whose constructor needs more than one argument and
        # discards any attributes set on the instance.
        raise exc_val.with_traceback(exc_tb) from None

In [11]:
# Allow crashing
# NOTE(review): the intent of the note above is not evident from this notebook —
# presumably this first learner is a throwaway used while probing memory limits; confirm.
# Builds a ResNet-18 learner on `data`, reporting accuracy.
learn = create_cnn(data, models.resnet18 , metrics=accuracy)

## Resnet 18



In [17]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# Rebuild the DataBunch with batch size 408 for the full-precision ResNet-18 run.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=408,
).normalize(cifar_stats)
# ResNet-18 learner on that data, reporting accuracy.
learn = create_cnn(data, models.resnet18, metrics=accuracy)

In [18]:
# Single one-cycle epoch first; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(1)

epoch,train_loss,valid_loss,accuracy
1,2.425544,1.632482,0.559083


In [19]:
# 30 further one-cycle epochs on the same learner.
# NOTE(review): this is the only run visible in this notebook that passes `max_lr`;
# the other sections call fit_one_cycle(30) without it, so results are not strictly like-for-like.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30, max_lr=1e-2)

epoch,train_loss,valid_loss,accuracy
1,1.683162,1.234888,0.647083
2,1.377793,1.006688,0.698833
3,1.210670,0.953726,0.711000
4,1.161485,0.981398,0.702750
5,1.158831,0.985615,0.702833
6,1.168251,1.045662,0.683667
7,1.150126,1.071930,0.681250
8,1.146979,1.025527,0.692417
9,1.109232,1.041167,0.691250
10,1.088920,1.046639,0.685083
11,1.055113,1.009589,0.702417
12,1.027755,0.937168,0.716500
13,1.003785,0.953145,0.715917
14,0.968761,0.916469,0.725417
15,0.932007,0.907113,0.728667
16,0.899529,0.874359,0.738333
17,0.876744,0.819828,0.748167
18,0.853781,0.813430,0.750667
19,0.821074,0.790245,0.762583
20,0.793510,0.764171,0.768833
21,0.766965,0.755508,0.770500
22,0.738198,0.737558,0.777167
23,0.711821,0.725124,0.779583
24,0.680861,0.722985,0.778667
25,0.663084,0.706659,0.783667
26,0.644237,0.696470,0.788083
27,0.619859,0.691847,0.790667
28,0.609522,0.691935,0.788750
29,0.597701,0.690438,0.789417
30,0.605155,0.690853,0.788500


## Resnet 18 (Mixed Precision)

In [29]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# Same pipeline as the full-precision ResNet-18 run, but with batch size 720 instead of 408.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=720,
).normalize(cifar_stats)

In [30]:
# ResNet-18 learner wrapped with to_fp16 for the mixed-precision run (per the section heading).
learn = to_fp16(create_cnn(data, models.resnet18, metrics=accuracy))

In [31]:
# Single one-cycle epoch first; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(1)

epoch,train_loss,valid_loss,accuracy
1,2.892367,1.857484,0.514833


In [32]:
# 30 further one-cycle epochs on the same learner, this time without an explicit max_lr.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,2.217803,1.776256,0.535000
2,2.111644,1.634846,0.565083
3,1.948603,1.451506,0.600167
4,1.763333,1.288680,0.634083
5,1.576218,1.170622,0.659167
6,1.435181,1.088699,0.677000
7,1.335717,1.024443,0.695833
8,1.241808,0.974325,0.710167
9,1.171949,0.934963,0.718083
10,1.118036,0.914327,0.726500
11,1.073453,0.887545,0.731083
12,1.032388,0.872336,0.735083
13,0.991246,0.859043,0.737417
14,0.968865,0.838764,0.744333
15,0.941147,0.827038,0.748500
16,0.921168,0.815047,0.750500
17,0.896468,0.810671,0.750750
18,0.876352,0.797368,0.752500
19,0.855625,0.796557,0.754833
20,0.834282,0.787105,0.758667
21,0.824325,0.783010,0.761167
22,0.799274,0.776148,0.762583
23,0.790784,0.770834,0.761250
24,0.774112,0.768528,0.763750
25,0.767463,0.766201,0.765917
26,0.758423,0.764043,0.766750
27,0.749885,0.762675,0.766500
28,0.746697,0.761255,0.767583
29,0.749717,0.761567,0.766833
30,0.747547,0.762019,0.766500


## Resnet 34 

In [155]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# DataBunch for the full-precision ResNet-34 run: batch size 248.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=248,
).normalize(cifar_stats)

In [156]:
# ResNet-34 learner on the DataBunch built in the previous cell, reporting accuracy.
learn = create_cnn(data, models.resnet34, metrics=accuracy)

In [157]:
# 30 one-cycle epochs; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,4.091527,3.254414,0.291917
2,2.743854,1.980968,0.516750
3,1.920904,1.337292,0.628833
4,1.471699,1.053553,0.692083
5,1.233301,0.913338,0.720250
6,1.109750,0.848846,0.743417
7,1.037520,0.791737,0.756333
8,0.969036,0.762382,0.766417
9,0.919657,0.729532,0.773250
10,0.885872,0.712802,0.778000
11,0.839404,0.704638,0.782583
12,0.813941,0.690926,0.786333
13,0.789469,0.672594,0.793000
14,0.765909,0.663917,0.796250
15,0.728020,0.653305,0.800167
16,0.707719,0.656965,0.797250
17,0.682595,0.642476,0.804083
18,0.663653,0.637333,0.803500
19,0.651230,0.625249,0.806083
20,0.621778,0.622463,0.809583
21,0.613027,0.621823,0.809250
22,0.608741,0.618127,0.813333
23,0.577575,0.613892,0.814583
24,0.570914,0.609038,0.814833
25,0.544643,0.609252,0.816750
26,0.548220,0.606148,0.817583
27,0.535698,0.604082,0.817750
28,0.525107,0.604753,0.817667
29,0.531150,0.604243,0.817250
30,0.533026,0.604470,0.817167


## Resnet 34 (Mixed Precision)

In [158]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# Same pipeline as the full-precision ResNet-34 run, but with batch size 496 instead of 248.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=496,
).normalize(cifar_stats)

In [159]:
# ResNet-34 learner wrapped with to_fp16 for the mixed-precision run (per the section heading).
learn = to_fp16(create_cnn(data, models.resnet34, metrics=accuracy))

In [160]:
# 30 one-cycle epochs; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,4.527595,3.723982,0.192333
2,3.496515,2.463351,0.443083
3,2.486472,1.608410,0.577167
4,1.810522,1.204057,0.655750
5,1.430669,1.008747,0.701667
6,1.225085,0.905269,0.727500
7,1.092392,0.835543,0.743750
8,1.013207,0.793349,0.753833
9,0.954900,0.760904,0.762250
10,0.897951,0.739107,0.773417
11,0.860628,0.723568,0.776000
12,0.823930,0.703144,0.784000
13,0.792410,0.681215,0.790583
14,0.767022,0.683587,0.789167
15,0.741968,0.668285,0.796083
16,0.718596,0.661959,0.797583
17,0.693880,0.657565,0.796500
18,0.676531,0.652226,0.804333
19,0.661541,0.650092,0.803000
20,0.641904,0.632877,0.808167
21,0.628384,0.633768,0.807500
22,0.607901,0.630894,0.808167
23,0.597407,0.630145,0.809083
24,0.580347,0.625942,0.809583
25,0.576550,0.624290,0.813000
26,0.564124,0.620901,0.811500
27,0.561751,0.620830,0.811000
28,0.549595,0.620113,0.811500
29,0.549034,0.619881,0.812917
30,0.553179,0.618634,0.812083


## Resnet 50

In [44]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# DataBunch for the full-precision ResNet-50 run: batch size 98.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=98,
).normalize(cifar_stats)

In [45]:
# ResNet-50 learner on the DataBunch built in the previous cell, reporting accuracy.
learn = create_cnn(data, models.resnet50, metrics=accuracy)

In [46]:
# 30 one-cycle epochs; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,2.538904,1.896686,0.537917
2,1.700987,1.219014,0.660167
3,1.304718,0.952666,0.716250
4,1.097150,0.850275,0.743417
5,1.008206,0.781421,0.758083
6,0.947150,0.740677,0.770500
7,0.891749,0.720329,0.778250
8,0.800058,0.686394,0.791250
9,0.771210,0.674660,0.793167
10,0.744009,0.653407,0.798000
11,0.710490,0.635136,0.803083
12,0.653651,0.618544,0.810833
13,0.654817,0.612365,0.811500
14,0.604655,0.610485,0.818000
15,0.593437,0.591317,0.820833
16,0.553768,0.596443,0.819417
17,0.507188,0.585333,0.825250
18,0.493085,0.583739,0.829417
19,0.476964,0.575371,0.828667
20,0.455830,0.569629,0.831167
21,0.447361,0.572533,0.831417
22,0.407300,0.567695,0.833667
23,0.383044,0.565551,0.833667
24,0.369885,0.563926,0.835917
25,0.376316,0.561316,0.836833
26,0.335686,0.558788,0.838000
27,0.346769,0.560210,0.838083
28,0.340222,0.561173,0.839667
29,0.327940,0.558264,0.839667
30,0.316947,0.559535,0.839333


## Resnet 50 (Mixed Precision)

In [57]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# Same pipeline as the full-precision ResNet-50 run, but with batch size 164 instead of 98.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=164,
).normalize(cifar_stats)

In [58]:
# ResNet-50 learner wrapped with to_fp16 for the mixed-precision run (per the section heading).
learn = to_fp16(create_cnn(data, models.resnet50, metrics=accuracy))

In [59]:
# 30 one-cycle epochs; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,2.853479,2.087002,0.499583
2,1.851662,1.342598,0.631250
3,1.387896,1.018817,0.699417
4,1.165870,0.866866,0.736500
5,1.031915,0.790143,0.755833
6,0.921515,0.759719,0.765500
7,0.880313,0.717190,0.778917
8,0.838310,0.697754,0.784250
9,0.775729,0.681422,0.791750
10,0.725294,0.649651,0.801583
11,0.692380,0.643033,0.801167
12,0.661506,0.633204,0.805750
13,0.616202,0.624044,0.810333
14,0.591845,0.612394,0.812417
15,0.566731,0.603592,0.818500
16,0.542707,0.597049,0.818333
17,0.506298,0.596841,0.822417
18,0.492856,0.588816,0.824667
19,0.450351,0.585315,0.826667
20,0.441241,0.580710,0.827167
21,0.420925,0.578927,0.828500
22,0.392566,0.581997,0.831417
23,0.374037,0.571375,0.832500
24,0.366736,0.572550,0.832583
25,0.339871,0.569914,0.831500
26,0.344734,0.568628,0.833083
27,0.322793,0.567510,0.832667
28,0.307898,0.565839,0.835750
29,0.322832,0.566465,0.832583
30,0.317137,0.568857,0.832000


## Resnet 101

In [161]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# DataBunch for the full-precision ResNet-101 run: batch size 64.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=64,
).normalize(cifar_stats)

In [162]:
# ResNet-101 learner on the DataBunch built in the previous cell, reporting accuracy.
learn = create_cnn(data, models.resnet101, metrics=accuracy)

In [163]:
# 30 one-cycle epochs; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,2.055326,1.447209,0.630000
2,1.328482,0.938083,0.723333
3,1.065412,0.762804,0.767833
4,0.893859,0.681791,0.788333
5,0.857969,0.640077,0.801417
6,0.766642,0.637175,0.802417
7,0.781614,0.625195,0.804750
8,0.731761,0.596706,0.817333
9,0.682865,0.582054,0.822250
10,0.648590,0.560301,0.828667
11,0.618241,0.556258,0.829667
12,0.574654,0.547130,0.835917
13,0.530491,0.540169,0.837667
14,0.528848,0.534272,0.839333
15,0.491567,0.526254,0.840750
16,0.451276,0.531170,0.842167
17,0.438837,0.518594,0.847167
18,0.433088,0.524159,0.845750
19,0.381678,0.522843,0.847667
20,0.365216,0.521657,0.848917
21,0.343347,0.517553,0.852500
22,0.339596,0.507800,0.854000
23,0.309928,0.514493,0.854917
24,0.297399,0.506818,0.856000
25,0.268865,0.504412,0.857667
26,0.262993,0.505606,0.860333
27,0.244982,0.503122,0.858667
28,0.249251,0.501970,0.860833
29,0.254412,0.504455,0.861500
30,0.244284,0.502431,0.859750


## Resnet 101 (Mixed Precision)

In [164]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# Same pipeline as the full-precision ResNet-101 run, but with batch size 116 instead of 64.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=116,
).normalize(cifar_stats)

In [165]:
# ResNet-101 learner wrapped with to_fp16 for the mixed-precision run (per the section heading).
learn = to_fp16(create_cnn(data, models.resnet101, metrics=accuracy))

In [166]:
# 30 one-cycle epochs; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,2.356006,1.653988,0.594750
2,1.458173,1.015118,0.709333
3,1.113660,0.803704,0.758167
4,0.931843,0.710339,0.782333
5,0.844236,0.666915,0.791833
6,0.785052,0.625276,0.807167
7,0.725867,0.601363,0.815750
8,0.704728,0.592559,0.817833
9,0.656651,0.576617,0.821583
10,0.599608,0.572227,0.822667
11,0.569434,0.566142,0.828750
12,0.538815,0.539515,0.835917
13,0.508781,0.536017,0.835250
14,0.471497,0.535257,0.839833
15,0.455695,0.540152,0.836750
16,0.437731,0.531233,0.841583
17,0.398460,0.528875,0.847250
18,0.370818,0.521328,0.851000
19,0.361273,0.524604,0.851583
20,0.328099,0.516841,0.851167
21,0.309971,0.510228,0.854833
22,0.300888,0.518774,0.852333
23,0.289590,0.506502,0.856250
24,0.258556,0.513164,0.855417
25,0.254689,0.512996,0.857583
26,0.232998,0.516241,0.857750
27,0.238141,0.514106,0.857417
28,0.217220,0.514888,0.856250
29,0.224265,0.513912,0.856167
30,0.220724,0.514018,0.856750


## Resnet 152

In [65]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# DataBunch for the full-precision ResNet-152 run: batch size 48.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=48,
).normalize(cifar_stats)

In [66]:
# ResNet-152 learner on the DataBunch built in the previous cell, reporting accuracy.
learn = create_cnn(data, models.resnet152, metrics=accuracy)

In [67]:
# 30 one-cycle epochs; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,1.898828,1.305966,0.653833
2,1.175181,0.842290,0.747250
3,0.966224,0.699221,0.786167
4,0.884034,0.651086,0.795667
5,0.792795,0.621682,0.804750
6,0.795135,0.593315,0.819083
7,0.723622,0.596839,0.816833
8,0.658714,0.581674,0.821417
9,0.670455,0.553189,0.833000
10,0.622607,0.560935,0.830250
11,0.587018,0.533542,0.836250
12,0.580308,0.520549,0.841167
13,0.521300,0.532823,0.840583
14,0.487249,0.526109,0.842750
15,0.448812,0.513343,0.850167
16,0.431717,0.506566,0.850667
17,0.401086,0.506412,0.851917
18,0.398720,0.505510,0.853083
19,0.361149,0.500569,0.855833
20,0.357157,0.493713,0.856417
21,0.319523,0.496027,0.860000
22,0.284045,0.494504,0.861000
23,0.253644,0.493373,0.861333
24,0.275702,0.500611,0.861167
25,0.248939,0.488999,0.863083
26,0.233902,0.496497,0.862333
27,0.218012,0.491988,0.864417
28,0.225783,0.493372,0.864667
29,0.217687,0.493124,0.866333
30,0.202206,0.491947,0.863083


## Resnet 152 (Mixed Precision)

In [167]:
# Reseed NumPy first (presumably so the 20% validation split is repeatable).
np.random.seed(42)
# Same pipeline as the full-precision ResNet-152 run, but with batch size 88 instead of 48.
data = ImageDataBunch.from_folder(
    path,
    train=train,
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=88,
).normalize(cifar_stats)

In [168]:
# ResNet-152 learner wrapped with to_fp16 for the mixed-precision run (per the section heading).
learn = to_fp16(create_cnn(data, models.resnet152, metrics=accuracy))

In [169]:
# 30 one-cycle epochs; the context manager clears traceback frames if training raises.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(30)

epoch,train_loss,valid_loss,accuracy
1,2.160135,1.495938,0.625750
2,1.294585,0.920020,0.733083
3,0.984379,0.727062,0.776750
4,0.865696,0.641753,0.800250
5,0.777403,0.599795,0.815000
6,0.726824,0.607037,0.811833
7,0.695808,0.577306,0.824917
8,0.663318,0.561540,0.828833
9,0.601016,0.554505,0.833417
10,0.576079,0.546127,0.834667
11,0.549800,0.524947,0.842250
12,0.494807,0.513583,0.844250
13,0.475365,0.516716,0.846250
14,0.443925,0.506661,0.849750
15,0.410093,0.509516,0.849250
16,0.378922,0.499594,0.855167
17,0.344542,0.489011,0.856333
18,0.340006,0.507246,0.852333
19,0.325086,0.486156,0.861083
20,0.302048,0.490937,0.857667
21,0.288165,0.486860,0.864250
22,0.256561,0.492568,0.860917
23,0.237763,0.485718,0.862583
24,0.224336,0.486326,0.863333
25,0.209552,0.485967,0.863500
26,0.210889,0.485575,0.865250
27,0.202123,0.481421,0.865833
28,0.205988,0.486022,0.867167
29,0.173248,0.485957,0.865583
30,0.182509,0.484172,0.866750
