-
Notifications
You must be signed in to change notification settings - Fork 0
/
medical-images-classification.py
1232 lines (838 loc) · 39.2 KB
/
medical-images-classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# coding: utf-8
# In[1]:
import seaborn as sns
# PyTorch
from torchvision import transforms, datasets, models
import torch
from torch import optim, cuda
from torch.utils.data import DataLoader, sampler
import torch.nn as nn
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
# Data science tools
import numpy as np
import pandas as pd
import os
# Image manipulations
from PIL import Image
# Useful for examining network
# Timing utility
from timeit import default_timer as timer
# Visualizations
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
plt.rcParams['font.size'] = 14
# ### Parameters
#
# The parameters in this cell can be changed as needed.
# In[2]:
# Dataset root (note: the folder name genuinely contains a trailing space)
datadir = '../input/oct2017/OCT2017 /'
# Per-split image directories
traindir = f'{datadir}train/'
validdir = f'{datadir}val/'
testdir = f'{datadir}test/'

# Output files: best state dict (.pt) and full checkpoint (.pth)
save_file_name = 'vgg16-transfer-4.pt'
checkpoint_path = 'vgg16-transfer-4.pth'

# Change to fit hardware (GPU memory in particular)
batch_size = 128

# Detect CUDA once up front and report what was found
train_on_gpu = cuda.is_available()
print(f'Train on gpu: {train_on_gpu}')
if train_on_gpu:
    gpu_count = cuda.device_count()
    print(f'{gpu_count} gpus detected.')
# ## Data Exploration
# Below we take a look at the number of images in each category and the size of the images.
# In[3]:
# Accumulators: per-category counts and per-training-image dimensions
categories = []
img_categories = []
n_train = []
n_valid = []
n_test = []
hs = []
ws = []

# Walk every category folder found in the training split
for category in os.listdir(traindir):
    categories.append(category)

    # File listings for this category in each split
    train_imgs = os.listdir(traindir + category)
    valid_imgs = os.listdir(validdir + category)
    test_imgs = os.listdir(testdir + category)
    n_train.append(len(train_imgs))
    n_valid.append(len(valid_imgs))
    n_test.append(len(test_imgs))

    # Record the height and width of every training image in this category
    for fname in train_imgs:
        img_categories.append(category)
        img_array = np.array(Image.open(traindir + category + '/' + fname))
        hs.append(img_array.shape[0])
        ws.append(img_array.shape[1])

# Summary table: images per split for each category
cat_df = pd.DataFrame({
    'category': categories,
    'n_train': n_train,
    'n_valid': n_valid,
    'n_test': n_test,
}).sort_values('category')

# One row per training image with its raw dimensions
image_df = pd.DataFrame({
    'category': img_categories,
    'height': hs,
    'width': ws
})

# Re-sort so the largest categories come first for plotting
cat_df.sort_values('n_train', ascending=False, inplace=True)
cat_df.head()
# #### Distribution of Images
#
# The number of training images per category varies widely, up to roughly 36000 in the largest category.
# In[4]:
# Bar chart of training-image counts per category (cat_df was sorted by
# n_train descending above, so the largest bars come first)
cat_df.set_index('category')['n_train'].plot.bar(figsize=(20, 6))
plt.xticks(rotation=80)  # tilt labels so category names stay readable
plt.ylabel('Count')
plt.title('Training Images by Category')
plt.show()
# I'm guessing the categories with more examples will be easier to learn. One way we partially get around the small number of images is through data augmentation (which we'll see in a bit).
# #### Distribution of Images Sizes
#
# The images themselves have vastly different shapes. We can see this by looking at the stats of images sizes by category.
# In[5]:
# Per-category summary statistics (count/mean/std/quartiles) of image sizes
img_dsc = image_df.groupby('category').describe()
img_dsc.head()
# In[6]:
# Density plot of the per-category mean height and mean width
plt.figure(figsize=(10, 6))
sns.kdeplot(
    img_dsc['height']['mean'], label='Average Height')
sns.kdeplot(
    img_dsc['width']['mean'], label='Average Width')
plt.xlabel('Pixels')
plt.ylabel('Density')
plt.title('Average Size Distribution')
# When we use the images in the pre-trained network, we'll have to reshape them to 224 x 224. This is the size of Imagenet images and is therefore what the model expects. The images that are larger than this will be truncated while the smaller images will be interpolated.
# In[7]:
def imshow(image):
    """Display a PIL image (or array) in a 6x6-inch figure with the axis hidden."""
    plt.figure(figsize=(6, 6))
    plt.imshow(image)
    plt.axis('off')
    plt.show()
# Example image; np.array(x).shape shows the raw
# (height, width[, channels]) dimensions before any resizing
x = Image.open(traindir + '/CNV/CNV-5557306-155.jpeg')
np.array(x).shape
imshow(x)
# In[8]:
# A second example from a different category
x = Image.open(traindir + '/DME/DME-4441781-1.jpeg')
np.array(x).shape
imshow(x)
# # Image Preprocessing
#
# To prepare the images for our network, we have to resize them to 224 x 224 and normalize each color channel by subtracting a mean value and dividing by a standard deviation. We will also augment our training data in this stage. These operations are done using image `transforms`, which prepare our data for a neural network.
#
# ### Data Augmentation
#
# Because there are a limited number of images, we can use image augmentation to artificially increase the number of images "seen" by the network. This means for training, we randomly resize and crop the images and also flip them horizontally. A different random transformation is applied each epoch (while training), so the network effectively sees many different versions of the same image. All of the data is also converted to Torch `Tensor`s before normalization. The validation and testing data is not augmented but is only resized and normalized. The normalization values are standardized for Imagenet. <p>
#
# This is not necessary here but can be useful in some other data where training images are less for some categories
# In[9]:
# Image transformations keyed by split. Only 'train' gets random
# augmentation; every split ends with ToTensor plus the ImageNet
# mean/std normalization that the pre-trained network expects.
image_transforms = {
    # Train uses data augmentation
    'train':
    transforms.Compose([
        # Random crop covering 80-100% of the image area, resized to 256
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),  # Image net standards
        transforms.ToTensor(),
        # ImageNet per-channel means and standard deviations
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    # Validation does not use augmentation
    'val':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    # Test does not use augmentation
    'test':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
# ### Examples of Augmentation
#
# To show how augmentation works, we need to write a function that will plot a tensor as an image.
# In[10]:
def imshow_tensor(image, ax=None, title=None):
    """Display a normalized image tensor by undoing the ImageNet preprocessing.

    Params
    --------
    image (torch.Tensor): (channels, height, width) tensor produced by
        ToTensor + Normalize
    ax (matplotlib axes, optional): axes to draw on; a fresh figure/axes
        is created when omitted
    title (str, optional): accepted for API compatibility (not drawn)

    Returns
    --------
    ax: the axes that were drawn on
    image (np.ndarray): the de-normalized (height, width, channels) array
    """
    if ax is None:
        fig, ax = plt.subplots()
    # Set the color channel as the third dimension: (C, H, W) -> (H, W, C)
    image = image.numpy().transpose((1, 2, 0))
    # Reverse the Normalize step: x * std + mean, per channel
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = std * image + mean
    # De-normalization can push values slightly outside [0, 1]; clip them
    image = np.clip(image, 0, 1)
    ax.imshow(image)
    # Fix: hide the axis of the axes actually drawn on. plt.axis('off')
    # targets the *current* axes, which differs when a caller passes its
    # own ax (e.g. a subplot grid).
    ax.axis('off')
    return ax, image
# We'll work with two example images and apply the train transformations.
# ## Data Iterators
#
# To avoid loading all of the data into memory at once, we use training `DataLoaders`. First, we create a dataset object from the image folders, and then we pass these to a `DataLoader`. At training time, the `DataLoader` will load the images from disk, apply the transformations, and yield a batch. For training and validation, we'll iterate through all the batches in the respective `DataLoader`.
#
# One crucial aspect is to `shuffle` the data before passing it to the network. This means that the ordering of the image categories changes on each pass through the data (one pass through the data is one training epoch).
# In[11]:
# Datasets from each folder; ImageFolder infers the label of each image
# from the name of the subdirectory it sits in
data = {'train': datasets.ImageFolder(root=traindir, transform=image_transforms['train']),
        'val': datasets.ImageFolder(root=validdir, transform=image_transforms['val']),
        'test': datasets.ImageFolder(root=testdir, transform=image_transforms['test'])}
# Dataloader iterators; shuffle=True re-orders the samples every epoch
dataloaders = {
    'train': DataLoader(data['train'], batch_size=batch_size, shuffle=True),
    'val': DataLoader(data['val'], batch_size=batch_size, shuffle=True),
    'test': DataLoader(data['test'], batch_size=batch_size, shuffle=True)
}
# In[12]:
# Pull a single batch to sanity-check the tensor shapes
trainiter = iter(dataloaders['train'])
features, labels = next(trainiter)
features.shape, labels.shape
# The shape of a batch is `(batch_size, color_channels, height, width)`.
# There should be 4 different classes. We can confirm this as follows.
# In[13]:
# Number of categories; this drives the size of the final classifier layer
n_classes = len(cat_df)
print(f'There are {n_classes} different classes.')
# Cross-check against the classes ImageFolder discovered on disk
len(data['train'].classes)
# We can iterate through the `DataLoaders` when doing training, validation, and testing. This construction avoids the need to load all the data into memory and also will automatically apply the transformations to each batch. On each epoch, the `Random` transformations will be different so the network will essentially see multiple versions of each training image.
# # Pre-Trained Models for Image Classification
#
# PyTorch has many pretrained models we can use. All of these models have been trained on Imagenet which consists of millions of images across 1000 categories. What we want to do with pretrained models is freeze the early layers, and replace the classification module with our own.
#
# ## Approach
#
# The approach for using a pre-trained image recognition model is well-established:
#
# 1. Load in pre-trained weights from a network trained on a large dataset
# 2. Freeze all the weights in the lower (convolutional) layers
# * Layers to freeze can be adjusted depending on similarity of task to large training dataset
# 3. Replace the classifier (fully connected) part of the network with a custom classifier
# * Number of outputs must be set equal to the number of classes
# 4. Train only the custom classifier (fully connected) layers for the task
# * Optimizer model classifier for smaller dataset
#
# The idea behind pre-training is the early convolutional layers of a cnn extract features that are relevant for many image recognition tasks. The later, fully-connected layers, specialize to the specific dataset by learning higher-level features. Therefore, we can use the already trained convolutional layers while training only the fully-connected layers on our own dataset. Pre-trained networks have proven to be reasonably successful for a variety of tasks, and result in a significant reduction in training time and usually increases in performance.
# We'll be using the `vgg16`. The VGG networks had very good performance without taking a long time to train.
# ## Process to Use Pre-Trained Model
#
# First off, load in the model with pretrained weights.
# In[14]:
# Load VGG16 with weights pre-trained on ImageNet
model = models.vgg16(pretrained=True)
model
# The `classifier` is the part of the model that we'll train. However, for the vgg, we'll only need to train the last few layers in the classifier and not even all of the fully connected layers.
# ### Freeze Early layers
#
# We freeze all of the existing layers in the network by setting `requires_grad` to `False`.
# In[15]:
# Freeze early layers so no pre-trained weight receives gradient updates
for param in model.parameters():
    param.requires_grad = False
# ### Add on Custom Classifier
#
# We'll train a classifier consisting of the following layers
#
# * Fully connected with ReLU activation (n_inputs, 256)
# * Dropout with 40% chance of dropping
# * Fully connected with log softmax output (256, n_classes)
#
# To build our custom classifier, we use the `nn.Sequential()` module which allows us to specify each layer one after the other. We assign our custom classifier to the final `classifier` layer in the already trained vgg network. When we add on the extra layers, they are set to `require_grad=True` by default. These will be the only layers that are trained.
# In[16]:
# Input size expected by the final (to-be-replaced) classifier layer
n_inputs = model.classifier[6].in_features
# Add on classifier: swap only the last Linear layer of VGG's classifier
# for a small trainable head ending in log-softmax
model.classifier[6] = nn.Sequential(
    nn.Linear(n_inputs, 256), nn.ReLU(), nn.Dropout(0.4),
    nn.Linear(256, n_classes), nn.LogSoftmax(dim=1))
model.classifier
# The final output will be log probabilities which we can then use in the Negative Log Likelihood Loss.
# In[17]:
# Compare total parameter count with the (much smaller) trainable count
total_params = sum(p.numel() for p in model.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f'{total_trainable_params:,} training parameters.')
# Even with only a few layers set to trainable, there are still over a million parameters (weights) that will be updated during training. In effect, we are _fine-tuning_ the model to work on our problem. We already know it works well on Imagenet, and because our images are relatively similar, we should expect the model to easily _transfer_ its knowledge from Imagenet to our dataset.
# ### Move to GPU
#
# To use a gpu in PyTorch, we simply move the whole model onto the gpu. Later we'll have to move the data to gpus in our training loop.
# In[18]:
if train_on_gpu:
    model = model.to('cuda')
# ## Function to Load in Pretrained Model
#
# We can refactor all that code into a single function that returns a pretrained model. This only accepts the vgg16 at the moment but can be extended to use other models.
# In[19]:
def get_pretrained_model(model_name):
    """Retrieve a pre-trained model from torchvision and attach a custom head.

    Params
    -------
    model_name (str): name of the model (currently only accepts 'vgg16')

    Return
    --------
    model (PyTorch model): vgg16 with frozen convolutional/linear layers
        and a fresh trainable classifier head with `n_classes` outputs

    Raises
    --------
    ValueError: if `model_name` is not a supported architecture
    """
    if model_name == 'vgg16':
        model = models.vgg16(pretrained=True)
        # Freeze early layers so only the new head is trained
        for param in model.parameters():
            param.requires_grad = False
        n_inputs = model.classifier[6].in_features
        # Add on classifier. Dropout is 0.4 to match the head built
        # earlier in this notebook (it was 0.2 here, silently disagreeing
        # with the documented 40% drop chance).
        model.classifier[6] = nn.Sequential(
            nn.Linear(n_inputs, 256), nn.ReLU(), nn.Dropout(0.4),
            nn.Linear(256, n_classes), nn.LogSoftmax(dim=1))
    else:
        # Fail loudly instead of hitting an unbound local below
        raise ValueError(f'Unsupported model name: {model_name}')
    # Move to gpu
    if train_on_gpu:
        model = model.to('cuda')
    return model
# In[20]:
get_ipython().system('pip install torchsummary')
from torchsummary import summary
# This should return the same as the pretrained model with the custom classifier. In the case of resnet, we replace the `fc` layers with the same classifier.
#
# The `torchsummary` library has a helpful function called `summary` which summarizes our model.
# In[21]:
# Rebuild the model through the helper and inspect a layer-by-layer summary
model = get_pretrained_model('vgg16')
summary(model, input_size=(3, 224, 224), batch_size=batch_size, device='cuda')
# We can see that the model is quite large and training all of the layers would take a considerable time. Even with only a few layers to train, this can still take a while to train. You might need to decrease the `batch_size` if this is not fitting on your gpu (hopefully you have one).
# In[22]:
# Show only the trainable head we attached
print(model.classifier[6])
# #### Mapping of Classes to Indexes
#
# To keep track of the predictions made by the model, we create a mapping of classes to indexes and indexes to classes. This will let us know the actual class for a given prediction.
# In[23]:
# Attach class mappings to the model so predictions can be translated
# back into human-readable labels (and survive checkpointing)
model.class_to_idx = data['train'].class_to_idx
model.idx_to_class = {
    idx: class_
    for class_, idx in model.class_to_idx.items()}
list(model.idx_to_class.items())
# # Training Loss and Optimizer
#
# The loss is the negative log likelihood and the optimizer is the Adam optimizer. The negative log likelihood in PyTorch expects log probabilities so we need to pass it the raw output from the log softmax in our model's final layer. The optimizer is told to optimize the model parameters (only a few of which require a gradient).
#
# * Loss (criterion): keeps track of the loss itself and the gradients of the loss with respect to the model parameters (weights)
# * Optimizer: updates the parameters (weights) with the gradients
# In[24]:
# Negative log likelihood pairs with the model's LogSoftmax output
criterion = nn.NLLLoss()
# NOTE(review): every parameter is handed to Adam, including the frozen
# ones. Parameters that never receive a gradient are skipped by the
# optimizer, so this is harmless, but filtering on requires_grad would
# be more explicit.
optimizer = optim.Adam(model.parameters())
# Below we can look at the parameters (weights) that will be updated by the optimizer during training.
# In[25]:
for p in optimizer.param_groups[0]['params']:
    if p.requires_grad:
        print(p.shape)
# # Training
#
# For training, we iterate through the train `DataLoader`, each time passing one batch through the model. One complete pass through the training data is known as an `epoch`, and we train for a set number of epochs or until early stopping kicks in (more below). After each batch, we calculate the loss (with `criterion(output, targets)`) and then calculate the gradients of the loss with respect to the model parameters with `loss.backward()`. This uses autodifferentiation and backpropagation to calculate the gradients.
#
# After calculating the gradients, we call `optimizer.step()` to update the model parameters with the gradients. This is done on every training batch so we are implementing stochastic gradient descent (or rather a version of it with momentum known as Adam). For each batch, we also compute the accuracy for monitoring and after the training loop has completed, we start the validation loop. This will be used to carry out early stopping.
#
#
# ## Early Stopping
#
# Early stopping halts the training when the validation loss has not decreased for a number of epochs. Each time the validation loss does decrease, the model weights are saved so we can later load in the best model. Early stopping is an effective method to prevent overfitting on the training data. If we continue training, the training loss will continue to decrease, but the validation loss will increase because the model is starting to memorize the training data. Early stopping prevents this from happening, and, if we save the model each epoch when the validation loss decreases, we are able to retrieve the model that does best on the validation data.
#
# Early stopping is implemented by iterating through the validation data at the end of each training epoch and calculating the loss. We use the complete validation data every time and record whether or not the loss has decreased. If it has not for a number of epochs, we stop training, retrieve the best weights, and return them. When in the validation loop, we make sure not to update the model parameters.
#
# ### Training Function
#
# The below function trains the network while monitoring a number of different parameters. We train with early stopping on the validation set. There are a number of parameters that I've tried to explain in the doc string. Hopefully, the comments and background make things somewhat understandable!
# In[26]:
def train(model,criterion,optimizer,train_loader,
          valid_loader,save_file_name,max_epochs_stop=3,
          n_epochs=20,print_every=1):
    """Train a PyTorch Model with early stopping on the validation loss.

    Params
    --------
    model (PyTorch model): cnn to train
    criterion (PyTorch loss): objective to minimize
    optimizer (PyTorch optimizier): optimizer to compute gradients of model parameters
    train_loader (PyTorch dataloader): training dataloader to iterate through
    valid_loader (PyTorch dataloader): validation dataloader used for early stopping
    save_file_name (str ending in '.pt'): file path to save the model state dict
    max_epochs_stop (int): maximum number of epochs with no improvement in validation loss for early stopping
    n_epochs (int): maximum number of training epochs
    print_every (int): frequency of epochs to print training stats

    Returns
    --------
    model (PyTorch model): trained cnn with best weights
    history (DataFrame): history of train and validation loss and accuracy
    """
    # Early stopping intialization
    epochs_no_improve = 0
    valid_loss_min = np.Inf
    # NOTE(review): valid_max_acc is never read again (and valid_best_acc,
    # assigned below, is also unused) -- candidates for removal
    valid_max_acc = 0
    history = []
    # Number of epochs already trained (if using loaded in model weights).
    # NOTE(review): the bare except hides any failure of the print, not
    # just a missing attribute; `except AttributeError:` would be the
    # targeted form.
    try:
        print(f'Model has been trained for: {model.epochs} epochs.\n')
    except:
        model.epochs = 0
        print(f'Starting Training from Scratch.\n')
    overall_start = timer()
    # Main loop
    for epoch in range(n_epochs):
        # keep track of training and validation loss each epoch
        train_loss = 0.0
        valid_loss = 0.0
        train_acc = 0
        valid_acc = 0
        # Set to training (enables dropout / batch-norm updates)
        model.train()
        start = timer()
        # Training loop
        for ii, (data, target) in enumerate(train_loader):
            # Tensors to gpu
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            # Clear gradients
            optimizer.zero_grad()
            # Predicted outputs are log probabilities
            output = model(data)
            # Loss and backpropagation of gradients
            loss = criterion(output, target)
            loss.backward()
            # Update the parameters
            optimizer.step()
            # Track train loss by multiplying average loss by number of examples in batch
            train_loss += loss.item() * data.size(0)
            # Calculate accuracy by finding max log probability
            _, pred = torch.max(output, dim=1)
            correct_tensor = pred.eq(target.data.view_as(pred))
            # Need to convert correct tensor from int to float to average
            accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
            # Multiply average accuracy times the number of examples in batch
            train_acc += accuracy.item() * data.size(0)
            # Track training progress
            print(
                f'Epoch: {epoch}\t{100 * (ii + 1) / len(train_loader):.2f}% complete. {timer() - start:.2f} seconds elapsed in epoch.',
                end='\r')
        # After training loops ends, start validation.
        # (for/else: this clause runs after every complete pass over
        # train_loader, since the loop body contains no `break`.)
        else:
            model.epochs += 1
            # Don't need to keep track of gradients
            with torch.no_grad():
                # Set to evaluation mode
                model.eval()
                # Validation loop
                for data, target in valid_loader:
                    # Tensors to gpu
                    if train_on_gpu:
                        data, target = data.cuda(), target.cuda()
                    # Forward pass
                    output = model(data)
                    # Validation loss
                    loss = criterion(output, target)
                    # Multiply average loss times the number of examples in batch
                    valid_loss += loss.item() * data.size(0)
                    # Calculate validation accuracy
                    _, pred = torch.max(output, dim=1)
                    correct_tensor = pred.eq(target.data.view_as(pred))
                    accuracy = torch.mean(
                        correct_tensor.type(torch.FloatTensor))
                    # Multiply average accuracy times the number of examples
                    valid_acc += accuracy.item() * data.size(0)
                # Calculate average losses
                train_loss = train_loss / len(train_loader.dataset)
                valid_loss = valid_loss / len(valid_loader.dataset)
                # Calculate average accuracy
                train_acc = train_acc / len(train_loader.dataset)
                valid_acc = valid_acc / len(valid_loader.dataset)
                history.append([train_loss, valid_loss, train_acc, valid_acc])
                # Print training and validation results
                if (epoch + 1) % print_every == 0:
                    print(
                        f'\nEpoch: {epoch} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {valid_loss:.4f}'
                    )
                    print(
                        f'\t\tTraining Accuracy: {100 * train_acc:.2f}%\t Validation Accuracy: {100 * valid_acc:.2f}%'
                    )
                # Save the model if validation loss decreases
                if valid_loss < valid_loss_min:
                    # Save model
                    torch.save(model.state_dict(), save_file_name)
                    # Track improvement
                    epochs_no_improve = 0
                    valid_loss_min = valid_loss
                    valid_best_acc = valid_acc
                    best_epoch = epoch
                # Otherwise increment count of epochs with no improvement
                else:
                    epochs_no_improve += 1
                    # Trigger early stopping
                    if epochs_no_improve >= max_epochs_stop:
                        print(
                            f'\nEarly Stopping! Total epochs: {epoch}. Best epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_acc:.2f}%'
                        )
                        total_time = timer() - overall_start
                        print(
                            f'{total_time:.2f} total seconds elapsed. {total_time / (epoch+1):.2f} seconds per epoch.'
                        )
                        # Load the best state dict
                        model.load_state_dict(torch.load(save_file_name))
                        # Attach the optimizer
                        model.optimizer = optimizer
                        # Format history
                        history = pd.DataFrame(
                            history,
                            columns=[
                                'train_loss', 'valid_loss', 'train_acc',
                                'valid_acc'
                            ])
                        return model, history
    # Reached only when all n_epochs complete without early stopping.
    # NOTE(review): unlike the early-stopping branch, this path does NOT
    # reload the best saved weights -- the returned model carries the final
    # epoch's weights. TODO confirm whether that is intended.
    # Attach the optimizer
    model.optimizer = optimizer
    # Record overall time and print out stats
    total_time = timer() - overall_start
    print(
        f'\nBest epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_acc:.2f}%'
    )
    # NOTE(review): dividing by `epoch` (not epoch + 1) raises
    # ZeroDivisionError when n_epochs == 1 and is inconsistent with the
    # early-stopping branch above.
    print(
        f'{total_time:.2f} total seconds elapsed. {total_time / (epoch):.2f} seconds per epoch.'
    )
    # Format history
    history = pd.DataFrame(
        history,
        columns=['train_loss', 'valid_loss', 'train_acc', 'valid_acc'])
    return model, history
# In[27]:
# Train the classifier head with early stopping on the validation loss
model, history = train(model, criterion, optimizer, dataloaders['train'],
                       dataloaders['val'], save_file_name=save_file_name,
                       max_epochs_stop=3, n_epochs=20, print_every=1)
# # Training Results
#
# We can inspect the training progress by looking at the `history`.
# In[28]:
# Plot training vs validation loss per epoch
plt.figure(figsize=(8, 6))
for c in ['train_loss', 'valid_loss']:
    plt.plot(
        history[c], label=c)
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Negative Log Likelihood')
plt.title('Training and Validation Losses')
# As expected, the training loss decreases continually with epochs. At a certain point however, the validation loss stops decreasing. There is not a massive amount of overfitting, likely because we were using Dropout. With the divergence in losses, there is likely not much more to gain from further training.
# In[29]:
# Plot accuracy curves, scaled from fractions to percentages
plt.figure(figsize=(8, 6))
for c in ['train_acc', 'valid_acc']:
    plt.plot(
        100 * history[c], label=c)
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Average Accuracy')
plt.title('Training and Validation Accuracy')
# As with the losses, the training accuracy increases (nearly to perfect) while the validation accuracy plateaus. The model is able to achieve above 80% accuracy right away, an indication that the convolution weights learned on Imagenet were able to easily transfer to our dataset.
# # Saving Model
#
# The `train` function saves the best model `state_dict()` which are the weights of the model. To save more information about the model, we use the below function.
# In[30]:
def save_checkpoint(model, path):
    """Save a PyTorch model checkpoint

    Params
    --------
    model (PyTorch model): model to save; must carry the custom
        `class_to_idx`, `idx_to_class`, `epochs` and `optimizer`
        attributes attached earlier in this notebook
    path (str): location to save model. Must start with `model_name-` and
        end in '.pth'

    Returns
    --------
    None, save the `model` to `path`

    Raises
    --------
    ValueError: if `path` does not begin with a supported model name
    """
    model_name = path.split('-')[0]
    # Validate explicitly: `assert` is stripped under `python -O`, so it
    # is not a reliable guard for input validation
    if model_name not in ['vgg16', 'resnet50']:
        raise ValueError("Path must have the correct model name")
    # Basic details
    checkpoint = {'class_to_idx': model.class_to_idx,
                  'idx_to_class': model.idx_to_class,
                  'epochs': model.epochs}
    # Extract the final classifier and the state dictionary
    if model_name == 'vgg16':
        checkpoint['classifier'] = model.classifier
        checkpoint['state_dict'] = model.state_dict()
    # Add the optimizer (object plus its state, so training can resume)
    checkpoint['optimizer'] = model.optimizer
    checkpoint['optimizer_state_dict'] = model.optimizer.state_dict()
    # Save the data to the path
    torch.save(checkpoint, path)
# In[31]:
# Persist the full checkpoint (weights, class mappings, optimizer state)
save_checkpoint(model, path=checkpoint_path)
# ## Load in a Checkpoint
#
# Now we need to write the function to load in the checkpoint. This just takes in a `path` and returns a model from a saved checkpoint.
# In[32]:
def load_checkpoint(path):
    """Load a PyTorch model checkpoint saved by `save_checkpoint`.

    Params
    --------
    path (str): saved model checkpoint. Must start with `model_name-` and
        end in '.pth'

    Returns
    --------
    model (PyTorch model): vgg16 with the saved classifier head, weights
        and class mappings restored
    optimizer (PyTorch optimizer): optimizer restored to its saved state
    """
    # Get the model name
    model_name = 'vgg16'
    # Load in checkpoint. map_location lets a checkpoint saved on a GPU
    # machine be loaded on a CPU-only one (and vice versa).
    # NOTE(review): on torch >= 2.6 torch.load defaults to
    # weights_only=True, which cannot unpickle the stored optimizer
    # object -- pass weights_only=False there.
    checkpoint = torch.load(
        path, map_location='cuda' if train_on_gpu else 'cpu')
    if model_name == 'vgg16':
        model = models.vgg16(pretrained=True)
        # Make sure to set parameters as not trainable
        for param in model.parameters():
            param.requires_grad = False
        # Restore the custom classifier head before loading the state
        # dict, otherwise the saved head weights would not fit
        model.classifier = checkpoint['classifier']
    # Load in the state dict
    model.load_state_dict(checkpoint['state_dict'])
    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters.')
    total_trainable_params = sum(
        p.numel() for p in model.parameters() if p.requires_grad)
    print(f'{total_trainable_params:,} total gradient parameters.')
    # Move to gpu
    if train_on_gpu:
        model = model.to('cuda')
    # Model basics
    model.class_to_idx = checkpoint['class_to_idx']
    model.idx_to_class = checkpoint['idx_to_class']
    model.epochs = checkpoint['epochs']
    # Optimizer
    optimizer = checkpoint['optimizer']
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return model, optimizer
# In[33]:
# Rebuild the model and optimizer from the saved checkpoint
model, optimizer = load_checkpoint(path=checkpoint_path)
summary(model, input_size=(3, 224, 224), batch_size=batch_size)
# We can now use these two functions to save and load in a complete model. We can use this model to continue training.
# In[34]:
# Resume training from the restored state (the epochs counter carries over)
model, history = train(model, criterion, optimizer, dataloaders['train'], dataloaders['val'],
                       save_file_name=save_file_name, max_epochs_stop=3, n_epochs=20, print_every=1)
# Further training is unlikely to improve the validation results. Let's move to inference.
# # Inference
#
# After the model has been trained to the point of no more improvement on the validation data, we need to test it on data it has never seen. Early stopping with a validation set does decrease overfitting to the training set, but at the cost of potentially overfitting to the validation data. For a final estimate of the model's performance, we need to use the held-out testing data.
# Here, we'll look at individual predictions along with loss and accuracy on the entire testing dataset.
# This function processes an image path into a PyTorch tensor for predictions. It applies the same transformations as was done to the validation data: cropping (center) and normalizing with means and standard deviations.
# In[35]:
def process_image(image_path):
    """Process an image path into a normalized PyTorch tensor.

    Mirrors the validation transforms: resize to 256, center-crop to 224,
    scale to [0, 1], then standardize with the ImageNet mean/std.

    Params
    --------
    image_path (str): path of the image file to load

    Returns
    --------
    img_tensor (torch.Tensor): (3, 224, 224) standardized image tensor
    """
    image = Image.open(image_path)
    # Resize
    img = image.resize((256, 256))
    # Center crop to 224 x 224
    width = 256
    height = 256
    new_width = 224
    new_height = 224
    left = (width - new_width) / 2
    top = (height - new_height) / 2
    right = (width + new_width) / 2
    bottom = (height + new_height) / 2
    img = img.crop((left, top, right, bottom))
    # Convert to numpy and scale to [0, 1]. Fix: 8-bit pixels span 0..255,
    # so divide by 255 (the original /256 slightly darkened every image).
    img = np.array(img) / 255
    # Put the color channel first. A grayscale image stays 2-D here and is
    # broadcast against the (3, 1, 1) mean/std below, which replicates it
    # into 3 channels; an RGB image must be transposed (H, W, C) -> (C, H, W)
    # (the original omitted this, so RGB inputs crashed on the broadcast).
    if img.ndim == 3:
        img = img.transpose((2, 0, 1))
    # Standardization with the ImageNet channel statistics
    means = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    stds = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
    img = img - means
    img = img / stds
    img_tensor = torch.Tensor(img)
    return img_tensor
# In[36]:
# Process a test image and confirm the (3, 224, 224) tensor shape
x = process_image(testdir + '/DRUSEN/DRUSEN-8039905-1.jpeg')
x.shape
# In[37]:
# Visual check: de-normalizing should recover a sensible-looking image
ax, image = imshow_tensor(x)
# In[38]:
ax, image = imshow_tensor(process_image(testdir + '/NORMAL/NORMAL-3077276-1.jpeg'))
# ## Function to make predictions
#
# The next function makes predictions on a single image. It will return the top probabilities and classes.
# In[39]:
def predict(image_path, model, topk=4):
    """Make a prediction for an image using a trained model

    Params
    --------
    image_path (str): filename of the image
    model (PyTorch model): trained model for inference
    topk (int): number of top predictions to return

    Returns
    --------
    img_tensor (torch.Tensor): processed (3, 224, 224) image, on cpu
    top_p (np.ndarray): probabilities of the top k predicted classes
    top_classes (list of str): names of the top k predicted classes
    real_class (str): ground-truth class parsed from the directory name
    """
    # The true class is the name of the directory containing the image
    real_class = image_path.split('/')[-2]
    # Convert to pytorch tensor
    img_tensor = process_image(image_path)
    # Add the batch dimension unconditionally. Fix: the original reshaped
    # only on the GPU path, so CPU inference fed a 3-D tensor to the model.
    img_tensor = img_tensor.view(1, 3, 224, 224)
    if train_on_gpu:
        img_tensor = img_tensor.cuda()
    # Set to evaluation
    with torch.no_grad():
        model.eval()
        # Model outputs log probabilities
        out = model(img_tensor)
        ps = torch.exp(out)
        # Find the topk predictions (renamed so the `topk` argument is
        # not shadowed by its own result)
        top_ps, topclass = ps.topk(topk, dim=1)
        # Extract the actual classes and probabilities
        top_classes = [
            model.idx_to_class[class_] for class_ in topclass.cpu().numpy()[0]
        ]
        top_p = top_ps.cpu().numpy()[0]
        return img_tensor.cpu().squeeze(), top_p, top_classes, real_class
# This function can be used to pick a random test image.
# In[40]:
def random_test_image():
"""Pick a random test image from the test directory"""
c = np.random.choice(cat_df['category'])
root = testdir + c + '/'
img_path = root + np.random.choice(os.listdir(root))