### Importing Libraries

In [1]:
#!pip install Pillow
#!pip install image
#!pip install torch torchvision
#!pip install gdown

# Pillow is the maintained fork of the Python Imaging Library (imported as PIL).
import PIL
# Image is Pillow's core module for opening, manipulating, and saving image files.
from PIL import Image
# Provides Tensor computation (like numpy) with GPU acceleration and Deep Neural Networks built on a tape-based autograd system.
import torch
# Provides common image transformations
import torchvision
from torchvision import datasets, transforms, models
# Downloading a large file from Google Drive
import gdown
import os

### Training on GPU or CPU ?

In [2]:
# Ask PyTorch once whether CUDA can be used; the flag drives both the
# status message and the choice of device below.
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print('CUDA is available. Training on GPU ...')
else:
    print('CUDA is not available. Training on CPU ...')

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if train_on_gpu else torch.device("cpu")
print(device)

CUDA is not available. Training on CPU ...
cpu


### Download the Data

In [None]:
output = 'breast_cancer_data_v1.zip'

# Complete dataset ~4GB
#url = 'https://drive.google.com/uc?id=1k2RHhOLHYv2mTLk0GE0SajjfvpziEHJI'

# mini dataset (10 malignant and 10 benign PNGs)
url = 'https://drive.google.com/uc?id=12L3PE1YI-XOXdyuLNIe7cHu-JaoqMpW3'

gdown.download(url, output, quiet=False)
!tar xf {output}

In [4]:
# Organize the dataset: locate the extracted data folder relative to the
# current working directory and set the data-loading parameters.
# os.getcwd() is the plain-Python equivalent of the IPython-only `%pwd` magic.
x = os.getcwd()   # current working directory (name kept for compatibility)
data_home = os.path.join(x, 'cancer_data_v1')
train_dir = os.path.join(data_home, 'train')
valid_dir = os.path.join(data_home, 'valid')
# Passed to the DataLoaders: number of worker processes and images per batch.
num_workers = 4
batch_size = 32

### Transforms, Augmentation, and Normalization

In [5]:
# Per-channel mean and standard deviation used to normalize the image tensors.
# NOTE(review): these match the ImageNet statistics listed in the torchvision
# docs for its pretrained models - confirm they suit the model used later.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation, then tensor conversion and
# normalization.
train_pipeline = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Validation pipeline: deterministic resize and center crop, with no
# augmentation.
valid_pipeline = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Keyed by split name so the dataset-loading cell can look each pipeline up.
data_transforms = {
    'train': train_pipeline,
    'valid': valid_pipeline,
}

### Load datasets with ImageFolder

In [6]:
# Names of the dataset splits; each is a sub-directory of data_home and a key
# in data_transforms.
split_names = ['train', 'valid']

# Build one ImageFolder dataset per split, pairing the split's directory with
# the transform pipeline of the same name.
image_datasets = {}
for split in split_names:
    split_root = os.path.join(data_home, split)
    image_datasets[split] = datasets.ImageFolder(split_root, data_transforms[split])

# Using the image datasets and the transforms, define the dataloaders.
# Note that both splits are shuffled, validation included.
dataloaders = {}
for split in split_names:
    dataloaders[split] = torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )

# Number of samples in each split.
dataset_sizes = {}
for split in split_names:
    dataset_sizes[split] = len(image_datasets[split])

# Class labels as reported by the training ImageFolder.
class_names = image_datasets['train'].classes

### Building and training the classifier
