# Build an image classifier that tells if a fruit is a Durian or a Jackfruit
---
### Key Concepts:
  * Image Classifier (2 classes: {Durian, Jackfruit})
  * Transfer Learning (ResNet18)
  * PyTorch
  * **<u>REMOTE, distributed compute and data</u>**
  * **<u>Azure Machine Learning SDK</u>**

### Learning Objectives:
  * Load and explore Image dataset
  * Train model
  * Predict based on model (Inference)
    * **<u>Register model</u>**
    * **<u>Build and invoke webservice</u>**

###  Dataset:
  * small dataset containing images of Durians and Jackfruits
---

In [None]:
import azureml.core
from azureml.core import Workspace

# Connect to the workspace described by the saved config file and report
# which SDK version and workspace this notebook is talking to.
ws = Workspace.from_config()
sdk_version = azureml.core.VERSION
workspace_name = ws.name
print('Ready to use Azure ML {} to work with {}'.format(sdk_version, workspace_name))

In [None]:
# Name of the experiment that the training run is submitted under.
experiment_name = 'DurianvsJackfruit_Experiment'

from azureml.core import Experiment

# Experiment handle in the workspace; used later to submit the run.
exp = Experiment(name=experiment_name, workspace=ws)

In [None]:
# Compute target the training job is submitted to (REMOTE CLUSTER)
compute_target = "cc-pycon2020"

# Training variables (forwarded to train.py as --epochs / --learning-rate)
n_epochs = 2
learning_rate = 0.001

In [None]:
import os, shutil

# Local folder that holds the files submitted with the experiment
# (train.py is written into it by the next cell).
folder_name = 'durian-experiment-files'
experiment_folder = './' + folder_name
os.makedirs(experiment_folder, exist_ok=True)

In [None]:
%%writefile $experiment_folder/train.py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler

from azureml.core import Run

import azureml.core
from azureml.core import Workspace

# Get parameters
# argparse is imported here because the script's import section does not
# bring it in; without it the parser construction raises NameError.
import argparse

parser = argparse.ArgumentParser()
# Required: every dataset path below is derived from this folder, so fail
# with a clear usage error instead of a TypeError deep inside os.path.join.
parser.add_argument('--data-folder', type=str, dest='data_folder', required=True,
                    help='data folder reference')
# Parsed directly as int so a non-numeric value is rejected by argparse itself.
parser.add_argument('--epochs', type=int, dest='epochs', default=1, help='number of epochs')
parser.add_argument('--learning-rate', type=float, dest='l_rate', default=0.005, help='learning rate')
parser.add_argument('--output_folder', type=str, dest='output_folder', default="outputs", help='output folder')

args = parser.parse_args()
data_folder = args.data_folder
epochs = args.epochs
lrate = args.l_rate
output_folder = args.output_folder

# Script-level constants.
# NOTE(review): CUDA_LAUNCH_BLOCKING is only a Python variable here and is
# not read anywhere in this script - confirm whether an environment
# variable was intended.
CUDA_LAUNCH_BLOCKING = 1
# number of colors on images (B&W=1, RGB=3)
num_channels = 3
# File name used when the trained model is saved at the end of the script.
model_save_path = 'durian.pth'

# Root folder of the image data, as received on the command line.
data_dir = data_folder
print('Mounting at: ', data_dir)

# Use the first GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print('Device: ', device)
print('Epochs:', str(epochs))
print('Learning Rate', lrate)

#
# Build the per-split transforms, datasets and data loaders
#
data_transforms = {
    # Training images are additionally flipped horizontally at random.
    'train': transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
}
# Validation and test images are only resized and converted to tensors.
for _split in ('val', 'test'):
    data_transforms[_split] = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.ToTensor(),
    ])

image_datasets = {}
dataloaders = {}
dataset_sizes = {}
for _split in ['train', 'val', 'test']:
    # Each split lives in its own sub-folder of data_dir.
    _dataset = datasets.ImageFolder(os.path.join(data_dir, _split),
                                    data_transforms[_split])
    image_datasets[_split] = _dataset
    dataloaders[_split] = torch.utils.data.DataLoader(
        _dataset, batch_size=4, shuffle=True, num_workers=4)
    dataset_sizes[_split] = len(_dataset)

# Class labels as discovered by ImageFolder in the training split.
class_names = image_datasets['train'].classes

print('Classes loaded:', class_names)

num_letters = len(class_names)

def train_model(net, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``net`` and return it loaded with its best validation weights.

    Every epoch runs one training pass and one validation pass over the
    module-level ``dataloaders``; batches are moved to the module-level
    ``device`` and per-phase averages are computed with ``dataset_sizes``.
    The best validation accuracy and lowest validation loss are logged to
    the module-level Azure ML ``run``.

    Args:
        net: Model to train; modified in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``net``'s parameters.
        scheduler: Learning-rate scheduler; stepped once per epoch after
            the training phase.
        num_epochs: Number of epochs to run.

    Returns:
        ``net`` carrying the weights of the epoch with the lowest
        validation loss (its initial weights if no epoch was run).
    """
    since = time.time()
    best_model_wts = copy.deepcopy(net.state_dict())
    best_acc = 0.0
    # Infinity guarantees the first validation epoch is always recorded;
    # a finite sentinel could silently block checkpointing.
    lowest_loss = float('inf')

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                net.train()  # Set model to training mode
            else:
                net.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradient history is tracked only while training
                with torch.set_grad_enabled(is_training):
                    outputs = net(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics, kept as plain Python numbers so the metrics
                # below never depend on tensor/device handling
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            # Advance the learning-rate schedule once per epoch, after the
            # optimizer updates; otherwise the scheduler has no effect.
            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.7f} Acc: {:.7f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'val':
                best_acc = max(best_acc, epoch_acc)
                # deep copy the model whenever validation loss improves
                if epoch_loss < lowest_loss:
                    lowest_loss = epoch_loss
                    best_model_wts = copy.deepcopy(net.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Lowest val loss: {:4f}'.format(lowest_loss))

    print('accuracy: ' + str(best_acc))
    run.log('accuracy', float(best_acc))

    print('Loss: ' + str(lowest_loss))
    run.log('loss', float(lowest_loss))

    print('Finished Training')

    # Return the checkpoint the logged loss refers to, not whatever the
    # last epoch happened to produce.
    net.load_state_dict(best_model_wts)
    return net
    
# Handle to the Azure ML run executing this script; train_model logs its
# metrics through this module-level name.
run = Run.get_context()

print('Started training')

# Transfer learning: start from a pretrained ResNet18 and replace its final
# fully connected layer with one sized for our classes.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features

print('Number of classes:', len(class_names))
model_ft.fc = nn.Linear(num_ftrs, len(class_names))
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# All parameters of the network are handed to the optimizer.
optimizer_ft = optim.SGD(model_ft.parameters(), lr=lrate, momentum=0.9)

# Scheduler configured with step_size=7 and gamma=0.1.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

net = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=epochs,
)

print('Model trained.')


# Save the whole model object into the output folder.
os.makedirs(output_folder, exist_ok=True)
saved_model_file = output_folder + '/' + model_save_path
torch.save(net, saved_model_file)

run.complete()

In [None]:
from azureml.core import Datastore,Dataset

# Fetch the datastore named 'pycon2020_source_images' from the workspace.
datastore = Datastore.get(ws, 'pycon2020_source_images')

# File dataset over everything matching durian/*/*/* in that datastore.
image_files = (datastore, 'durian/*/*/*')
planes_ds = Dataset.File.from_files(image_files)

In [None]:
from azureml.core import Experiment
from azureml.widgets import RunDetails
from azureml.train.dnn import PyTorch

# Command-line arguments handed to train.py; the dataset is mounted on the
# compute target and its mount point becomes --data-folder.
mounted_images = planes_ds.as_named_input('pycon2020_source_images').as_mount()
script_params = {
    '--data-folder': mounted_images,
    '--epochs': n_epochs,
    '--learning-rate': learning_rate,
}

# Create an estimator
from azureml.train.estimator import Estimator

estimator_settings = dict(
    source_directory=experiment_folder,
    script_params=script_params,
    compute_target=compute_target,
    entry_script='train.py',
    use_gpu=True,
    pip_packages=['azureml-dataprep[pandas,fuse]'],
    conda_packages=['pytorch', 'torchvision'],
)
estimator = Estimator(**estimator_settings)

# Submit the estimator as a run of the experiment
run = exp.submit(config=estimator)

# Show the run details widget, then block until the run finishes
RunDetails(run).show()
run.wait_for_completion()

In [None]:
# Print the names of the files associated with the run.
print(run.get_file_names())

## Register model with AML Service
---

In [None]:
# File name train.py uses when saving the model into its output folder.
model_save_path = 'durian.pth'

# Register the model file from the run's outputs under the name 'durian'.
# The path is built from model_save_path so the file name is defined once
# instead of being hard-coded a second time.
model = run.register_model(model_name='durian',
                           model_path='outputs/' + model_save_path)
print(model.name, model.id, model.version, sep='\t')

## Deploy model
---
1. Create environment dependencies
1. Build score.py script (that gets executed on the Webservice calls)
1. Deploy Webservice

In [None]:
from azureml.core.conda_dependencies import CondaDependencies 

# Pip packages needed by the scoring script.
inference_packages = ['azureml-defaults', 'torch', 'torchvision>=0.5.0']
myenv = CondaDependencies.create(pip_packages=inference_packages)

# Serialize the dependency spec, write it to myenv.yml and echo it.
env_spec = myenv.serialize_to_string()
with open("myenv.yml", "w") as f:
    f.write(env_spec)

print(env_spec)

In [None]:
%%writefile score.py
import os
import torch
import torch.nn as nn
from torchvision import transforms
import json

from azureml.core.model import Model


def init():
    """Load the trained model into the module-level ``model`` variable.

    The model file ``durian.pth`` is read from the folder named by the
    AZUREML_MODEL_DIR environment variable, mapped onto the CPU and
    switched to evaluation mode.
    """
    global model

    def _keep_on_cpu(storage, loc):
        # Returning the storage unchanged keeps every tensor on the CPU.
        return storage

    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
    # For multiple models, it points to the folder containing all deployed models (./azureml-models)
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    model_path = os.path.join(model_dir, 'durian.pth')

    model = torch.load(model_path, map_location=_keep_on_cpu)
    model.eval()


def run(input_data):
    """Classify the first image contained in a JSON scoring request.

    Args:
        input_data: JSON string with a ``'data'`` key holding nested lists
            of numbers; it is converted to a float32 tensor and passed to
            the module-level ``model``.
            (presumably shaped (batch, channels, height, width) - confirm
            against the client-side preprocessing.)

    Returns:
        dict with ``"label"`` (``'Durian'`` or ``'Jackfruit'``) and
        ``"probability"`` (the softmax probability of that label, as a
        string) for the first sample of the batch.
    """
    # Force float32 so payloads that happen to contain only integers do not
    # produce an int64 tensor the model cannot consume.
    batch = torch.tensor(json.loads(input_data)['data'], dtype=torch.float32)
    classes = ['Durian', 'Jackfruit']

    # get prediction
    with torch.no_grad():
        output = model(batch)
        pred_probs = nn.Softmax(dim=1)(output).numpy()[0]

    # Use a plain Python int as the index: indexing a list or numpy array
    # with a tensor is fragile and breaks for batches larger than one.
    index = int(pred_probs.argmax())

    result = {"label": classes[index], "probability": str(pred_probs[index])}
    return result

In [None]:
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment


# Environment built from the conda specification file written earlier.
myenv = Environment.from_conda_specification(name="myenv", file_path="myenv.yml")

# score.py is the entry script run inside that environment.
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)

# Azure Container Instance sizing and descriptive metadata.
service_tags = {
    'data': 'Durians and Jackfruits',
    'method': 'transfer learning',
    'framework': 'pytorch',
}
service_description = 'Classify Durians/Jackfruits using transfer learning with PyTorch'
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags=service_tags,
    description=service_description,
)

# Deploy the registered model as the 'aci-durian' web service and wait
# for the deployment to finish before printing its state.
service = Model.deploy(
    workspace=ws,
    name='aci-durian',
    models=[model],
    inference_config=inference_config,
    deployment_config=aciconfig,
)
service.wait_for_deployment(True)
print(service.state)

In [None]:
# Fetch the web service logs; as the last expression of the cell the
# result is displayed by the notebook.
service.get_logs()

In [None]:
# Print the scoring URI of the deployed web service.
print(service.scoring_uri)

## Invoke scoring Web Service
---

In [None]:
import torch
from torchvision import transforms
# import json
from PIL import Image, ImageFile
import matplotlib.pyplot as plt
import urllib.request
    
def preprocess(image_file):
    """Preprocess the input image into a single-image batch array.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (as used here: the
            file-like object returned by ``load_image``).

    Returns:
        A float numpy array of shape (1, 3, 256, 256): the image converted
        to RGB, resized to 256x256, turned into a tensor and given a
        leading batch dimension.
    """
    data_transforms = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.ToTensor()
    ])

    with Image.open(image_file) as image:
        # Force three channels: grayscale, palette or RGBA images would
        # otherwise produce a tensor with the wrong channel count.
        rgb_image = image.convert('RGB')

    # ToTensor already yields a tensor, so no extra torch.tensor() copy is
    # needed (that copy only triggered a UserWarning).
    batch = data_transforms(rgb_image).float().unsqueeze(0)
    return batch.numpy()

def load_image(url='https://upload.wikimedia.org/wikipedia/commons/b/bc/Durian_in_black.jpg',
               timeout=30):
    """Download an image and return its bytes as an in-memory stream.

    Args:
        url: Address of the image to fetch; defaults to the sample durian
            picture used throughout this notebook.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        An ``io.BytesIO`` positioned at the start of the downloaded bytes.
        Unlike the raw HTTP response it is seekable and can be read more
        than once, and the network connection is closed before returning.

    Raises:
        urllib.error.URLError: If the URL cannot be fetched.
    """
    import io

    # Identify the client explicitly; some hosts reject requests that
    # arrive with urllib's default User-Agent.
    request = urllib.request.Request(
        url, headers={'User-Agent': 'durian-jackfruit-notebook/1.0'})

    # The context manager closes the connection once the body is read.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return io.BytesIO(response.read())

In [None]:
from PIL import Image, ImageFile
import matplotlib.pyplot as plt

# Fetch the sample image.
image = load_image()

# Render matplotlib figures inline in the notebook, then display the image.
%matplotlib inline
plt.imshow(Image.open(image))

In [None]:
from azureml.core.webservice import Webservice
import json

# Print name, scoring URI and swagger URI of every webservice in this workspace
services = Webservice.list(ws)

for deployed in services:
    print(deployed.name, deployed.scoring_uri, deployed.swagger_uri, sep='\n')

# Get our Webservice (by name)
service = Webservice(workspace=ws, name='aci-durian')

# Download the sample image, preprocess it and wrap it in the JSON payload
# the scoring script expects under the 'data' key
image = load_image()
input_data = preprocess(image)
payload = json.dumps({'data': input_data.tolist()})

# Call the webservice and print its answer
result = service.run(input_data=payload)

print(result)

In [None]:
import requests
import json

# Call the scoring endpoint over plain HTTP, without the SDK client.
# The URI is taken from the service object instead of a hard-coded address,
# which goes stale as soon as the service is redeployed.
scoring_uri = service.scoring_uri

headers = {'Content-type': 'application/json'}

image = load_image()
input_data = preprocess(image)
payload = json.dumps({'data': input_data.tolist()})

# A timeout keeps the cell from hanging forever on an unreachable endpoint.
response = requests.post(scoring_uri, data=payload, headers=headers, timeout=60)

# Report status and latency first so they are visible even when the body
# is not valid JSON, then fail loudly on HTTP errors.
print(response.status_code)
print(response.elapsed)
response.raise_for_status()
print(response.json())
