<a href="https://colab.research.google.com/github/jessecanada/MAPS/blob/master/MAPS_4_phenotype_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MAPS Step 4 - phenotype classification with Azure
## This notebook will guide you through how to deploy your own Azure object detection model to perform phenotype classification.
### For help with creating an Azure deep learning model, follow this guide: https://docs.microsoft.com/en-us/azure/cognitive-services/custom-vision-service/get-started-build-detector

## Set up Azure environment

In [None]:
!pip -q install azure-cognitiveservices-vision-customvision
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, Region
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient

In [None]:
from google.colab import files
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import os
%matplotlib inline

In [None]:
# Project settings: copy each value from your Azure Custom Vision project and
# replace the placeholder strings below. Remove real keys before sharing this notebook.

ENDPOINT = "input endpoint"
training_key = "input training key"
prediction_key = "input prediction key"
prediction_resource_id = "input resource ID"
publish_iteration_name = "input iteration name"  # passed to classify_image together with the project ID
project_id = "input project ID"

# The training client is used here to look up the project by its ID.
# NOTE(review): the SDK is installed without a version pin; newer releases may expect a
# credentials object instead of a bare key in this constructor - confirm against the installed version.
trainer = CustomVisionTrainingClient(training_key, endpoint=ENDPOINT)
project = trainer.get_project(project_id=project_id)

In [None]:
project.id # sanity check: the ID displayed here should be the project ID you entered above

In [None]:
# Prediction client used by the classify_image calls in the cells below.
# `trainer` was already created in the settings cell with the same key and endpoint,
# so it is not rebuilt here.
predictor = CustomVisionPredictionClient(prediction_key, endpoint=ENDPOINT)

## Get your files ready

> ### It is recommended that you upload your cell images to Google Drive, then import them into this virtual machine session.
> ### Alternatively, you can upload from your local drive to this session.



In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this session
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Extract the image archive into /content/ (replace file_path with the path to your zip file)
!unzip file_path -d /content/

In [None]:
# Count the entries in the unzipped folder (replace unzipped_folder_path) to confirm how many cells are to be analyzed
!ls unzipped_folder_path | wc -l

## Azure classification predictions

### Do a test prediction

In [None]:
# Classify a single image, then show its predictions as text and as a one-row table

with open('input file path', "rb") as image:  # "rb": read the file as raw bytes
    results = predictor.classify_image(project.id, publish_iteration_name, image)

# one line per tag, with the probability shown as a percentage
for prediction in results.predictions:
    print(f"\t{prediction.tag_name}: {prediction.probability * 100:.2f}%")

print()

# tag names become the column headers; the percentages fill the single row
col_names = []
probabilities = []
for prediction in results.predictions:
    col_names.append(prediction.tag_name)
    probabilities.append(prediction.probability * 100)

df_test1 = pd.DataFrame([probabilities], columns=col_names)
df_test1.insert(0, 'image_ID', 'test_img_1')

print(df_test1)

	non_nuclear: 99.95%
	diffused: 0.05%
	nuclear: 0.00%

     image_ID  non_nuclear  diffused   nuclear
0  test_img_1     99.95431  0.045688  0.000012


### Make batch predictions

In [None]:
# Classify every .jpg in wrk_dir and collect one row per image in temp_list:
# [image_ID, probability (%) of each tag, with the tags in alphabetical order]
wrk_dir = '/content/Y180H_singles_RGB_manually_filtered/'
temp_list = []

for entry in os.scandir(wrk_dir):
  if entry.name.endswith('.jpg'):
    image_ID = entry.name[:-4]  # file name without the '.jpg' extension
    print(f'image_ID: {image_ID}')

    # open the image as raw bytes and get back the prediction results;
    # entry.path already joins wrk_dir and the file name, so wrk_dir does not need a trailing '/'
    with open(entry.path, mode="rb") as image:
      results = predictor.classify_image(project.id, publish_iteration_name, image)

    # map each tag to its probability, expressed as a percentage
    predictions_dict = {prediction.tag_name: prediction.probability*100 for prediction in results.predictions}
    # alphabetical tag order keeps the columns aligned across rows
    # (assumes every image returns the same set of tags - TODO confirm)
    sorted_tags = sorted(predictions_dict)
    # image_ID goes first, followed by the probabilities in sorted-tag order
    temp_list.append([image_ID] + [predictions_dict[tag] for tag in sorted_tags])

    for tag in sorted_tags:
      # the format spec has to sit inside the braces, otherwise ':.2f' is printed literally
      print(f'{tag}: {predictions_dict[tag]:.2f}%')
    print()

image_ID: merged_191024160001_A02f100_2
diffused: 1.06238875e-06:.2f
non_nuclear: 100.0:.2f
nuclear: 1.76529166e-31:.2f

image_ID: merged_191024160002_A02f250_4
diffused: 0.38223352699999996:.2f
non_nuclear: 99.61777000000001:.2f
nuclear: 1.41995936e-13:.2f

image_ID: merged_191024160002_A02f249_5
diffused: 99.9204338:.2f
non_nuclear: 0.0119858538:.2f
nuclear: 0.06758246450000001:.2f

image_ID: merged_191024160002_A02f262_7
diffused: 99.99997619999999:.2f
non_nuclear: 2.20268831e-05:.2f
nuclear: 1.00542645e-15:.2f

image_ID: merged_191024160002_A02f294_12
diffused: 0.0019529460000000001:.2f
non_nuclear: 99.998045:.2f
nuclear: 2.8986516299999996e-15:.2f

image_ID: merged_191024160002_A02f275_2
diffused: 100.0:.2f
non_nuclear: 4.500061e-06:.2f
nuclear: 2.62980924e-13:.2f

image_ID: merged_191024160002_A02f289_0
diffused: 0.00246070158:.2f
non_nuclear: 99.9975443:.2f
nuclear: 3.6422344100000003e-20:.2f

image_ID: merged_191024160002_A02f262_0
diffused: 1.77318616e-33:.2f
non_nuclear: 100.

In [None]:
# Assemble the collected rows into a table and preview the first few predicted probabilities.
# Column names come from the tags in predictions_dict, as left by the last iteration of the batch loop.

col_names = ['image_ID'] + sorted(predictions_dict)
df_predict = pd.DataFrame(temp_list, columns=col_names)
df_predict.head(10)

Unnamed: 0,image_ID,diffused,non_nuclear,nuclear
0,merged_191024160001_A02f100_2,1.062389e-06,100.0,1.765292e-31
1,merged_191024160002_A02f250_4,0.3822335,99.61777,1.419959e-13
2,merged_191024160002_A02f249_5,99.92043,0.011986,0.06758246
3,merged_191024160002_A02f262_7,99.99998,2.2e-05,1.005426e-15
4,merged_191024160002_A02f294_12,0.001952946,99.998045,2.898652e-15
5,merged_191024160002_A02f275_2,100.0,5e-06,2.629809e-13
6,merged_191024160002_A02f289_0,0.002460702,99.997544,3.6422339999999996e-20
7,merged_191024160002_A02f262_0,1.773186e-33,100.0,2.421084e-30
8,merged_191024160002_A02f298_6,7.095198e-05,7.21356,92.78637
9,merged_191024160002_A02f310_6,3.816229,96.18377,2.202939e-11


In [None]:
len(df_predict) # number of rows, one per classified image: does it match the number of images in the folder?

156

In [None]:
# Save the predictions table to a CSV file (replace 'your_file_name.csv' with your own name);
# index=False leaves the DataFrame's row index out of the file
df_predict.to_csv('your_file_name.csv', index=False)