# Cell Type Classifier
Classify cell type images from Bo Sun's Lab.

In [6]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import glob
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import Image
from PIL import Image  # NOTE: rebinds Image, shadowing the IPython.display import above
from matplotlib import cm
from matplotlib import pyplot as plt

import label_image

In [7]:
def get_image_list(folder):
    """Map each immediate sub-directory of ``folder`` to the JPEG files it contains.

    Args:
        folder: Path of a directory whose sub-directories each hold the images
            of one class.

    Returns:
        dict: ``{sub-directory name: sorted list of '*.jpg' paths}``. The key is
        the bare directory name (no parent path, no trailing separator) on every
        platform. Sub-directories without ``.jpg`` files map to an empty list.
    """
    image_list = {}
    # A pattern ending in a path separator makes glob match directories only.
    for directory in sorted(glob.glob(os.path.join(folder, '*', ''))):
        # normpath drops the trailing separator so basename yields the directory
        # name regardless of the separator style or how deep ``folder`` is.
        class_name = os.path.basename(os.path.normpath(directory))
        # Sorted so the order of images (and of later predictions) is reproducible.
        image_list[class_name] = sorted(glob.glob(os.path.join(directory, '*.jpg')))  # assuming jpg
    return image_list

def classify_cells(image_directory,model_file, 
                   label_file= "output_files/output_labels.txt", 
                   input_height=224, 
                   input_width=224,
                   input_mean=0,
                   input_std=255, 
                   input_layer='Placeholder',
                   output_layer= 'final_result'):    
    """Classify every image found under ``image_directory`` with a trained graph.

    Images are grouped by sub-directory (see ``get_image_list``), converted to
    input tensors with ``label_image.read_tensor_from_image_file`` and run
    through the graph loaded from ``model_file`` one image at a time.

    Args:
        image_directory: Directory with one sub-directory of ``.jpg`` images
            per class.
        model_file: Path handed to ``label_image.load_graph``.
        label_file: Path handed to ``label_image.load_labels``.
        input_height, input_width, input_mean, input_std: Forwarded unchanged to
            ``label_image.read_tensor_from_image_file``.
        input_layer: Name of the graph's input operation (without the
            ``import/`` prefix).
        output_layer: Name of the graph's output operation (without the
            ``import/`` prefix).

    Returns:
        tuple: ``(predictions, labels)`` where ``predictions`` maps each
        sub-directory name to a list holding the index of the highest-scoring
        output for each image, and ``labels`` is whatever
        ``label_image.load_labels(label_file)`` returns.
    """
    print('\nClassifying directory...'+str(image_directory),'....')
    image_list = get_image_list(image_directory)
    graph = label_image.load_graph(model_file)

    # dict of validation images, keyed by sub-directory name.
    val_set = {}
    print('reading images....')
    for key, file_names in image_list.items():
        print(key+'.....',end='')
        val_set[str(key)] = [
            label_image.read_tensor_from_image_file(
                file_name,
                input_height=input_height,
                input_width=input_width,
                input_mean=input_mean,
                input_std=input_std)
            for file_name in file_names
        ]
        print('.....done')

    input_tensor = graph.get_operation_by_name("import/" + input_layer).outputs[0]
    output_tensor = graph.get_operation_by_name("import/" + output_layer).outputs[0]

    print('making predictions....')
    predictions = {}
    # One session serves every image; opening a session per image repeats
    # loop-invariant setup work.
    with tf.Session(graph=graph) as sess:
        for key, images in val_set.items():
            print(key+'.....',end='')
            val_pred = []
            for img in images:
                # Each image is fed on its own.
                # NOTE(review): presumably each tensor is already a batch of one
                # (1, height, width, 3) -- confirm against label_image.
                scores = np.squeeze(sess.run(output_tensor, {input_tensor: img}))
                # Index of the best-scoring class (top-1 prediction).
                val_pred.append(np.argmax(scores))
            predictions[key] = val_pred
            print('.....done')
    labels = label_image.load_labels(label_file)
    return predictions, labels

def confusion_matrix(predictions,labels,title=None):
    """Print per-class accuracy and plot a column-normalised confusion matrix.

    Args:
        predictions: Mapping from class label to a list of predicted class
            indices (positions in ``labels``) for the images of that class.
            A label that is missing or has no predictions is reported with
            0 samples and leaves an all-zero column.
        labels: Sequence of class labels; position ``i`` is class index ``i``.
        title: Optional figure title; no title is drawn when ``None``.

    Returns:
        None. The per-class summary is printed and the figure is created
        through ``plt``.
    """
    labels = list(labels)
    n = len(labels)
    # Builtin float: the ``np.float`` alias was removed in NumPy 1.24.
    matrix = np.zeros((n, n), dtype=float)

    pred_scores = []
    for i, ctype in enumerate(labels):
        preds = predictions.get(ctype, [])
        num = len(preds)
        correct = 0
        for pred in preds:
            # Row = predicted class, column = true class; each column sums to 1.
            matrix[pred, i] += 1 / num
            if pred == i:
                correct += 1
        # Accuracy is undefined for a class with no samples.
        accuracy = correct / num if num else float('nan')
        pred_scores.append((accuracy, num))

    for i, label in enumerate(labels):
        print(label,'= ', str(round(pred_scores[i][0]*100,1))+'%,',pred_scores[i][1],'samples')

    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111)
    # A colormap name works on every Matplotlib version; plt.cm.get_cmap was removed.
    cfm = ax.matshow(matrix, cmap='hot')
    fig.colorbar(cfm)

    ax.set_xlabel('Cell Type', fontsize=20)
    ax.set_ylabel('Prediction', fontsize=20)
    # Pin one tick per class so each label lines up with its row/column instead
    # of depending on where the automatic locator happens to place ticks.
    ax.set_xticks(np.arange(n))
    ax.set_yticks(np.arange(n))
    ax.set_xticklabels(labels, fontsize=12)
    ax.set_yticklabels(labels, fontsize=12)
    ax.xaxis.set_label_position('top')
    ax.xaxis.labelpad = 20
    ax.yaxis.labelpad = 20

    fig.tight_layout(pad=0, w_pad=1.5, h_pad=3)
    fig.set_figheight(10)
    fig.set_figwidth(10)
    if title is not None:
        fig.suptitle(title, fontsize=26)

In [8]:
# Available trained models: paths of the graph (.pb) files, one of which is
# selected as `model_file` and loaded by classify_cells via label_image.load_graph.
# If a file is not available in the repo, first generate it using retrain_helper.ipynb.
inception_v1 = "output_files/output_graph_inception_v1.pb" # Final Test Accuracy: 90.0%
inception_v1_p = "output_files/output_graph_inception_v1_partial.pb" # Final Test Accuracy: 89.0%
inception_v2 = "output_files/output_graph_inception_v2.pb" # Final Test Accuracy: 90.0%. Not available in repo.
inception_v3 = "output_files/output_graph_inception_v3.pb" # Final Test Accuracy: 94.3%. Not available in repo.
# NOTE(review): unlike the entries above, the next three paths have no
# "output_files/" prefix, so they resolve against the working directory --
# confirm this is intended.
mobilenet_v1 = 'output_graph_mobilenet_v1.pb' # Final Test Accuracy: 94.3%
mobilenet_v2 = 'output_graph_mobilenet_v2.pb' # Final Test Accuracy: 92.2%
inception_resnet_v2 = 'output_graph_inception_resnet_v2.pb' # Final Test Accuracy: 86.4%. Not available in repo.

#### Choose the directory of images to classify and the model to use for classification (`model_file`)...

In [9]:
# Image directories to classify (one sub-directory of images per cell type)
# and the trained model to use.
unseen_dir = 'unseen_validation'
train_dir = 'partial_cell_type_images'
model_file = inception_v1_p

# Other parameters; the values below mirror the keyword defaults of classify_cells.
label_file = "output_files/output_labels.txt"
input_height = 224
input_width = 224
input_mean = 0
input_std = 255
input_layer = 'Placeholder'
output_layer = 'final_result'

#### Run Prediction...

In [5]:
# Unseen Validation Set
# Pass the configured values through explicitly so that editing the
# configuration cell actually changes how the images are read and classified.
unseen_predict, labels = classify_cells(
    unseen_dir,
    model_file,
    label_file=label_file,
    input_height=input_height,
    input_width=input_width,
    input_mean=input_mean,
    input_std=input_std,
    input_layer=input_layer,
    output_layer=output_layer,
)


Classifying directory:  unseen_validation ....
reading images....
actinedge..........done
filopodia..........done
hemisphere..........done
lamellipodia..........done
smallbleb..........done
making predictions....
actinedge.....

ValueError: Cannot feed value of shape (13, 1, 224, 224, 3) for Tensor 'import/Placeholder:0', which has shape '(?, 224, 224, 3)'

In [10]:
# Training Set
# Pass the configured values through explicitly so that editing the
# configuration cell actually changes how the images are read and classified.
train_predict, labels = classify_cells(
    train_dir,
    model_file,
    label_file=label_file,
    input_height=input_height,
    input_width=input_width,
    input_mean=input_mean,
    input_std=input_std,
    input_layer=input_layer,
    output_layer=output_layer,
)


Classifying directory:  partial_cell_type_images ....
reading images....
actinedge..........done
filopodia..........done
hemisphere..........done
lamellipodia..........done
smallbleb..........done
making predictions....
actinedge..........done
filopodia..........done
hemisphere..........done
lamellipodia..........done
smallbleb..........done


In [11]:
# Per-class accuracy and confusion matrix for the unseen validation set.
# Requires `unseen_predict` and `labels` from the classify_cells(unseen_dir, ...)
# cell, which must have completed successfully in this kernel session.
confusion_matrix(unseen_predict, labels, title='Unseen Validation Set')

NameError: name 'unseen_predict' is not defined

In [None]:
# Per-class accuracy and confusion matrix for the training set.
# Requires `train_predict` and `labels` from the classify_cells(train_dir, ...) cell.
confusion_matrix(train_predict, labels, title='Training Set')