In [1]:
%%javascript
$('#appmode-leave').hide();                          // Hides the edit app button.
$('#appmode-busy').hide();                           // Hides the kernel busy indicator.
IPython.OutputArea.prototype._should_scroll = function(lines) {
  return false; // disable scrolling
}

<IPython.core.display.Javascript object>

In [2]:
%matplotlib inline
import os
import ipywidgets as ipw
from glob import glob
import json
import pandas as pd
import numpy as np
from PIL import Image
from itertools import cycle
from io import BytesIO
from time import time
import warnings
_CELLSET_ID = "AIzaSyC8Zo-9EbXgHfqNzDxVb_YS_IIZBWtvoJ4"
try:
    # python 2 version
    from urllib import urlopen
except:
    # python 3 version
    from urllib.request import urlopen
def get_sheet_as_df(base_url, kk, columns="A:AG"):
    """
    Download a column range of a sheet and return it as a DataFrame.

    The first returned row is used as the header. Every following row becomes
    one record; rows shorter than the header are padded with ''.

    :param base_url: URL of the sheet's ``values`` endpoint (no trailing slash)
    :param kk: API key, sent as the ``key`` query parameter
    :param columns: range to fetch, appended to ``base_url``
    :return: DataFrame with one row per sheet row after the header. If the
             sheet cannot be read (IOError) or contains no rows, a warning is
             issued and a placeholder frame built from ``[{}]`` is returned.
    """
    all_vals = "{base_url}/{cols}?key={kk}".format(base_url=base_url,
                                                    cols=columns,  # TODO: we should probably get the whole sheet
                                                    kk=kk)
    try:
        response = urlopen(all_vals)
        try:
            raw_bytes = response.read()
        finally:
            # explicit close() works for both the python 2 and python 3 response objects
            response.close()
    except IOError as e:
        warnings.warn('Sheet could not be accessed, check internet connectivity, proxies and permissions: {}'.format(e))
        return pd.DataFrame([{}])

    # JSON is UTF-8 encoded; decoding it as latin1 garbles any non-ASCII cell text
    t_data = json.loads(raw_bytes.decode('utf-8')).get('values', [])
    if not t_data:
        # the response carries no 'values' when the requested range is empty
        warnings.warn('Sheet was read but contains no rows')
        return pd.DataFrame([{}])

    frow = t_data[0]
    records = []
    for irow in t_data[1:]:
        records.append(dict((key, irow[idx] if idx < len(irow) else '')
                            for idx, key in enumerate(frow)))
    return pd.DataFrame(records)
# Who is viewing the dashboard and where it runs; both fall back to
# placeholder strings when the environment variables are not set.
USERNAME = os.environ.get('APPMODE_USER', 'Not logged in')
HOSTNAME = os.environ.get('HOSTNAME', 'anon')


def sheet_api_url(sheet_id):
    """Return the Sheets v4 ``values`` endpoint URL for the given sheet id."""
    return "https://sheets.googleapis.com/v4/spreadsheets/{id}/values".format(id=sheet_id)

  return f(*args, **kwds)


In [3]:
# Read the task description once, then pull out the dataset settings that the
# rest of the notebook refers to.
with open('task.json', 'r') as f:
    annotation_task = json.load(f)

data_df = pd.DataFrame(annotation_task['dataset']['dataframe'])  # table the annotations are merged against
label_col = annotation_task['dataset']['output_labels']          # column name used as the reference label
image_key_col = annotation_task['dataset']['image_path']         # column name used as the merge key for item_id
base_img_dir = annotation_task['dataset']['base_image_directory']

In [4]:
def _sheet_id_from_url(sheet_url):
    """
    Extract the spreadsheet id from a sharing URL.

    Handles ``.../d/<id>/edit?usp=sharing`` style links as well as links
    without the ``/edit`` suffix or query string, and a bare id.

    str.strip() must not be used for this: it removes any of the given
    *characters* from both ends, so an id ending in e.g. 'e', 'd', 'i' or 't'
    would lose its trailing characters.
    """
    # discard the query string and fragment, then split the path into segments
    path = sheet_url.split('?', 1)[0].split('#', 1)[0]
    segments = [seg for seg in path.split('/') if seg]
    if 'd' in segments[:-1]:
        # canonical form: the id is the segment right after '/d/'
        return segments[segments.index('d') + 1]
    if segments and segments[-1] == 'edit':
        segments = segments[:-1]
    return segments[-1] if segments else ''


base_sheet_url = annotation_task['google_forms']['sheet_url']
sheet_id = _sheet_id_from_url(base_sheet_url)

In [5]:
def _viewing_time(viewing_info):
    """Return the 'viewing_time' entry of a JSON string, or 0 when the cell is blank or lacks it."""
    if not viewing_info:
        # blank cells are padded with '' by get_sheet_as_df, which json.loads rejects
        return 0
    return json.loads(viewing_info).get('viewing_time', 0)


def _annotator_name(annotator):
    """Return everything after the first '_' with underscores as spaces, or the whole id if there is no '_'."""
    # str.find returns -1 (truthy) when '_' is absent and 0 (falsy) when it is
    # the first character, so it cannot be used as a containment test
    if '_' not in annotator:
        return annotator
    return ' '.join(annotator.split('_')[1:])


# Fetch all annotations and derive the columns used by the summaries below.
annot_df = get_sheet_as_df(sheet_api_url(sheet_id), _CELLSET_ID)
annot_df['Timestamp'] = pd.to_datetime(annot_df['Timestamp'])
annot_df['viewing_time'] = annot_df['viewing_info'].map(_viewing_time)
annot_df['annotator_class'] = annot_df['annotator'].map(lambda x: x.split('_')[0])
annot_df['annotator_name'] = annot_df['annotator'].map(_annotator_name)
print('Found', annot_df.shape[0], 'completed annotations')
print('Showing most recent 3 annotations')
annot_df.tail(3)

Found 93 completed annotations
Showing most recent 3 annotations


Unnamed: 0,Timestamp,annotation_mode,annotator,commit_info,item_id,label,session,task,time,viewing_info,viewing_time,annotator_class,annotator_name
90,2018-08-14 16:25:51,BinaryClass,Data_Joshy_Cyriac,,00022021_004.png,Yes,jupyter-chestrays-2djupyanno-2dbb84prlw,Pneumothorax,1646.383108,"{""viewing_time"": 2.9976861476898193}",2.997686,Data,Joshy Cyriac
91,2018-08-14 16:25:57,BinaryClass,Data_Joshy_Cyriac,,00030106_000.png,Yes,jupyter-chestrays-2djupyanno-2dbb84prlw,Infiltration,1652.549157,"{""viewing_time"": 5.941567897796631}",5.941568,Data,Joshy Cyriac
92,2018-08-14 16:26:00,BinaryClass,Data_Joshy_Cyriac,,00008295_012.png,No,jupyter-chestrays-2djupyanno-2dbb84prlw,Cardiomegaly,1655.645694,"{""viewing_time"": 2.8806371688842773}",2.880637,Data,Joshy Cyriac


In [6]:
# Per-annotator workload: total and mean viewing time plus the number of
# annotations, largest count first, rendered with in-cell bars.
(
    annot_df
    .groupby(['annotator_class', 'annotator_name'])
    .agg({'viewing_time': ['sum', 'mean'], 'label': len})
    .reset_index()
    .rename({'label': 'count'}, axis=1)
    .round(2)
    .sort_values(('count', 'len'), ascending=False)
    .style
    .bar(color='#d65f5f')
)

Unnamed: 0_level_0,annotator_class,annotator_name,viewing_time,viewing_time,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum,mean,len
0,Data,Joachim Hagger,194.74,6.49,30
1,Data,Joshy Cyriac,1648.14,54.94,30
2,Rad,Bram Stieltjes,406.54,13.55,30
3,random,githubber,14.04,4.68,3


In [7]:
results_list = []
# BinaryClass and MultiClass annotations are scored by different rules, so each
# annotation mode is processed on its own and the scored frames are collected here
def binary_correct(c_row):
    """
    Decide whether one BinaryClass annotation row is correct.

    Expects ``c_row['label']`` to hold the task name when the annotator
    answered 'Yes' and a missing value (None/NaN) otherwise, and
    ``c_row[label_col]`` to hold the reference label.

    :param c_row: row (or mapping) with 'label', 'task' and ``label_col`` entries
    :return: True when the answer agrees with the reference label, else False
    """
    answered = c_row['label']
    truth = c_row[label_col]
    if pd.isnull(answered):
        # a negative answer is wrong only when the reference label is exactly
        # the finding that was asked about
        return truth != c_row['task']
    # a positive answer is right only when it names the reference label
    return answered == truth
for c_mode, group_annot_df in annot_df.groupby('annotation_mode'):
    is_binary = (c_mode == 'BinaryClass')
    is_multi = (c_mode == 'MultiClass')

    group_annot_df = group_annot_df.copy()
    # keep the raw response; 'label' may be rewritten just below
    group_annot_df['answer'] = group_annot_df['label']
    if is_binary:
        # turn Yes/No into "the task name" / None so it compares like a class label
        group_annot_df['label'] = group_annot_df.apply(
            lambda c_row: c_row['task'] if c_row['label'] == 'Yes' else None,
            axis=1)
    elif not is_multi:
        print('Dashboard does not support {} problems yet!'.format(c_mode))

    # attach the dataset row for every annotated item
    c_results_df = pd.merge(group_annot_df,
                            data_df,
                            how='left',
                            left_on='item_id',
                            right_on=image_key_col)

    if is_binary:
        c_results_df['correct'] = c_results_df.apply(binary_correct, axis=1)
    elif is_multi:
        c_results_df['correct'] = c_results_df.apply(
            lambda c_row: c_row['label'] == c_row[label_col],
            axis=1)

    results_list.append(c_results_df)
results_df = pd.concat(results_list)

# My Personal Results
Here we show only the questions that you, the logged-in user, answered yourself.

In [8]:
from PIL import Image
import base64
from io import BytesIO
from IPython.display import HTML
_wrap_uri = lambda data_uri: "data:image/png;base64,{0}".format(data_uri)
def raw_html_render(temp_df):
    """
    For rendering html tables which contain HTML information and shouldn't be escaped or cropped

    The pandas column-width limit is lifted while the table is rendered and is
    always put back afterwards, even if rendering raises.

    :param temp_df: DataFrame whose cells may contain raw HTML
    :return: the table as an HTML string (no index, floats shown with 2 decimals, NaN as '')
    """
    old_wid = pd.get_option('display.max_colwidth')
    try:
        try:
            # None means "no limit" on current pandas
            pd.set_option('display.max_colwidth', None)
        except ValueError:
            # presumably an old pandas that only accepts integers here, where -1 meant "no limit"
            pd.set_option('display.max_colwidth', -1)
        return temp_df.to_html(classes="table table-striped table-hover",
                               escape=False,
                               float_format=lambda x: '%2.2f' % x,
                               na_rep='',
                               index=False,
                               max_rows=None,
                               max_cols=None)
    finally:
        pd.set_option('display.max_colwidth', old_wid)
def path_to_img(in_path):
    """Return an ``<img>`` tag embedding the image at ``in_path`` as a base64 PNG data URI."""
    rgb_img = Image.open(in_path).convert('RGB')
    # re-encode as PNG in memory
    png_buffer = BytesIO()
    rgb_img.save(png_buffer, format='png')
    b64_text = base64.b64encode(png_buffer.getvalue()).decode("ascii").replace("\n", "")
    return '<img src="{uri}"/>'.format(uri=_wrap_uri(b64_text))

In [9]:
# Answers given by the logged-in user, oldest first. Timestamp is only needed
# for ordering and is dropped before display.
my_answers_df = results_df[results_df['annotator']==USERNAME][['Timestamp', 'correct', image_key_col, 'task' ,'answer', label_col]].copy()

# axis must be passed by keyword: newer pandas rejects it as a positional argument to drop()
my_answers_df = my_answers_df.sort_values(['Timestamp'], ascending=True).drop(['Timestamp'], axis=1)
# replace each image file name with an inline <img> tag so the table shows the picture
my_answers_df[image_key_col] = my_answers_df[image_key_col].map(lambda x: path_to_img(os.path.join(base_img_dir, x)))
HTML(raw_html_render(my_answers_df.rename({image_key_col: 'Image', 
                                           'task': 'Question', 
                                           'answer': 'Your Answer', 
                                           label_col: 'Real Answer'},axis=1)))

correct,Image,Question,Your Answer,Real Answer
True,,Infiltration,Yes,Infiltration
False,,Pneumothorax,Yes,No Finding
False,,"Cardiomegaly,Effusion,Infiltration,No Finding,Pneumothorax",Pneumothorax,No Finding


# Overall Results
We can show the overall results by person / type of user

In [10]:
# Accuracy, mean viewing time and answer count per annotator, best accuracy first.
(
    results_df
    .groupby(['annotator_class', 'annotator_name'])
    .agg({'viewing_time': 'mean', 'label': len, 'correct': lambda x: 100*np.mean(x)})
    .reset_index()
    .rename({'label': 'count',
             'correct': 'Accuracy (%)',
             'viewing_time': 'Average Viewing Time (s)',
             'annotator_class': 'Type of User',
             'annotator_name': 'Name'}, axis=1)
    .round(1)
    .sort_values('Accuracy (%)', ascending=False)
    .style
    .background_gradient(cmap='hot', low=.5, high=0)
    .set_properties(**{'font-size': '12pt'})
)

Unnamed: 0,Type of User,Name,Average Viewing Time (s),count,Accuracy (%)
2,Rad,Bram Stieltjes,13.6,30,66.7
0,Data,Joachim Hagger,6.5,30,53.3
1,Data,Joshy Cyriac,54.9,30,46.7
3,random,githubber,4.7,3,33.3


## Disease to be identified 
Here we show the breakdown based on which condition the patient actually had

In [11]:
# Accuracy, mean viewing time and answer count per reference label, best accuracy first.
(
    results_df
    .groupby(label_col)
    .agg({'viewing_time': 'mean', 'label': len, 'correct': lambda x: 100*np.mean(x)})
    .reset_index()
    .rename({'label': 'count',
             'correct': 'Accuracy (%)',
             'viewing_time': 'Average Viewing Time (s)'}, axis=1)
    .round(1)
    .sort_values('Accuracy (%)', ascending=False)
    .style
    .background_gradient(cmap='hot', low=.5, high=0)
    .set_properties(**{'font-size': '12pt'})
)

Unnamed: 0,Finding Labels,Average Viewing Time (s),count,Accuracy (%)
2,Infiltration,42.7,18,66.7
0,Cardiomegaly,9.8,22,59.1
4,Pneumothorax,6.8,15,53.3
3,No Finding,25.3,23,52.2
1,Effusion,39.7,15,40.0


## Question Asked
Finally we show the breakdown based on the question asked

In [12]:
# Accuracy, mean viewing time and answer count per question asked, best accuracy first.
(
    results_df
    .groupby('task')
    .agg({'viewing_time': 'mean', 'label': len, 'correct': lambda x: 100*np.mean(x)})
    .reset_index()
    .rename({'label': 'count',
             'correct': 'Accuracy (%)',
             'viewing_time': 'Average Viewing Time (s)',
             'task': 'Question Asked'}, axis=1)
    .round(1)
    .sort_values('Accuracy (%)', ascending=False)
    .style
    .background_gradient(cmap='hot', low=.5, high=0)
    .set_properties(**{'font-size': '12pt'})
)

Unnamed: 0,Question Asked,Average Viewing Time (s),count,Accuracy (%)
5,Pneumothorax,46.0,17,64.7
3,Infiltration,20.6,17,58.8
4,No Finding,12.2,14,57.1
2,Effusion,35.3,17,52.9
0,Cardiomegaly,13.2,27,48.1
1,"Cardiomegaly,Effusion,Infiltration,No Finding,Pneumothorax",3.4,1,0.0


# Export Results
The full results can be exported and viewed in Excel or Google Sheets and analyzed even further

In [13]:
from IPython.display import FileLink

# Write every scored annotation to disk, then show a link to the file.
results_df.to_csv('results.csv', index=False)
print('Download Results')
FileLink('results.csv')

Download Results
