In [1]:
import pandas as pd
from collections import Counter
from PIL import Image,ImageDraw,ImageFont
from scipy.stats import spearmanr
import os
import numpy as np

In [2]:
# Load the three input tables, in this order:
#   limefeat_alphasort - one row per feature (columns used below: feature, record_type)
#   officer_table      - one row per officer (columns used below: officer_id, unit_score, feats)
#   df                 - score/outcome table (column used below: unq)
limefeat_alphasort, officer_table, df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "limefeat_alphasort.csv",
        "officer_table.csv",
        "lime-robustness-all-score-outcome.csv",
    )
)

In [3]:
def relister(liststring):
    """Parse the string form of a list (e.g. "['a', 'b']") into a list of str.

    Leading '[' and trailing ']' characters are stripped, the remainder is
    split on commas, and every space and single quote is removed from each
    piece. An empty list string ("[]") therefore yields [''].

    The original used the Python 2-only call ``str.translate(None, " '")``,
    which raises TypeError on Python 3; ``str.replace`` behaves the same on
    both versions.
    """
    inner = liststring.lstrip("[").rstrip("]")
    return [piece.replace(" ", "").replace("'", "") for piece in inner.split(",")]

In [4]:
def setstring2list(setstring):
    """Parse the string form of a set (e.g. "{'a', 'b'}") into a list of str.

    Leading '{' and trailing '}' characters are stripped, the remainder is
    split on commas, and every space and single quote is removed from each
    piece. The result is a list in the order the items appear in the string;
    an empty "{}" yields [''].

    The original used the Python 2-only call ``str.translate(None, " '")``,
    which raises TypeError on Python 3; ``str.replace`` behaves the same on
    both versions.
    """
    inner = setstring.lstrip("{").rstrip("}")
    return [piece.replace(" ", "").replace("'", "") for piece in inner.split(",")]

In [5]:
# Both columns were read from CSV as strings; turn them into Python lists.
# Not idempotent: re-running this cell without reloading the CSVs would pass
# lists (not strings) to the parsers.
officer_table['feats'] = officer_table['feats'].map(relister)
df['unq'] = df['unq'].map(setstring2list)

In [6]:
# Highest unit_score first, then renumber rows 0..n-1 so the row label is the
# officer's rank.
officer_table = (
    officer_table
    .sort_values(by="unit_score", ascending=False)
    .reset_index(drop=True)
)

#### Feature categories

In [7]:
# Hue (first hsl() component, kept as a string) for each feature record type.
cathue = dict(
    ts='0',
    fi='32',
    ir='60',
    shifts='115',
    dispatch='180',
    arrests='240',
    ic='270',
    demarrests='300',
    ocnd='200',
)

# Saturation (second hsl() component) per record type: every type is fully
# saturated.
catsat = dict.fromkeys(cathue, '100%')

# Percentage per window label ('1d', '1w', ...). Not referenced by any of the
# cells visible here.
catval = dict([
    ('1d', '5%'),
    ('1w', '25%'),
    ('1m', '50%'),
    ('1y', '75%'),
    ('5y', '100%'),
    ('all', '100%'),
])

#### X (test feature matrix)

In [8]:
X = pd.read_csv("32092_2014-04-11_test-matrix-and-labels.csv")
# Only the feature columns are drawn; drop the label and date columns.
X = X.drop(['outcome', 'as_of_date'], axis=1)

# offtablefeat looks up officer_table.type_flag by X's row label, so row k of X
# must describe the officer in row k of officer_table. Sorting both frames
# independently by unit_score does not guarantee that: sort_values defaults to
# an unstable quicksort, so officers with tied scores can come out in a
# different order in each frame. officer_table is already sorted by descending
# unit_score, so order X by each officer's row label in officer_table instead.
officer_row = pd.Series(
    officer_table.index.values,
    index=officer_table['officer_id'].values,
    name='officer_row',
)
X = X.join(officer_row, on='officer_id')
# Stable sort; officers absent from officer_table get NaN and are placed last,
# exactly where their NaN unit_score used to put them.
X = X.sort_values(by='officer_row', kind='mergesort').reset_index(drop=True)
X = X.drop(['officer_id', 'officer_row'], axis=1)

In [9]:
# Goal: flag the officers whose features come only from the ts, ir and ocnd
# record types.
# NOTE(review): type_flag only stores HOW MANY distinct record types an officer
# has (offtablefeat tests for == 3); it does not check that those three types
# are ts, ir and ocnd. Confirm that the count is a sufficient proxy.

# Build the feature -> record_type lookup once instead of filtering the whole
# feature table for every feature of every officer. drop_duplicates keeps the
# first row per feature, matching the previous ".iloc[0]" choice.
first_feature_rows = limefeat_alphasort.drop_duplicates(subset='feature')
record_type_of = dict(zip(first_feature_rows.feature, first_feature_rows.record_type))

# A feature missing from limefeat_alphasort still fails loudly (KeyError).
officer_table['feature_type_sets'] = [
    set(record_type_of[feature] for feature in features)
    for features in officer_table.feats
]
officer_table['type_flag'] = [len(item) for item in officer_table.feature_type_sets]

In [10]:
# Size, in pixels, of one grid cell.
rectwidth = rectheight = 16
# Canvas size: one cell column per row of limefeat_alphasort, one cell row per
# row of X.
px_w, px_h = rectwidth * len(limefeat_alphasort), rectheight * len(X)

In [11]:
def offtablefeat(X,feature_table):
    """Render the feature matrix ``X`` as a colour-coded grid image.

    One ``rectwidth`` x ``rectheight`` cell is painted for every (row, column)
    pair of ``X``. The cell's horizontal position is the index label of the
    column's feature in ``feature_table``; its vertical position is the row
    label in ``X``, so ``X`` is expected to carry a 0..n-1 index.

    Colour of a cell, as ``hsl(hue, sat, val)``:
      * hue - ``cathue`` entry for the feature's record type;
      * sat - '0%' when the value in ``X`` equals 0, otherwise the ``catsat``
        entry for the record type;
      * val - '65%' when ``officer_table.type_flag`` for that row label is 3,
        otherwise '25%'.
    A cell whose record type is missing from the colour tables, or whose row
    label is missing from ``officer_table``, is painted white.

    Reads the module-level names ``rectwidth``, ``rectheight``, ``cathue``,
    ``catsat`` and ``officer_table``.

    Parameters
    ----------
    X : DataFrame whose columns are feature names.
    feature_table : DataFrame with ``feature`` and ``record_type`` columns.

    Returns
    -------
    PIL.Image.Image
        RGB image of ``len(feature_table) * rectwidth`` by
        ``len(X) * rectheight`` pixels.

    Raises
    ------
    IndexError
        If a column of ``X`` has no matching row in ``feature_table``.
    """
    # Size the canvas from the arguments rather than the px_w / px_h globals,
    # so the image always matches the frames actually being drawn.
    canvas = Image.new(
        'RGB',
        (len(feature_table) * rectwidth, len(X) * rectheight),
        'hsl(0,0%,100%)',
    )
    # One drawing context for the whole image (was re-created for every cell).
    draw = ImageDraw.Draw(canvas)

    # Resolve each column's record type and x offset once, not once per row.
    columns = []
    for feat in X.columns:
        match = feature_table[feature_table.feature==feat]
        columns.append((feat, match.record_type.iloc[0], match.index[0] * rectwidth))

    for i in X.index:
        ycoord = i * rectheight

        for feat, record_type, xcoord in columns:
            # Pillow rectangles include both corner points, so stop one pixel
            # short to keep each cell exactly rectwidth x rectheight and avoid
            # bleeding into the neighbouring cells.
            bbox = [xcoord, ycoord, xcoord + rectwidth - 1, ycoord + rectheight - 1]

            try:
                hue = cathue[record_type]

                # X and officer_table are expected to share row labels.
                if officer_table.type_flag.loc[i]==3:
                    val = '65%'
                else:
                    val = '25%'

                if X[feat].loc[i]==0:
                    sat = '0%'
                else:
                    sat = catsat[record_type]

            # Only a failed lookup means "no colour for this cell"; any other
            # error is a real bug and must not be hidden behind a white cell.
            except KeyError: # white
                hue = '0'
                sat = '0%'
                val = '100%'

            fillcolor = 'hsl('+hue+","+sat+","+val+")"
            draw.rectangle(bbox,fill=fillcolor,outline=None)
    return canvas

In [12]:
# Render the whole matrix and write it to X.png in the working directory.
feature_grid = offtablefeat(X, limefeat_alphasort)
feature_grid.save("X.png")