In [1]:
import pandas as pd
from collections import Counter
from PIL import Image,ImageDraw,ImageFont
from scipy.stats import spearmanr
import os
import numpy as np

In [2]:
# When True, the drawing code picks 65% lightness for a coloured cell where the
# corresponding `outcomes` value equals 1 and 25% otherwise; when False every
# coloured cell uses 50% lightness. The value also appears in the output file name.
highlight = False # optional to highlight ground truth flag
# When True, feature_table is sorted by descending feature_importance instead of
# keeping the row order of the input CSV. The value also appears in the output file name.
colsort = True # optional to override alphasort

In [3]:
# Per-feature metadata. Later cells read its `feature`, `feature_importance`,
# `record_type` and `featstr` columns.
feature_table = pd.read_csv("170728_2015-01-01_alphasort.csv")

In [4]:
# Keep only the features whose importance exceeds 0.001.
feature_table = feature_table.loc[feature_table['feature_importance'] > 0.001]

# Optionally rank the features, most important first.
if colsort==True:
    feature_table = feature_table.sort_values('feature_importance',ascending=False)

# Renumber the rows 0..n-1; the drawing code uses this index as the
# horizontal position of each feature.
feature_table = feature_table.reset_index(drop=True)
top_feats = feature_table['feature'].tolist()

#### Color settings

In [5]:
# Hue (HSL degrees, kept as strings so they can be spliced into colour
# specifiers) for each record type.
cathue = dict(
    ts='0',
    fi='32',
    ir='60',
    shifts='115',
    ocnd='200',
    dispatch='180',
    arrests='240',
    ic='270',
    demarrests='300',
)

# Greys, listed from darkest to lightest.
linecolordark = 'hsl(0,0%,0%)'
linecolormed = 'hsl(0,0%,25%)'
linecolorlight = 'hsl(0,0%,33%)'
bkgdcolor = 'hsl(0,0%,50%)'
textcolor = 'hsl(0,0%,100%)'

#### X: test matrix, labels and scores

In [6]:
# Load the scored test matrix, highest score first, keyed by officer_id.
X = (
    pd.read_csv("170728_2015-01-01_test-matrix-and-labels-and-scores.csv")
    .sort_values(by='score',ascending=False)
    .set_index('officer_id')
)

# Set the label and score columns aside, then remove every non-feature column.
outcomes = X.outcome
scores = X.score
X = X.drop(['outcome','score','as_of_date'],axis=1)

In [7]:
# Keep only the retained features, in feature_table row order.
X = X.loc[:,top_feats]

#### Mapping saturation to data values

In [8]:
from scipy.stats import percentileofscore as pct

In [9]:
# Work on a copy: a bare `Xpct = X` only binds a second name to the same
# DataFrame, so the column rescaling below would silently overwrite X too.
Xpct = X.copy()

In [10]:
def norm(col):
    """Min-max scale a sequence of numbers to integers in 0..100.

    Parameters
    ----------
    col : iterable of numbers (e.g. a list or a pandas Series)

    Returns
    -------
    list of int
        One value per input item: 0 for the minimum, 100 for the maximum.
        An empty input gives an empty list, and a constant input (zero
        range) gives all zeros instead of dividing by zero.
    """
    values = list(col)
    if not values:
        return []
    # Compute the extremes once rather than once per element.
    lo = min(values)
    rng = max(values) - lo
    if rng == 0:
        return [0] * len(values)
    return [int(round((item - lo) / float(rng) * 100)) for item in values]

In [11]:
def twocat(cell):
    """Map a binary flag to a display level: 1 -> 65 and 0 -> 25.

    Any other value is returned unchanged. The original reassigned `cell`
    but never returned it, so every call evaluated to None.
    """
    if cell==1:
        return 65
    if cell==0:
        return 25
    return cell

In [12]:
# Rescale every column of Xpct for drawing:
#   * a column with no positive value becomes the constant 0
#   * a column whose name contains 'ocnd' is passed through twocat
#   * every other column is min-max scaled by norm
for counter, col in enumerate(Xpct.columns):
    # Sparse progress indicator: prints about one column position in 100.
    if np.random.binomial(1,0.01)==1:
        print(counter)
    if (Xpct[col]<=0).all():
        Xpct[col] = 0
    elif 'ocnd' in col:
        Xpct[col] = Xpct[col].apply(twocat)
    else:
        # Read from the frame being written so the result does not depend
        # on Xpct and X being the same object.
        Xpct[col] = norm(Xpct[col])

38
99
139


In [14]:
# Edge lengths, in pixels, of one cell of the grid.
rectwidth = rectheight = 16

# Canvas size: one cell per feature across, one cell per row of X down.
px_w = rectwidth * len(feature_table)
px_h = rectheight * len(X)

In [15]:
def offtablefeat(X,feature_table):
    """Render a matrix of pre-scaled feature levels as a grid of coloured cells.

    Every row of ``X`` becomes one row of cells ``rectheight`` pixels tall and
    every column of ``X`` one column of cells ``rectwidth`` pixels wide. A
    feature's horizontal position is the index label of its row in
    ``feature_table``; its hue is looked up in ``cathue`` by ``record_type``.

    Cell colour (HSL):
      * record type present in ``cathue``: that hue, saturation equal to the
        cell's level in percent (0% when the level is missing), lightness
        50% -- or, when the module-level ``highlight`` flag is on, 65% for
        rows whose ``outcomes`` value equals 1 and 25% for all other rows;
      * record type absent from ``cathue``: grey with lightness
        ``100 - level`` percent (white when the level is missing), so that
        high values are dark.

    Parameters
    ----------
    X : pandas.DataFrame
        One column per feature. Values are spliced into the colour string
        as percentages, so they are presumably already scaled to 0..100.
    feature_table : pandas.DataFrame
        Needs ``feature`` and ``record_type`` columns and one row for every
        column of ``X``.

    Returns
    -------
    PIL Image
        RGB canvas of size ``(px_w, px_h)``.

    Raises
    ------
    IndexError
        If a column of ``X`` has no matching row in ``feature_table``.

    Notes
    -----
    Reads the module-level names ``px_w``, ``px_h``, ``bkgdcolor``,
    ``rectwidth``, ``rectheight``, ``cathue``, ``highlight`` and
    ``outcomes``. ``outcomes`` is read by row position, so it must be in the
    same row order as ``X``. The original used ``outcomes.loc[i]`` with a row
    position, which treats the position as an index label; the resulting
    lookup error was hidden by a bare ``except``.
    """
    canvas = Image.new('RGB',(px_w,px_h),bkgdcolor)
    # One drawing context serves the whole canvas.
    draw = ImageDraw.Draw(canvas)

    # Resolve each feature's x position and hue once, not once per cell.
    columns = []
    for feat in X.columns:
        meta = feature_table[feature_table.feature==feat]
        record_type = meta.record_type.iloc[0]
        xcoord = meta.index[0] * rectwidth
        # None marks a record type that has no assigned hue.
        columns.append((X[feat], xcoord, cathue.get(record_type)))

    for i in range(len(X)):
        ycoord = i * rectheight

        # Lightness shared by every coloured cell of this row.
        if highlight==True:
            colored_val = '65%' if outcomes.iloc[i]==1 else '25%'
        else:
            colored_val = '50%'

        for values, xcoord, hue in columns:
            level = values.iloc[i]
            # Treat None and NaN alike; 'nan%' is not a valid colour component.
            missing = pd.isnull(level)

            if hue is not None:
                cell_hue = hue
                sat = '0%' if missing else str(level)+'%'
                val = colored_val
            else:
                cell_hue = '0'
                sat = '0%'
                # so low is white and black is high
                val = '100%' if missing else str(100-level)+'%'

            bbox = [xcoord, ycoord, xcoord + rectwidth, ycoord + rectheight]
            fillcolor = 'hsl('+cell_hue+","+sat+","+val+")"
            draw.rectangle(bbox,fill=fillcolor,outline=None)

            # Sparse progress indicator: prints the row number for about
            # one cell in 2000.
            if np.random.binomial(1,0.0005)==1:
                print(i)
    return canvas

In [16]:
# Render the scaled matrix, drawing one rectangle per (row, feature) cell.
# The numbers printed while this runs are randomly sampled row positions.
im = offtablefeat(Xpct,feature_table)

11
13
20
42
45
78
98
100
121
142
160
172
176
180
200
237
262
296
298
304
307
318
328
346
362
379
385
418
441
446
497
518
519
532
541
552
556
559
581
581
582
594
602
604
604
611
613
629
634
636
637
659
674
685
688
692
711
711
717
722
732
741
746
748
755
796
809
851
852
855
872
879
890
895
896
923
923
924
928
929
939
965
968
974
979
997
1022
1026
1031
1059
1066
1069
1088
1093
1102
1109
1156
1159
1191
1198
1200
1210
1215
1229
1232
1238
1269
1274
1277
1284
1295
1302
1317
1337
1352
1352
1366
1369
1372
1379
1386
1392
1406
1422
1442
1443
1445
1446
1456
1461
1483
1487
1489
1496
1498
1500
1503
1509
1524
1534
1536
1544
1548


In [17]:
# Add a left margin x pixels wide for the row labels and paste the grid
# to the right of it.
x = 120
w, h = im.size
canvas = Image.new('RGB',(w+x,h),bkgdcolor)
canvas.paste(im,(x,0))

In [18]:
# Write "<officer_id>,<score>" in the left margin of every row and rule a
# horizontal line along the top edge of each row.
draw = ImageDraw.Draw(canvas)

# Loop-invariant values: load the font once and fix the label x position
# and the right end of the rules.
font = ImageFont.truetype('VeraMono.ttf', rectheight-1)
xpos = 5
linex = w+x

for counter, i in enumerate(Xpct.index):
    score = scores.loc[i]
    # Only the first four characters of the score are shown.
    text = str(i)+","+str(score)[:4]

    ypos = counter * rectheight
    draw.text((xpos,ypos),text=text,font=font,fill=textcolor)
    draw.line([(0,ypos),(linex,ypos)],width=1,fill=linecolordark)

In [19]:
# Extend the image upward by y pixels to make room for the column labels.
y = 600
w, h = canvas.size
unicanvas = Image.new('RGB',(w,h+y),bkgdcolor)
unicanvas.paste(canvas,(0,y))

In [20]:
# Write each feature's label vertically above its column, on a patch tinted
# with the hue of its record type, and rule a vertical line at the left
# edge of every column.
draw = ImageDraw.Draw(unicanvas)

# Loop-invariant values: load the font once; label lightness is fixed.
font = ImageFont.truetype('VeraMono.ttf', rectwidth-1)
val = '50%'

for i in feature_table.index:
    text = str(feature_table.featstr.loc[i])
    record_type = feature_table.record_type.loc[i]

    # A record type without an assigned hue gets a neutral grey patch.
    # This is an explicit lookup instead of a bare `except`, so unrelated
    # errors are no longer swallowed.
    hue = cathue.get(record_type)
    if hue is None:
        hue = '0'
        sat = '0%'
    else:
        sat = '50%'

    fillcolor = 'hsl('+hue+","+sat+","+val+")"

    # Draw the text horizontally on its own patch, then rotate the patch.
    fontWidth, fontHeight = font.getsize(text)
    imtmp = Image.new('RGBA', (fontWidth, fontHeight),fillcolor)
    drawtmp = ImageDraw.Draw(imtmp)
    drawtmp.text((0,0),text=text,font=font,fill=textcolor)
    imtmp = imtmp.rotate(90,expand=1)

    # Bottom edge of the label sits 2 px above the grid.
    xpos = i * rectwidth + x - 3
    ypos = y - imtmp.height - 2
    unicanvas.paste(imtmp,(xpos,ypos),imtmp)

    linex = i * rectwidth + x
    liney = y
    draw.line([(linex,liney),(linex,unicanvas.height)],width=1,fill=linecolordark)

In [21]:
# Save to the current user's Desktop rather than one hardcoded home directory.
# The file name records the `highlight` and `colsort` settings.
outdir = os.path.join(os.path.expanduser("~"),"Desktop")
unicanvas.save(os.path.join(outdir,"X_"+str(highlight)+"_"+str(colsort)+".png"))