In [1]:
import pandas as pd
from PIL import Image,ImageDraw,ImageFont
import os
import numpy as np

In [2]:
# Load the per-feature table; the following cells read its `feature` and
# `feature_importance` columns.
feature_table = pd.read_csv("170728_2015-01-01_alphasort.csv")

In [3]:
# Keep only the rows whose importance exceeds 0.0015 and renumber them from 0.
feature_table = (
    feature_table
    .loc[feature_table.feature_importance > 0.0015]
    .reset_index(drop=True)
)

# Names of the surviving features, in table order.
top_feats = list(feature_table.feature)

In [4]:
# Display how many feature names were selected.
len(top_feats)

5

#### Color settings

In [5]:
# Hue per feature-category key, stored as strings.
# NOTE(review): presumably degrees for an 'hsl(...)' colour string; nothing in
# the visible notebook reads this mapping, so confirm before relying on it.
cathue = {
    'ts': '0',
    'fi': '32',
    'ir': '60',
    'shifts': '115',
    'dispatch': '180',
    'ocnd': '200',
    'arrests': '240',
    'ic': '270',
    'demarrests': '300',
}

#### Test matrix

In [6]:
# Read the scored test matrix and order its rows from highest to lowest score.
test_matrix = pd.read_csv(
    "170728_2015-01-01_test-matrix-and-labels-and-scores.csv"
).sort_values(by='score',ascending=False)

# Keep the label and score columns as separate Series; they retain the sorted
# row index, so they can still be matched against rows of test_matrix.
outcomes = test_matrix['outcome']
scores = test_matrix['score']

# Remove the label, score and identifier columns from the matrix itself.
test_matrix = test_matrix.drop(
    ['outcome','score','as_of_date','officer_id'],
    axis=1,
)

In [7]:
# Restrict the matrix to the selected feature columns, in top_feats order, so
# that column position i corresponds to top_feats[i].
test_matrix = test_matrix.loc[:,top_feats]

## Binning

In [8]:
def binticks(matrix,featidx):
    """Return 101 equally spaced bin edges (100 bins) spanning one column.

    The edges are built with ``np.linspace`` rather than ``np.arange`` with a
    float step: ``arange`` may produce one edge too many or leave the last edge
    slightly off the column maximum because of floating-point rounding, whereas
    ``linspace`` always returns exactly 101 edges whose first and last values
    equal the column minimum and maximum.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Table holding the feature column.
    featidx : int
        Positional index of the column within ``matrix``.

    Returns
    -------
    numpy.ndarray
        Strictly increasing array of 101 edges from the column minimum to the
        column maximum (both included).

    Raises
    ------
    ValueError
        If the column has no spread (constant, empty, or all-NaN), since no
        increasing set of edges can be built from it.
    """
    tmp = matrix.iloc[:,featidx]
    # Series.min/max skip NaN; the builtins give order-dependent results there.
    tmpmin = tmp.min()
    tmpmax = tmp.max()
    if not tmpmax > tmpmin:
        raise ValueError(
            "cannot bin column {!r}: min ({!r}) and max ({!r}) do not span a range".format(
                matrix.columns[featidx],tmpmin,tmpmax
            )
        )
    return np.linspace(tmpmin,tmpmax,101)

In [9]:
def labelticks(matrix,featidx):
    """Return 20 equally spaced axis-label values for one column.

    The values start at the column minimum and advance by one twentieth of the
    column range; the maximum itself is not included. ``np.linspace`` with
    ``endpoint=False`` is used instead of ``np.arange`` with a float step so
    that rounding can never add a 21st value approximately equal to the
    maximum.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Table holding the feature column.
    featidx : int
        Positional index of the column within ``matrix``.

    Returns
    -------
    numpy.ndarray
        Array of exactly 20 values. For a constant column all 20 values are
        equal to that constant.
    """
    tmp = matrix.iloc[:,featidx]
    # Series.min/max skip NaN; the builtins give order-dependent results there.
    tmpmin = tmp.min()
    tmpmax = tmp.max()
    return np.linspace(tmpmin,tmpmax,20,endpoint=False)

In [10]:
def minmax(matrix,featidx):
    """Return ``(smallest, largest)`` for the column at position ``featidx``.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Table holding the feature column.
    featidx : int
        Positional index of the column within ``matrix``.
    """
    column = matrix.iloc[:,featidx]
    return min(column),max(column)

### Axis labels

In [11]:
def addlabels(canvas,ticks):
    """Return a copy of ``canvas`` with a white label strip added underneath.

    The new image is 64 pixels taller than ``canvas``. Each entry of ``ticks``
    is truncated to an integer and drawn in black, left to right, with one
    label every ``5 * rectwidth`` pixels.

    Reads the module-level ``rectwidth`` (label spacing) and ``rectheight``
    (font size is ``rectheight - 10``), and loads the font file
    ``VeraMono.ttf``.

    Parameters
    ----------
    canvas : PIL.Image.Image
        Image to extend; it is pasted unchanged at the top-left corner.
    ticks : sequence of numbers
        Label values, in drawing order.

    Returns
    -------
    PIL.Image.Image
        New RGB image containing ``canvas`` plus the label strip.
    """
    strip = 64  # height of the added label strip, in pixels
    total_height = canvas.height + strip

    labelled = Image.new('RGB',(canvas.width,total_height),(255,255,255))
    labelled.paste(canvas,(0,0))

    font = ImageFont.truetype('VeraMono.ttf', rectheight - 10 )
    pen = ImageDraw.Draw(labelled,'RGB')

    # Text is anchored 60 px above the bottom edge, i.e. 4 px into the strip.
    text_y = total_height - 60
    for slot,tick in enumerate(ticks):
        label_x = slot * rectwidth * 5
        pen.text((label_x,text_y),text=str(int(tick)),font=font,fill=(0,0,0))

    return labelled

# Test matrix plots

In [12]:
def scorenorm(item):
    """Map a score to an inverted 0-100 integer scale.

    The score is min-max normalised against the module-level ``scores``
    collection and inverted, so the largest score maps to 0 and the smallest
    to 100. The result is truncated to an ``int``.

    Parameters
    ----------
    item : number
        A single score value.
    """
    lowest = min(scores)
    span = max(scores) - lowest
    fraction = ( item - lowest ) / span
    return int( ( 1 - fraction ) * 100 )

In [13]:
def dblriskhist(X,featidx,rectwidth,rectheight,pad):
    """Render a two-panel unit histogram of one feature, split by outcome.

    Rows of ``X`` where ``outcomes == 1`` are drawn in the top panel and rows
    where ``outcomes == 0`` in the bottom panel, one rectangle per row, stacked
    per bin. The bottom panel is flipped vertically so its stacks grow
    downwards. Every rectangle is filled with a grey whose lightness is
    ``scorenorm(scores[i])`` for that row's index label ``i``. An orange step
    line drawn on the top panel shows, for each occupied bottom-panel bin, the
    bin count rescaled by ``len(pos) / len(neg)``. Axis labels from
    ``addlabels`` are added beneath the top panel.

    Bin edges and label values are always derived from the module-level
    ``test_matrix`` (not from ``X``). The function also reads the module-level
    ``outcomes`` and ``scores``.

    Parameters
    ----------
    X : pandas.DataFrame
        Rows to draw; indexed compatibly with ``outcomes`` and ``scores``.
    featidx : int
        Positional index of the feature column.
    rectwidth, rectheight : int
        Size in pixels of one rectangle (one row of data).
    pad : int
        Inset applied to each rectangle of the top panel.

    Returns
    -------
    PIL.Image.Image
        The combined image: top panel, label strip, flipped bottom panel.
    """
    # The bin edges are the same for both panels, so compute them once.
    bins = binticks(test_matrix,featidx)
    pos = pd.cut(X[outcomes==1].iloc[:,featidx],bins=bins,labels=False,include_lowest=True)
    neg = pd.cut(X[outcomes==0].iloc[:,featidx],bins=bins,labels=False,include_lowest=True)

    px_w = ( len(bins) - 1 ) * rectwidth # a hack; won't work with all subsets

    ###########
    ### POS ###
    ###########

    poscounts = pos.value_counts()
    ppx_h = poscounts.max() * rectheight  # tallest stack sets the panel height

    up = Image.new('RGB',(px_w,ppx_h),(255,255,255))
    updraw = ImageDraw.Draw(up)

    for binn in poscounts.keys():
        xpos = binn * rectwidth
        ypos = ppx_h - rectheight  # start at the bottom row and stack upwards

        tmp = pos[pos==binn]

        for i in tmp.index:
            bbox = [(xpos+pad,ypos+pad),(xpos+rectwidth-pad,ypos+rectheight-pad)]
            updraw.rectangle(bbox,fill='hsl(0,0%,'+str(scorenorm(scores[i]))+'%)',outline=(189,189,189))
            ypos = ypos - rectheight

    up = addlabels(up,labelticks(test_matrix,featidx))

    ###########
    ### NEG ###
    ###########

    negcounts = neg.value_counts()
    npx_h = negcounts.max() * rectheight

    down = Image.new('RGB',(px_w,npx_h),(255,255,255))
    downdraw = ImageDraw.Draw(down)
    updraw = ImageDraw.Draw(up) # have to reset to draw overheight lines

    # Loop-invariant totals used to rescale negative counts onto the top panel.
    nneg = len(neg)
    npos = len(pos)

    # Right-hand end of the previously drawn step segment; None until the first
    # bin has been drawn, so no connector is drawn into the first segment.
    prev_end = None

    for binn in sorted(negcounts.keys()): # sorted so connectors run left to right
        xpos = binn * rectwidth
        ypos = npx_h - rectheight

        tmp = neg[neg==binn]
        overheight = int( len(tmp) * npos / nneg )

        for i in tmp.index:
            # NOTE(review): unlike the top panel, `pad` is not applied here.
            bbox = [(xpos,ypos),(xpos+rectwidth,ypos+rectheight)]
            downdraw.rectangle(bbox,fill='hsl(0,0%,'+str(scorenorm(scores[i]))+'%)',outline=(189,189,189))
            ypos = ypos - rectheight

        # Horizontal segment of the step line at the rescaled height.
        ypos = ppx_h - overheight*rectheight
        updraw.line([(xpos,ypos),(xpos+rectwidth,ypos)],fill=(255,128,0),width=13)

        # Connect the previous segment's right end to this segment's left end.
        if prev_end is not None:
            updraw.line([prev_end,(xpos,ypos)],fill=(255,128,0),width=13)

        prev_end = (xpos+rectwidth,ypos)

    unicanvas = Image.new('RGB',(px_w,up.height+npx_h),(255,255,255))
    unicanvas.paste(up,(0,0))

    down_flipped = down.transpose(Image.FLIP_TOP_BOTTOM)
    unicanvas.paste(down_flipped,(0,up.height))

    return unicanvas

In [14]:
# Pixel geometry of one histogram rectangle (width, height) and the inset
# applied when drawing it. rectwidth and rectheight are also read as globals
# by addlabels.
rectwidth, rectheight, pad = 64, 64, 0

In [15]:
# Render one PNG per selected feature, named after the feature.
outdir = "/Users/damoncrockett/Desktop/tmpwintour/PILplatz/dblrisk_white/"

# Create the output directory up front so a missing folder does not make
# im.save fail after the image has already been rendered.
if not os.path.isdir(outdir):
    os.makedirs(outdir)

for featidx,featname in enumerate(top_feats):
    im = dblriskhist(test_matrix,featidx,rectwidth,rectheight,pad)
    im.save(os.path.join(outdir,featname+".png"))