In [62]:
import pandas as pd
from PIL import Image,ImageDraw,ImageFont
import os
import numpy as np
import math

In [63]:
# Feature table for model 193398; later cells read its `feature` and
# `feature_importance` columns.
feature_table = pd.read_csv("193398_feattable.csv")

In [64]:
# Individual importances; later cells read its `entity_id` column and the
# `risk_1` .. `risk_4` columns.
ii = pd.read_csv("193398_individual_importances.csv")

In [65]:
# Keep only features whose importance exceeds 0.004, renumber the surviving
# rows from zero, and collect their names in table order.
feature_table = feature_table.loc[
    feature_table["feature_importance"] > 0.004
].reset_index(drop=True)
top_feats = list(feature_table["feature"])

In [66]:
# How many features passed the importance cutoff.
len(top_feats)

5

In [67]:
# Display the selected feature names.
top_feats

['ocag_id_all_officerage_max',
 'dispatch_id_p1y_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p1m_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p1y_dispatchtype_assist_sum',
 'dispatch_id_p1y_dispatchtype_assault_sum']

#### test matrix

In [68]:
# Load the test-set matrix and index its rows by officer_id.
test_matrix = pd.read_csv("193398_test_2014-04-01.csv").set_index("officer_id")

In [69]:
# Order rows from highest to lowest score, then keep handles on the outcome
# and score columns of the sorted matrix.
test_matrix = test_matrix.sort_values(by='score', ascending=False)
labels = test_matrix['outcome']
scores = test_matrix['score']

In [70]:
# Drop every row whose 'ocag_id_all_officerage_max' value is exactly 0.
test_matrix = test_matrix.loc[test_matrix['ocag_id_all_officerage_max'] != 0]

In [71]:
# Class balance of the outcome column after the filtering above.
test_matrix.outcome.value_counts()

0    1309
1     156
Name: outcome, dtype: int64

In [72]:
# Base rate of a positive outcome: positives / (positives + negatives).
# updown() multiplies this by the TOTAL number of officers in a bin to get the
# expected number of positives, so it must be a probability.  Dividing the
# positive count by the negative count alone (156/1309) gives the odds and
# overstates the expectation.  Computing it from the data also keeps it in
# step with whatever filtering has been applied to test_matrix.
prior = float((test_matrix.outcome == 1).mean())

## Binning

In [73]:
def binticks_label(matrix,feat):
    """Return 101 evenly spaced bin edges spanning column `feat` of `matrix`.

    The edges run from the column minimum to the column maximum inclusive,
    delimiting 100 equal-width bins.

    np.linspace is used instead of np.arange(min, max + step, step) because
    arange with a non-integer step can return one element more or fewer than
    intended depending on floating-point rounding, and because `range / 100`
    floors to an integer (possibly 0) for integer columns under Python 2.
    Series.min()/max() skip NaN, unlike the builtin min()/max().

    Parameters
    ----------
    matrix : pandas.DataFrame
    feat : column label in `matrix`

    Returns
    -------
    numpy.ndarray of 101 floats.  If the column is constant, all edges are
    equal.
    """
    column = matrix.loc[:,feat]
    return np.linspace(column.min(), column.max(), 101)

In [74]:
def labelticks(matrix,feat):
    """Return 21 evenly spaced axis-label values spanning column `feat`.

    The values run from the column minimum to the column maximum inclusive,
    i.e. one tick every 1/20th of the range.

    np.linspace is used instead of np.arange(min, max + step, step) because
    arange with a non-integer step can return one element more or fewer than
    intended depending on floating-point rounding, and because `range / 20`
    floors to an integer (possibly 0) for integer columns under Python 2.
    Series.min()/max() skip NaN, unlike the builtin min()/max().

    Parameters
    ----------
    matrix : pandas.DataFrame
    feat : column label in `matrix`

    Returns
    -------
    numpy.ndarray of 21 floats.
    """
    column = matrix.loc[:,feat]
    return np.linspace(column.min(), column.max(), 21)

In [75]:
def minmax_label(matrix,feat):
    """Return (minimum, maximum) of column `feat` in `matrix`.

    Uses the vectorised Series.min()/max(), which skip NaN.  The builtin
    min()/max() iterate element by element and give a result that depends on
    where a NaN happens to sit in the column.

    Parameters
    ----------
    matrix : pandas.DataFrame
    feat : column label in `matrix`

    Returns
    -------
    tuple (min, max)
    """
    column = matrix.loc[:,feat]
    return column.min(), column.max()

### Axis labels

In [76]:
def addlabels(canvas,ticks):
    """Return a new image: `canvas` with a white label strip appended below it.

    The strip is 28 pixels tall.  Tick number k is written as str(tick) with
    its left edge at x = k * rectwidth * 5, at the top of the strip.

    Reads the module-level `rectwidth` and `rectheight`; the font size is
    rectheight - 8.  Requires the 'VeraMono.ttf' font to be loadable by PIL.

    Parameters
    ----------
    canvas : PIL image to extend (not modified)
    ticks : sequence of values to print along the strip

    Returns
    -------
    PIL 'RGB' image of size (canvas.width, canvas.height + 28).
    """
    margin = 28
    total_height = canvas.height + margin

    labelled = Image.new('RGB', (canvas.width, total_height), (255,255,255))
    labelled.paste(canvas, (0,0))

    label_font = ImageFont.truetype('VeraMono.ttf', rectheight - 8)
    pen = ImageDraw.Draw(labelled, 'RGB')

    label_y = total_height - 28
    for slot, tick in enumerate(ticks):
        pen.text((slot * rectwidth * 5, label_y),
                 text=str(tick), font=label_font, fill=(0,0,0))

    return labelled

# test plot

In [77]:
def _bin_lifts(pos, neg, nbins):
    """Return, for each index in range(nbins), actual minus expected positives.

    `pos` / `neg` hold the bin index of every positive / negative row.  The
    expected count is int(n * prior), where n is the bin's total row count and
    `prior` is the module-level base rate.
    """
    lifts = []
    for binn in range(nbins):
        actual_positives = int((pos == binn).sum())
        n = actual_positives + int((neg == binn).sum())
        expected_positives = int(n * float(prior))
        lifts.append(actual_positives - expected_positives)
    return lifts


def _draw_step_panel(size, steps, rectwidth, rectheight, colour):
    """Draw a step line on a new white image, measured up from its bottom edge.

    Entry k of `steps` produces a horizontal segment one `rectwidth` wide at
    steps[k] * rectheight pixels above the bottom, joined to the previous
    segment by a connecting line.
    """
    panel_w, panel_h = size
    panel = Image.new('RGB', (panel_w, panel_h), (255,255,255))
    draw = ImageDraw.Draw(panel)

    previous = None  # (xpos, ypos) of the preceding segment; None for the first
    for binn, step in enumerate(steps):
        xpos = binn * rectwidth
        ypos = panel_h - step * rectheight

        draw.line([(xpos,ypos),(xpos+rectwidth,ypos)], fill=colour, width=6)

        # Join to the previous segment.  An explicit check replaces the former
        # bare `except: pass`, which hid every error raised while drawing.
        if previous is not None:
            last_xpos, last_ypos = previous
            draw.line([(last_xpos+rectwidth,last_ypos),(xpos,ypos)],
                      fill=colour, width=6)

        previous = (xpos, ypos)

    return panel


def updown(X,cfeat,rectwidth,rectheight,pad):
    """Render a two-panel "lift" plot for one feature.

    Rows of `X` are binned on `cfeat` using the edges from
    binticks_label(test_matrix, cfeat); the edges come from the module-level
    `test_matrix`, not from `X`.  For every bin the lift is
    (positives in bin) - int((rows in bin) * prior).

    * Upper panel: red step line, raised by `lift` units where lift > 0, with
      tick labels appended underneath by addlabels().
    * Lower panel: blue step line, raised by |lift| units where lift < 0, then
      flipped vertically so that it hangs below the upper panel.

    Relies on the module-level names `test_matrix`, `labels`, `prior`,
    `binticks_label`, `labelticks` and `addlabels`.

    Parameters
    ----------
    X : pandas.DataFrame containing column `cfeat`
    cfeat : column label of the feature to plot
    rectwidth : pixel width of one bin
    rectheight : pixel height of one unit of lift
    pad : unused; kept so existing calls keep working (it only affected the
        per-row rectangles, whose drawing was disabled)

    Returns
    -------
    (image, upper_height, width) : the composite PIL image, the height of the
        labelled upper panel (the y coordinate of the seam between the two
        panels), and the canvas width in pixels.
    """
    bins = binticks_label(test_matrix,cfeat)
    nbins = len(bins)

    # `labels` is a module-level Series that may cover more rows than X (for
    # example if X was filtered after labels was taken).  Align it to X's
    # index explicitly rather than relying on implicit boolean-key reindexing.
    if labels.index.equals(X.index):
        outcome = labels
    else:
        outcome = labels.reindex(X.index)

    feature = X.loc[:,cfeat]
    pos = pd.cut(feature[(outcome == 1).values],bins=bins,labels=False,include_lowest=True)
    neg = pd.cut(feature[(outcome == 0).values],bins=bins,labels=False,include_lowest=True)
    px_w = ( nbins - 1 ) * rectwidth # a hack; won't work with all subsets

    # NOTE(review): this walks range(nbins) although pd.cut only produces bin
    # indices 0 .. nbins-2, so the final entry is always an empty bin whose
    # segment starts at the right canvas edge.  Kept as-is so the rendered
    # output does not change.
    lifts = _bin_lifts(pos, neg, nbins)

    ###########
    ### POS ###
    ###########

    # Panel height is the tallest positive bin count, in rectheight units.
    ppx_h = pos.value_counts().max() * rectheight
    up = _draw_step_panel((px_w,ppx_h), [max(lift, 0) for lift in lifts],
                          rectwidth, rectheight, (220,101,113)) # red
    up = addlabels(up,labelticks(test_matrix,cfeat))

    ###########
    ### NEG ###
    ###########

    npx_h = neg.value_counts().max() * rectheight
    down = _draw_step_panel((px_w,npx_h), [max(-lift, 0) for lift in lifts],
                            rectwidth, rectheight, (112,159,210)) # blue

    # Stack: labelled upper panel on top, mirrored lower panel underneath.
    unicanvas = Image.new('RGB',(px_w,up.height+npx_h),(255,255,255))
    unicanvas.paste(up,(0,0))

    down_flipped = down.transpose(Image.FLIP_TOP_BOTTOM)
    unicanvas.paste(down_flipped,(0,up.height))

    return unicanvas,up.height,px_w

In [78]:
# Inset, in pixels, passed to updown() as its `pad` argument.
pad = 1

In [79]:
# Pixel width of one bin; passed to updown() and also read as a global by
# addlabels() to space the tick labels.
rectwidth = 28
# Pixel height of one unit; passed to updown() and also read as a global by
# addlabels(), which uses rectheight - 8 as the font size.
rectheight = 28

In [80]:
# Display the selected feature names again for reference.
top_feats

['ocag_id_all_officerage_max',
 'dispatch_id_p1y_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p1m_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p1y_dispatchtype_assist_sum',
 'dispatch_id_p1y_dispatchtype_assault_sum']

In [102]:
# For entity 1364, take the first matching row's value from each of the
# risk_1 .. risk_4 columns.  Later cells use these values as column names of
# test_matrix.
singlevars = list(
    ii.loc[ii["entity_id"] == 1364, risk_column].iloc[0]
    for risk_column in ("risk_1", "risk_2", "risk_3", "risk_4")
)

In [115]:
# Plot the fourth entry of singlevars.
cfeat = singlevars[3]
# updown() returns the composite image, the height of its upper panel (the y
# coordinate where the lower panel starts), and the canvas width in pixels.
im,uph,px_w = updown(test_matrix,cfeat,rectwidth,rectheight,pad)



In [116]:
# Show officer 1364's value for each feature named in singlevars, separated
# by single spaces.  Written as one parenthesised expression so the line gives
# the same output under Python 2 and Python 3.
print(" ".join(str(test_matrix[feat].loc[1364]) for feat in singlevars))

49.0 441.0 71.0 643.0


In [117]:
# Crop to a window reaching `halfheight` pixels above and below the seam
# between the two panels (y = uph), across the full canvas width.
# Floor division keeps the crop box integral under both Python 2 and
# Python 3; plain `/` would give 787.5 on Python 3.
halfheight = 1575 // 2
cropbox = [0,uph-halfheight,px_w,uph+halfheight]
im = im.crop(cropbox)

In [118]:
# Write the cropped plot as <feature name>testlp3.png.
# NOTE(review): the destination is an absolute, machine-specific directory;
# consider a configurable output path before sharing this notebook.
im.save("/Users/damoncrockett/Desktop/tmpwintour/PILplatz/VASTvideo/"+cfeat+"testlp3"+".png")