In [20]:
import pandas as pd
from PIL import Image,ImageDraw,ImageFont
import os
import numpy as np
import math

In [21]:
# Load the per-feature table; later cells read its `feature_importance` and `feature` columns.
feature_table = pd.read_csv("170728_2015-01-01_alphasort.csv")

In [22]:
# Keep only the features whose importance exceeds the cutoff, renumber the
# surviving rows from 0, and collect their names.
feature_table = feature_table.loc[feature_table.feature_importance>0.00135].reset_index(drop=True)
top_feats = list(feature_table.feature)

In [23]:
# Number of features that passed the importance cutoff.
len(top_feats)

18

In [24]:
# Display the selected feature names.
top_feats

['demarrests_id_p5y_populationdensity_avg',
 'dispatch_id_p1m_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p1y_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p3m_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p6m_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p3m_dispatchtype_assist_sum',
 'dispatch_id_p6m_dispatchtype_assist_sum',
 'dispatch_id_p3m_dispatchtype_disorder_sum',
 'dispatch_id_p6m_dispatchtype_disorder_sum',
 'dispatch_id_p3m_dispatchtype_domestic_disturb_sum',
 'dispatch_id_p3m_dispatchtype_theft_sum',
 'dispatch_id_p1m_dispatchtype_traffic_sum',
 'dispatch_id_p3m_dispatchtype_traffic_sum',
 'ocag_id_all_officerage_max',
 'ts_id_p1m_trafficstops_count',
 'ts_id_p3m_trafficstops_count',
 'ts_id_p3m_trafficstopsbyrace_black_sum',

#### train matrix

In [25]:
# Load the training matrix and keep a handle on its `outcome` column as `labels`.
train_matrix = pd.read_csv("170728_2015-01-01_train-matrix-and-labels.csv")
labels = train_matrix.outcome
# Deletion of `outcome` is left disabled: later cells still read train_matrix.outcome.
#del train_matrix['outcome']

In [26]:
# Drop rows whose ocag_id_all_officerage_max is 0
# (presumably a placeholder for a missing age -- TODO confirm).
# NOTE: `labels` was captured before this filter, so it still includes the dropped rows.
train_matrix = train_matrix[train_matrix['ocag_id_all_officerage_max']!=0]

In [27]:
# Class balance of `outcome` in the filtered training matrix.
train_matrix.outcome.value_counts()

0    7862
1     780
Name: outcome, dtype: int64

In [28]:
# Two-column slice (officerage feature plus outcome) used for the range check below.
tmp = train_matrix[['ocag_id_all_officerage_max','outcome']]

In [29]:
# Base rate of positive outcomes among the filtered training rows.
# The denominator is the TOTAL row count (positives + negatives), not the
# negative count: updown() multiplies `prior` by a bin's total size to get the
# number of positives expected in that bin, so it must be a probability
# rather than the positive:negative odds.
# Computed from the data so it stays in sync if the filtering above changes.
prior = float((train_matrix.outcome == 1).sum()) / len(train_matrix)

In [30]:
# Spread (max - min) of the officerage feature.
max(tmp.ocag_id_all_officerage_max) - min(tmp.ocag_id_all_officerage_max)

46.0

## Binning

In [31]:
def binticks_label(matrix,feat):
    """Return the bin edges used to histogram column `feat` of `matrix`.

    The column's [min, max] range is split into 100 equal-width bins, so the
    result is an array of exactly 101 increasing edges whose first element is
    the column minimum and whose last element is the column maximum.

    np.linspace is used instead of np.arange with a fractional step: with
    arange the number of edges depends on floating-point rounding and the
    last edge can land just below the true maximum, which would push the
    largest value outside every bin. linspace also does not depend on how
    `/` divides integer-valued columns.
    """
    tmp = matrix.loc[:,feat]
    tmpmin = min(tmp)
    tmpmax = max(tmp)
    # 100 bins -> 101 edges, endpoints included exactly
    return np.linspace(tmpmin,tmpmax,101)

In [32]:
def labelticks(matrix,feat):
    """Return the values printed as axis tick labels for column `feat`.

    The column's [min, max] range is divided into 20 equal steps, giving
    exactly 21 tick values from the column minimum to the column maximum
    inclusive.

    np.linspace is used instead of np.arange with a fractional step, whose
    element count depends on floating-point rounding (it could yield an
    extra 22nd tick beyond the maximum).
    """
    tmp = matrix.loc[:,feat]
    tmpmin = min(tmp)
    tmpmax = max(tmp)
    # 20 steps -> 21 ticks, endpoints included exactly
    return np.linspace(tmpmin,tmpmax,21)

In [33]:
def minmax_label(matrix,feat):
    """Return the pair (smallest, largest) of the values in column `feat` of `matrix`."""
    column = matrix.loc[:,feat]
    return min(column),max(column)

### Axis labels

In [34]:
def addlabels(canvas,ticks):
    """Return a new image: `canvas` with a white strip of tick labels appended below it.

    canvas: PIL image to extend; it is pasted unchanged at the top-left.
    ticks:  sequence of values; each is rendered with str() as one label.

    Reads the module-level `rectwidth` and `rectheight`: labels are spaced
    five rectangle-widths apart and the font size is `rectheight - 8`.
    The font file 'VeraMono.ttf' must be resolvable by PIL.
    """
    margin = 28
    labelled_size = (canvas.width,canvas.height + margin)

    unicanvas = Image.new('RGB',labelled_size,(255,255,255))
    unicanvas.paste(canvas,(0,0))

    font = ImageFont.truetype('VeraMono.ttf', rectheight - 8 )
    draw = ImageDraw.Draw(unicanvas,'RGB')

    # Text is anchored at the top edge of the appended strip.
    text_top = labelled_size[1] - 28
    for i,tick in enumerate(ticks):
        draw.text((i * rectwidth * 5,text_top),text=str(tick),font=font,fill=(0,0,0))

    return unicanvas

# Train plot

In [35]:
def _draw_half(own,other,own_is_positive,nbins,px_w,rectwidth,rectheight,pad):
    """Draw one class's stacked-rectangle histogram together with its lift line.

    own:   Series of integer bin indices (from pd.cut(..., labels=False)) for
           the rows of the class being drawn.
    other: the same for the rows of the opposite class.
    own_is_positive: True when `own` holds the outcome==1 rows.
    nbins: number of bin EDGES (so valid bin indices are 0 .. nbins-2).
    px_w:  canvas width in pixels.

    Returns a PIL image whose height is (largest bin count in `own`) *
    rectheight; rectangles are stacked upward from the bottom edge.
    Reads the module-level `prior`.
    """
    px_h = own.value_counts().max() * rectheight

    canvas = Image.new('RGB',(px_w,px_h),(255,255,255))
    draw = ImageDraw.Draw(canvas)
    linecolor = (255,166,77)

    # End point of the previous bin's lift segment; None until the first bin
    # has been drawn, so no connecting segment is drawn for the first bin.
    last_xpos = None
    last_ypos = None

    # NOTE: range(nbins) runs one step past the last real bin. That final
    # iteration matches no rows and only draws at x == px_w (the right edge).
    for binn in range(nbins):
        xpos = binn * rectwidth
        ypos = px_h - rectheight

        own_count = len(own[own==binn])
        other_count = len(other[other==binn])

        # one rectangle per row of this class in this bin, stacked upward
        for _ in range(own_count):
            bbox = [(xpos+pad,ypos+pad),(xpos+rectwidth-pad,ypos+rectheight-pad)]
            draw.rectangle(bbox,fill='hsl(0,0%,50%)',outline=None)
            ypos = ypos - rectheight

        if own_is_positive:
            actual_positives = own_count
        else:
            actual_positives = other_count
        n = own_count + other_count
        expected_positives = int( n * float(prior) )
        lift = actual_positives - expected_positives

        # The positive half shows a surplus of positives (lift > 0); the
        # negative half shows a shortfall (lift < 0). Otherwise the line
        # stays on the baseline.
        if own_is_positive:
            excess = lift
        else:
            excess = -lift

        if excess > 0:
            ypos = px_h - excess*rectheight
        else:
            ypos = px_h

        draw.line([(xpos,ypos),(xpos+rectwidth,ypos)],fill=linecolor,width=6)

        # join this bin's segment to the previous bin's segment
        if last_xpos is not None:
            draw.line([(last_xpos+rectwidth,last_ypos),(xpos,ypos)],fill=linecolor,width=6)

        last_ypos = ypos
        last_xpos = xpos

    return canvas


def updown(X,cfeat,rectwidth,rectheight,pad):
    """Build the two-sided lift image for feature `cfeat`.

    The top half stacks one rectangle per positive (label 1) row of `X` in
    each of the feature's bins; the bottom half does the same for negative
    (label 0) rows and is mirrored vertically so it hangs below the axis.
    An orange line in each half marks, per bin, how far the positive count
    is above (top) or below (bottom) int(bin_size * prior). A strip of tick
    labels separates the two halves.

    X:          DataFrame containing column `cfeat`; its index must be
                present in the module-level `labels`.
    cfeat:      name of the feature column to plot.
    rectwidth,
    rectheight: pixel size of one rectangle cell.
    pad:        pixel inset of each rectangle inside its cell.

    Returns the combined PIL image.

    Depends on module-level names: `train_matrix` (bin edges and tick values
    are always computed from it, not from `X`), `labels`, `prior`,
    `binticks_label`, `labelticks` and `addlabels`.
    """
    bins = binticks_label(train_matrix,cfeat)
    nbins = len(bins)

    # Align labels to X's rows explicitly instead of relying on pandas to
    # reindex a boolean mask that carries a different index.
    row_labels = labels.loc[X.index]
    pos = pd.cut(X.loc[row_labels==1,cfeat],bins=bins,labels=False,include_lowest=True)
    neg = pd.cut(X.loc[row_labels==0,cfeat],bins=bins,labels=False,include_lowest=True)
    px_w = ( nbins - 1 ) * rectwidth # a hack; won't work with all subsets

    up = _draw_half(pos,neg,True,nbins,px_w,rectwidth,rectheight,pad)
    up = addlabels(up,labelticks(train_matrix,cfeat))

    down = _draw_half(neg,pos,False,nbins,px_w,rectwidth,rectheight,pad)

    unicanvas = Image.new('RGB',(px_w,up.height+down.height),(255,255,255))
    unicanvas.paste(up,(0,0))

    down_flipped = down.transpose(Image.FLIP_TOP_BOTTOM)
    unicanvas.paste(down_flipped,(0,up.height))

    return unicanvas

In [36]:
# Pixel inset applied on every side of a rectangle inside its cell (passed to updown).
pad = 1

In [37]:
# Pixel width and height of one rectangle cell. Passed to updown, and also
# read as globals by addlabels (label spacing and font size).
rectwidth = 28
rectheight = 28

In [38]:
# Render one lift image per selected feature and save it as <feature>.png.
for cfeat in top_feats:
    im = updown(train_matrix,cfeat,rectwidth,rectheight,pad)
    im.save(os.path.join("/Users/damoncrockett/Desktop/tmpwintour/PILplatz/lift/",cfeat+".png"))

