In [242]:
import pandas as pd
from PIL import Image,ImageDraw,ImageFont
import os
import numpy as np
import math

In [243]:
# Load the feature table; its `feature` and `feature_importance` columns are
# used by the filtering cell below.
feature_table = pd.read_csv("193398_feattable.csv")

In [244]:
# Load the individual-importances table.
# NOTE(review): `ii` is not referenced again in the cells shown here — confirm
# whether this load is still needed.
ii = pd.read_csv("193398_individual_importances.csv")

In [245]:
# Keep only the features whose importance exceeds 0.003, renumber the rows
# from zero, and collect the surviving feature names in table order.
feature_table = feature_table.loc[feature_table.feature_importance>0.003].reset_index(drop=True)
top_feats = feature_table.feature.tolist()

In [246]:
len(top_feats)

21

In [247]:
top_feats

['ocag_id_all_officerage_max',
 'ts_id_p1y_trafficstopsbystoptype_equipment_violation_sum',
 'shifts_id_p5y_hourspershift_sum',
 'arrests_id_p5y_suspectsarrestedofrace_unknown_avg',
 'dispatch_id_p1y_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p1m_dispatchinitiatiationtype_ci_sum',
 'dispatch_id_p1m_dispatchtype_theft_sum',
 'dispatch_id_p1m_dispatchtype_assist_sum',
 'dispatch_id_p1m_dispatchtype_domestic_disturb_sum',
 'dispatch_id_p1m_dispatchtype_disorder_sum',
 'dispatch_id_p1m_dispatchtype_traffic_sum',
 'dispatch_id_p1m_dispatchtype_domestic_disturb_avg',
 'dispatch_id_p5y_dispatchtype_assault_avg',
 'dispatch_id_p1y_dispatchtype_assist_sum',
 'dispatch_id_p1y_dispatchtype_domestic_disturb_sum',
 'dispatch_id_p1y_dispatchtype_shooting_sum',
 'dispatch_id_p1y_dispatchtype_intoxicated_sum',
 'dispatch_id_p1y_dispatchtype_assault_sum',
 'dispatch_id_p1y_dispatchtype_drug_sum',
 'dispatch_id_p1y_dispatchtype_intoxicated_avg',
 'dispatch_id_p1w_dispatchinitiatiationtype_ci_sum']

#### train matrix

In [248]:
# Load the training matrix and keep a handle on its `outcome` column.
# `labels` is taken here, before any row filtering is applied to train_matrix.
train_matrix = pd.read_csv("193398_train.csv")
labels = train_matrix.outcome
# The deletion below is disabled, so `outcome` stays in the frame; later cells
# read train_matrix.outcome directly.
#del train_matrix['outcome']

In [249]:
# Drop rows whose officer-age feature is exactly 0.
# NOTE(review): presumably 0 marks a missing age — confirm.
# This overwrites train_matrix; `labels` still holds the unfiltered outcome column.
train_matrix = train_matrix[train_matrix['ocag_id_all_officerage_max']!=0]

In [250]:
train_matrix.outcome.value_counts()

0    15256
1     1531
Name: outcome, dtype: int64

In [251]:
# Two-column subset (officer age + outcome); the age column is used for the
# range check a couple of cells below.
tmp = train_matrix[['ocag_id_all_officerage_max','outcome']]

In [252]:
# Ratio of positive to negative outcomes in the filtered training matrix.
# Note this is positives per negative, not positives over all rows.
# It is computed from the data instead of being typed in from the displayed
# value_counts() output, so it stays correct if the matrix or filter changes.
prior = float((train_matrix.outcome==1).sum()) / (train_matrix.outcome==0).sum()

In [253]:
max(tmp.ocag_id_all_officerage_max) - min(tmp.ocag_id_all_officerage_max)

46.0

In [254]:
# Integer bin edges 21, 22, ..., 67 (47 edges, i.e. 46 unit-wide bins); updown()
# passes these to pd.cut as `bins`.
ages = np.arange(21,68)
# Every third edge (21, 24, ..., 66), drawn as the x-axis labels by updown().
# The step of 3 matches the 3-rectangle label spacing used in addlabels().
agelabels = np.arange(21,68,3)

## Binning

In [255]:
def binticks_label(matrix,feat,nbins=100):
    """Return evenly spaced bin edges spanning the observed range of a feature.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Table containing the column `feat`.
    feat : str
        Column whose minimum and maximum define the binned range.
    nbins : int, optional
        Number of equal-width bins (default 100).

    Returns
    -------
    numpy.ndarray
        `nbins + 1` edges; the first equals the column minimum and the last
        equals the column maximum.

    Raises
    ------
    ValueError
        If the column has no positive range (constant or all-NaN values),
        because distinct bin edges cannot be built.
    """
    values = matrix.loc[:,feat]
    # Series.min/max skip NaN; the builtin min()/max() return NaN whenever the
    # first element is NaN.
    lo = values.min()
    hi = values.max()
    if not hi > lo:
        raise ValueError("cannot build bin edges for %r: column has no positive range" % (feat,))
    # linspace fixes the edge count and pins both end points. arange with a
    # fractional step may emit one edge too many or too few due to rounding.
    return np.linspace(lo,hi,nbins + 1)

In [256]:
def labelticks(matrix,feat,nticks=20):
    """Return evenly spaced axis-label values spanning a feature's observed range.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Table containing the column `feat`.
    feat : str
        Column whose minimum and maximum define the labelled range.
    nticks : int, optional
        Number of equal intervals between labels (default 20), so
        `nticks + 1` label values are returned.

    Returns
    -------
    numpy.ndarray
        `nticks + 1` values from the column minimum to the column maximum,
        both included.

    Raises
    ------
    ValueError
        If the column has no positive range (constant or all-NaN values).
    """
    values = matrix.loc[:,feat]
    # Series.min/max skip NaN, unlike the builtin min()/max().
    lo = values.min()
    hi = values.max()
    if not hi > lo:
        raise ValueError("cannot build label ticks for %r: column has no positive range" % (feat,))
    # linspace guarantees the tick count and both end points. arange with a
    # fractional step can be off by one element due to rounding.
    return np.linspace(lo,hi,nticks + 1)

In [257]:
def minmax_label(matrix,feat):
    """Return the smallest and largest value of a feature column.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Table containing the column `feat`.
    feat : str
        Column to summarise.

    Returns
    -------
    tuple
        `(minimum, maximum)` of the column, ignoring NaN entries.
    """
    values = matrix.loc[:,feat]
    # Series.min/max skip NaN. The builtin min()/max() compare element by
    # element and return NaN whenever the first element is NaN.
    return values.min(),values.max()

### Axis labels

In [268]:
def addlabels(canvas,ticks,tickstride=3):
    """Return a copy of `canvas` extended downward by a strip of x-axis labels.

    Reads the module-level `rectheight` and `rectwidth` globals: the strip is
    `rectheight` pixels tall, the font size is `rectheight - 16`, and
    consecutive labels are `tickstride * rectwidth` pixels apart.

    Parameters
    ----------
    canvas : PIL.Image.Image
        Image to extend; it is pasted at the top-left of the new image.
    ticks : sequence
        Values to print, left to right; each is rendered with `str()`.
    tickstride : int, optional
        Number of rectangle widths between consecutive labels (default 3).
        Pass the number of bins that separate two tick values.

    Returns
    -------
    PIL.Image.Image
        New RGBA image of the same width and `canvas.height + rectheight` height.
    """
    margin = rectheight
    width = canvas.width
    newheight = canvas.height + margin

    # Transparent white background; the original drawing occupies the top part.
    unicanvas = Image.new('RGBA',(width,newheight),(255,255,255,0))
    unicanvas.paste(canvas,(0,0))

    font = ImageFont.truetype('VeraMono.ttf', rectheight - 16 )

    draw = ImageDraw.Draw(unicanvas,'RGBA')

    # Labels start 15 px below the top edge of the added strip.
    label_y = newheight - margin + 15
    for i,tick in enumerate(ticks):
        xpos = i * rectwidth * tickstride
        draw.text((xpos,label_y),text=str(tick),font=font,fill=(0,0,0))

    return unicanvas

# Train plot

In [269]:
def up(X,cfeat,rectwidth,rectheight,pad):
    """Draw a unit histogram of `cfeat` for every row of `X` and add axis labels.

    Bin edges and label values are computed from the global `train_matrix`
    (via binticks_label / labelticks), not from `X`. Each row of `X` becomes
    one light-grey rectangle stacked upward from the bottom of its bin's column.

    NOTE(review): labelticks() yields one tick per 5 bins, while addlabels()
    spaces labels 3 rectangles apart, so the labels may not line up with the
    bins they describe — confirm.

    Parameters
    ----------
    X : pandas.DataFrame
        Rows to plot; must contain the column `cfeat`.
    cfeat : str
        Feature plotted along the x axis.
    rectwidth, rectheight : int
        Pixel size of one unit rectangle.
    pad : int
        Inset in pixels applied on every side of each rectangle.

    Returns
    -------
    PIL.Image.Image
        The labelled image returned by addlabels().
    """
    edges = binticks_label(train_matrix,cfeat)
    n_edges = len(edges)
    binned = pd.cut(X.loc[:,cfeat],bins=edges,labels=False,include_lowest=True)

    # Width assumes one column per bin; height fits the fullest bin.
    canvas_w = ( n_edges - 1 ) * rectwidth
    canvas_h = binned.value_counts().max() * rectheight

    canvas = Image.new('RGB',(canvas_w,canvas_h),(255,255,255,0))
    painter = ImageDraw.Draw(canvas)

    for column in range(n_edges):
        left = column * rectwidth
        members = binned[binned==column]
        for row in range(len(members)):
            # Row 0 sits on the bottom edge; each further row is one rectangle higher.
            top = canvas_h - rectheight * (row + 1)
            corners = [(left+pad,top+pad),(left+rectwidth-pad,top+rectheight-pad)]
            painter.rectangle(corners,fill='hsl(0,0%,75%)',outline=None)

    return addlabels(canvas,labelticks(train_matrix,cfeat))

In [310]:
def updown(X,cfeat,rectwidth,rectheight,pad):
    """Draw a unit histogram of positive rows with the rescaled negative outline.

    Rows of `X` are split by the global `labels` (1 = positive, 0 = negative)
    and binned on `cfeat` using the global `ages` edges. Each positive row is
    drawn as one grey rectangle stacked upward in its bin's column. On top of
    that, a blue step line shows each bin's negative count scaled by
    len(positives) / len(negatives), truncated to whole rectangles. Axis labels
    from the global `agelabels` are added below via addlabels().

    Only this single panel is produced; a separate negative panel and per-bin
    lift lines are not drawn.

    Parameters
    ----------
    X : pandas.DataFrame
        Rows to plot; must contain the column `cfeat`. `labels` is applied to it
        as a boolean mask, which relies on pandas aligning the two by index.
    cfeat : str
        Feature plotted along the x axis.
    rectwidth, rectheight : int
        Pixel size of one unit rectangle.
    pad : int
        Inset in pixels applied on every side of each rectangle.

    Returns
    -------
    tuple
        `(image, image_height, px_w)`: the labelled RGBA image, its height
        including the label strip, and the plot width in pixels.
    """
    bins = ages
    nbins = len(bins)
    pos = pd.cut(X[labels==1].loc[:,cfeat],bins=bins,labels=False,include_lowest=True)
    neg = pd.cut(X[labels==0].loc[:,cfeat],bins=bins,labels=False,include_lowest=True)
    px_w = ( nbins - 1 ) * rectwidth # a hack; won't work with all subsets

    # The canvas must fit the taller of the fullest positive column and the
    # (rounded-up) scaled height of the fullest negative bin.
    posbinmax = pos.value_counts().max()
    overmax = int( math.ceil( neg.value_counts().max() * prior ) )

    if posbinmax >= overmax:
        ppx_h = posbinmax * rectheight
    else:
        ppx_h = overmax * rectheight

    up = Image.new('RGBA',(px_w,ppx_h),(255,255,255,0))
    updraw = ImageDraw.Draw(up)

    ###########
    ### POS ###
    ###########

    # pd.cut codes run from 0 to nbins - 2, so that is the full set of columns.
    for binn in range(nbins - 1):
        xpos = binn * rectwidth
        ypos = ppx_h - rectheight

        for _ in range(len(pos[pos==binn])):
            bbox = [(xpos+pad,ypos+pad),(xpos+rectwidth-pad,ypos+rectheight-pad)]
            updraw.rectangle(bbox,fill='hsl(0,0%,50%)',outline=None)
            ypos = ypos - rectheight

    up = addlabels(up,agelabels)
    updraw = ImageDraw.Draw(up) # addlabels returns a new image, so rebind the drawer

    #####################
    ### NEG (outline) ###
    #####################

    npos = len(pos)
    nneg = len(neg)
    outline = (50,103,159,190)

    # None marks "no previous bin yet"; the first iteration therefore skips
    # the connecting segment explicitly instead of relying on a swallowed error.
    last_xpos = None
    last_ypos = None

    # Iterating one index past the last bin (an empty bin of height 0) draws
    # the final segment that drops the outline to the baseline at the right edge.
    for binn in range(nbins):
        xpos = binn * rectwidth

        negbinheight = len(neg[neg==binn])
        overheight = int( negbinheight * npos / nneg )
        ypos = ppx_h - overheight*rectheight # ppx_h bc plotting to 'up'

        # Horizontal top of this bin's scaled negative bar.
        coords = [(xpos,ypos),(xpos+rectwidth,ypos)]
        updraw.line(coords,fill=outline,width=16)

        # Join the previous bin's top to this one.
        if last_xpos is not None:
            coords = [(last_xpos+rectwidth,last_ypos),(xpos,ypos)]
            updraw.line(coords,fill=outline,width=16)

        last_ypos = ypos
        last_xpos = xpos

    return up,up.height,px_w

In [311]:
# Inset in pixels applied on every side of each unit rectangle (see the bbox
# computation in up() / updown()), leaving a visible gap between neighbours.
pad = 2

In [312]:
# Pixel size of one unit rectangle. Besides being passed to up() / updown(),
# these are read as globals by addlabels() for the label-strip height, font
# size and label spacing.
rectwidth = 56
rectheight = 56

### Interaction binning

In [313]:
# The disabled lines sketch binning the rows on a second feature (`ifeat`)
# into 3 groups; only `cfeat` is active.
#ifeat = "arrests_id_p1y_arrestson_sat_avg"
# Feature plotted along the x axis.
cfeat = 'ocag_id_all_officerage_max'
#train_matrix['ifeat'] = pd.cut(train_matrix[ifeat],bins=3,labels=False,include_lowest=False)

In [314]:
#X = train_matrix[train_matrix.ifeat==1]

In [315]:
# Render the plot for the whole training matrix. updown() returns the image,
# its height (label strip included) and the plot width in pixels.
im,uph,px_w = updown(train_matrix,cfeat,rectwidth,rectheight,pad)
#im = up(train_matrix,cfeat,rectwidth,rectheight,pad)



In [316]:
#height = 2160
#cropbox = [0,uph-height,px_w,uph+height]
#imcrop = im.crop(cropbox)

In [317]:
#imcrop.save("/Users/damoncrockett/Desktop/"+cfeat+"_"+ifeat+"bin1"+"_ep.png")
# Save to the current user's Desktop as "<cfeat>.png". The home directory is
# resolved at run time instead of hard-coding one user's absolute path, so the
# notebook also runs under other accounts.
im.save(os.path.join(os.path.expanduser("~"),"Desktop",cfeat+".png"))