In [33]:
import pyodbc
import pandas as pd
import config as cfg

In [34]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

In [35]:
cnxn = pyodbc.connect( 'DRIVER={ODBC Driver 13 for SQL Server};SERVER=' + cfg.mssql['server'] + ';DATABASE=' 
                      + cfg.mssql['database'] + ';UID=' + cfg.mssql['username'] + ';PWD=' + cfg.mssql['password'] )

In [36]:
query = "SELECT * FROM BankView WHERE [State]='TX';"
data = pd.read_sql(query, cnxn, index_col='BankID')
data.head()

Unnamed: 0_level_0,UniqueNum,Name,Address1,Address2,City,State,Zip,Deposit,Lat,Lng,...,ClosestPSDistance,MeanPSDistance,PSCount,Take,PDistance,Officers1000,FFLCount,Target,Population,CrimeRate1000
BankID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,6371,"Austin Bank, Texas National Association",169 S. Frankston Hwy,,Frankston,TX,75763,89823000,32.159764,-95.455885,...,8.008239,8.073403,2,44911,0.0,0.0,3,,1163.0,42.13
2,221632,"Austin Bank, Texas National Association",2745 South Loop 256,,Palestine,TX,75801,74868000,31.733813,-95.623431,...,2.158156,2.158156,1,37434,0.484184,1.772727,6,,18194.0,39.13
3,16168,"Capital One, National Association",2121 South Loop 256,,Palestine,TX,75801,105194000,31.735576,-95.612655,...,2.316097,2.316097,1,52597,0.46839,1.772727,6,,18194.0,39.13
4,2196,Citizens National Bank,207 West Spring Street,,Palestine,TX,75801,92886000,31.761967,-95.633062,...,0.134378,0.134378,1,46443,0.686562,1.772727,6,,18194.0,39.13
5,569848,"Commercial Bank of Texas, National Association",109 West Parker St,,Elkhart,TX,75839,33338000,31.625053,-95.579769,...,10.5,10.5,0,16669,0.0,0.0,1,,,


In [37]:
data['CrimeRate1000'].mean()

36.84859592711564

In [38]:
data['Population'].mean()

598919.9247945695

In [39]:
data.isnull().sum()

UniqueNum               0
Name                    0
Address1                0
Address2                0
City                    0
State                   0
Zip                     0
Deposit                 0
Lat                     8
Lng                     8
ClosestStationID      419
ClosestPSDistance       0
MeanPSDistance          0
PSCount                 0
Take                    0
PDistance               0
Officers1000            0
FFLCount                0
Target               6507
Population            909
CrimeRate1000         909
dtype: int64

In [40]:
values = {'CrimeRate1000': data['CrimeRate1000'].mean(), 'Population': data['Population'].mean()}
data.fillna(value=values, inplace=True)
data.shape

(6507, 21)

## Use Model to Predict banks to Target

In [41]:
try:
    import cPickle as pickle
except ImportError:
    import pickle
    
## Load Model
model_filepath = 'targetbanks_randomforestclassifier.pkl'
in_logreg = open(model_filepath, 'rb')
rfc = pickle.load(in_logreg)
in_logreg.close()

In [42]:
feature_cols = ['ClosestPSDistance', 'Take', 'PDistance', 'Officers1000',
       'FFLCount', 'Population', 'CrimeRate1000']
X = data[feature_cols]
data['y_pred'] = rfc.predict(X)

In [43]:
# Total TARGETABLE banks
tgtsum = data[data.y_pred == 1].Name.count();
total = data.Name.count()
print(tgtsum, 'out of', total, ' pct:', round((tgtsum/total * 100)), '%')

937 out of 6507  pct: 14.0 %


## Show Result

In [44]:
# Drop columns
dftarget = data[data.y_pred == 1]
dftarget.drop(['UniqueNum', 
       'Deposit', 'Lat', 'Lng', 'ClosestStationID',
       'MeanPSDistance', 'PSCount', 'Target', 'Population'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


## Top 10 banks with highest Take

In [45]:
# Top 10 banks with highest Take
dftop10 = dftarget.sort_values(by=['Take'], ascending=[False]).head(10)
dftop10

Unnamed: 0_level_0,Name,Address1,Address2,City,State,Zip,ClosestPSDistance,Take,PDistance,Officers1000,FFLCount,CrimeRate1000,y_pred
BankID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
246,"Citibank, National Association",100 Citibank Drive,,San Antonio,TX,78245,7.244339,2088000,0.0,0.0,6,59.25,1
1037,LegacyTexas Bank,5851 Legacy Circle,,Plano,TX,75024,3.452618,1638172,0.354738,1.39677,33,20.44,1
601,"Wells Fargo Bank, National Association",9821 Broadway,,Pearland,TX,77584,4.944473,1099152,0.205553,2.318182,16,19.93,1
1743,"TIB The Independent Bankersbank, National Asso...",11701 Luna Road,,Dallas,TX,75234,4.074525,742480,0.292548,2.84,27,41.9,1
6189,Compass Bank,700 San Bernardo Avenue,,Laredo,TX,78040,3.121091,713798,0.387891,2.0,29,34.25,1
2758,"Bank of America, National Association",2301 Fm 1960 West,,Houston,TX,77068,10.5,672450,0.0,0.0,34,53.76,1
4365,PlainsCapital Bank,5010 University Avenue,,Lubbock,TX,79413,3.050037,610765,0.394996,1.584158,54,61.09,1
6093,First State Bank of Uvalde,200 East Nopal Street,,Uvalde,TX,78801,1.357246,606016,0.564275,1.69697,7,41.4,1
898,"Beal Bank, SSB",6000 Legacy Drive,,Plano,TX,75024,3.376343,528698,0.362366,1.39677,31,20.44,1
4324,City Bank,5219 City Bank Parkway,,Lubbock,TX,79407,4.924077,477205,0.207592,1.584158,58,61.09,1


## Top 10 of banks with highest distance to Police Station

In [46]:
# Top 10 of banks with highest distance to Police Station
dftop10 = dftarget.sort_values(by=['ClosestPSDistance'], ascending=False).head(10)
dftop10

Unnamed: 0_level_0,Name,Address1,Address2,City,State,Zip,ClosestPSDistance,Take,PDistance,Officers1000,FFLCount,CrimeRate1000,y_pred
BankID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
6480,The City National Bank of Sulphur Springs,2454 South Fm 2869,,Hawkins,TX,75765,10.5,12388,0.0,0.0,3,6.94,1
485,Johnson City Bank,100 E Pecan St,,Johnson City,TX,78636,10.5,52310,0.0,0.0,5,3.46,1
3093,Independent Bank,11390 Veterans Memorial,,Houston,TX,77067,10.5,955,0.0,0.0,27,53.76,1
3211,"JPMorgan Chase Bank, National Association",12350 Fm 1960 Rd W,,Houston,TX,77065,10.5,52893,0.0,0.0,30,53.76,1
3212,"JPMorgan Chase Bank, National Association",13103 Fm 1960 West,,Houston,TX,77065,10.5,109939,0.0,0.0,32,53.76,1
3215,"JPMorgan Chase Bank, National Association",4081 Fm 1960 West,,Houston,TX,77068,10.5,72751,0.0,0.0,35,53.76,1
3216,"JPMorgan Chase Bank, National Association",4165 Fm 1960 Road West,,Houston,TX,77068,10.5,55065,0.0,0.0,35,53.76,1
3218,"JPMorgan Chase Bank, National Association",7611 Fm 1960 West,,Houston,TX,77070,10.5,39749,0.0,0.0,43,53.76,1
3220,"JPMorgan Chase Bank, National Association",12214 Jones Road,,Houston,TX,77070,10.5,30936,0.0,0.0,36,53.76,1
3246,"JPMorgan Chase Bank, National Association",8536 Hwy 6 N,,Houston,TX,77095,10.5,29059,0.0,0.0,32,53.76,1


## Sort by lowest officers rate per 1000

In [47]:
#Sort by lowest officers rate per 1000
dftop10 = dftarget.sort_values(by=['Officers1000'], ascending=True).head(10)
dftop10

Unnamed: 0_level_0,Name,Address1,Address2,City,State,Zip,ClosestPSDistance,Take,PDistance,Officers1000,FFLCount,CrimeRate1000,y_pred
BankID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,"Austin Bank, Texas National Association",169 S. Frankston Hwy,,Frankston,TX,75763,8.008239,44911,0.0,0.0,3,42.13,1
3365,Regions Bank,1750 West Lake Houston Parkway,,Kingwood,TX,77339,6.114838,5229,0.088516,0.0,21,36.848596,1
3395,The Chasewood Bank,"20333 State Hwy 249, Suite 100",,Houston,TX,77070,7.864581,44569,0.0,0.0,39,53.76,1
3412,Trustmark National Bank,6809 Fm 1960 West,,Houston,TX,77069,10.5,47442,0.0,0.0,42,53.76,1
3425,"Wells Fargo Bank, National Association",811 Sheldon Rd,,Channelview,TX,77530,6.130952,55341,0.086905,0.0,15,36.848596,1
3428,"Wells Fargo Bank, National Association",13150 Louetta Road,,Cypress,TX,77429,6.935804,66998,0.00642,0.0,30,36.848596,1
3446,"Wells Fargo Bank, National Association",1350 W 43rd St.,,Houston,TX,77018,5.709191,27864,0.129081,0.0,56,53.76,1
3467,"Wells Fargo Bank, National Association","10261 North Freeway, Suite 500",,Houston,TX,77037,10.5,19447,0.0,0.0,19,53.76,1
3484,"Wells Fargo Bank, National Association",12941 North Freeway,,Houston,TX,77060,9.585292,119031,0.0,0.0,21,53.76,1
3489,"Wells Fargo Bank, National Association",9577 Jones Road,,Houston,TX,77065,9.610216,56114,0.0,0.0,33,53.76,1
