In [1]:
import pyodbc
import pandas as pd
import config as cfg

In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Open the SQL Server connection; every connection setting is read from the
# local `config` module (cfg.mssql) so no credentials live in the notebook.
mssql_conn_parts = [
    'DRIVER={ODBC Driver 13 for SQL Server}',
    'SERVER=' + cfg.mssql['server'],
    'DATABASE=' + cfg.mssql['database'],
    'UID=' + cfg.mssql['username'],
    'PWD=' + cfg.mssql['password'],
]
cnxn = pyodbc.connect(';'.join(mssql_conn_parts))

In [4]:
# Pull every New York row from BankView into a DataFrame indexed by BankID,
# then preview the first rows.
query = "SELECT * FROM BankView WHERE [State]='NY';"
data = pd.read_sql(sql=query, con=cnxn, index_col='BankID')
data.head()

Unnamed: 0_level_0,UniqueNum,Name,Address1,Address2,City,State,Zip,Deposit,Lat,Lng,...,ClosestPSDistance,MeanPSDistance,PSCount,Take,PDistance,Officers1000,FFLCount,Target,Population,CrimeRate1000
BankID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6509,590864,Ballston Spa National Bank,1973 Western Ave,,Albany,NY,12203,1917000,42.695,-73.8794,...,3.26185,7.330062,11,958,0.373815,1.09375,8,,98617.0,43.744993
6510,580447,Ballston Spa National Bank,1207 Troy Schenectady Rd,,Latham,NY,12110,3875000,42.7725,-73.823,...,2.947765,6.917659,14,1937,0.405223,1.425641,8,,,
6511,4660,"Bank of America, National Association",69 State Street,,Albany,NY,12201,435463000,42.6499,-73.7522,...,0.986112,5.144061,9,217731,0.601389,2.820146,5,,98617.0,43.744993
6512,213160,"Bank of America, National Association",1450 Western Avenue,,Albany,NY,12203,97728000,42.6808,-73.8373,...,4.095179,7.046373,12,48864,0.290482,2.341346,10,,98617.0,43.744993
6513,214981,"Bank of America, National Association",1791 Western Avenue,,Albany,NY,12203,62203000,42.6905,-73.8662,...,3.999549,7.621675,13,31101,0.300045,1.318182,10,,98617.0,43.744993


In [5]:
# Mean of the CrimeRate1000 column (pandas skips missing values by default).
data.CrimeRate1000.mean()

29.42786864578506

In [6]:
# Mean of the Population column (pandas skips missing values by default).
data.Population.mean()

3629207.8229166665

In [7]:
# Number of missing values in each column.
data.isna().sum()

UniqueNum               0
Name                    0
Address1                0
Address2                0
City                    0
State                   0
Zip                     0
Deposit                 0
Lat                     9
Lng                     9
ClosestStationID      233
ClosestPSDistance       0
MeanPSDistance          0
PSCount                 0
Take                    0
PDistance               0
Officers1000            0
FFLCount                0
Target               5033
Population           3401
CrimeRate1000        3401
dtype: int64

In [8]:
# Impute missing CrimeRate1000 and Population values with their column means.
# The fill mutates `data` in place; other columns with nulls are left as-is.
values = {
    column: data[column].mean()
    for column in ('CrimeRate1000', 'Population')
}
data.fillna(value=values, inplace=True)
data.shape

(5033, 21)

## Use the Model to Predict Which Banks to Target

In [9]:
try:
    # Python 2 ships a C-accelerated pickle under this name.
    import cPickle as pickle
except ImportError:
    import pickle

## Load Model
# Deserialize the previously saved classifier into `rfc`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
model_filepath = 'targetbanks_randomforestclassifier.pkl'
# The context manager closes the file even if unpickling raises.
with open(model_filepath, 'rb') as model_file:
    rfc = pickle.load(model_file)

In [10]:
# Columns passed to the classifier, in the order listed here.
feature_cols = [
    'ClosestPSDistance',
    'Take',
    'PDistance',
    'Officers1000',
    'FFLCount',
    'Population',
    'CrimeRate1000',
]
X = data.loc[:, feature_cols]
# Store the model's prediction for every bank as a new `y_pred` column.
data['y_pred'] = rfc.predict(X)

In [11]:
# Count the banks predicted as targetable (y_pred == 1) against all banks
# and print the share as a rounded percentage.
tgtsum = data.loc[data['y_pred'] == 1, 'Name'].count()
total = data['Name'].count()
print(tgtsum, 'out of', total, ' pct:', round(tgtsum / total * 100), '%')

1953 out of 5033  pct: 39.0 %


## Show Result

In [12]:
# Keep only the banks the model flagged (y_pred == 1) and remove the columns
# that are not shown in the result tables below.
# The non-inplace drop binds a brand-new DataFrame to `dftarget`; dropping
# in place on a filtered slice of `data` raises SettingWithCopyWarning.
dftarget = data[data.y_pred == 1].drop(
    ['UniqueNum', 'Deposit', 'Lat', 'Lng', 'ClosestStationID',
     'MeanPSDistance', 'PSCount', 'Target', 'Population'],
    axis=1,
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


## Top 10 banks with highest Take

In [13]:
# The ten targeted banks with the largest Take, highest first.
dftop10 = dftarget.sort_values('Take', ascending=False).iloc[:10]
dftop10

Unnamed: 0_level_0,Name,Address1,Address2,City,State,Zip,ClosestPSDistance,Take,PDistance,Officers1000,FFLCount,CrimeRate1000,y_pred
BankID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
8936,"JPMorgan Chase Bank, National Association",401 Madison Avenue,,New York,NY,10017,3.320694,174260736,0.367931,5.0,10,20.357615,1
8850,"HSBC Bank USA, National Association",452 Fifth Avenue,,New York,NY,10018,2.940012,43650604,0.405999,5.0,10,20.357615,1
8934,"JPMorgan Chase Bank, National Association",401 Madison Avenue,,New York,NY,10017,3.320694,33282569,0.367931,5.0,10,20.357615,1
8639,"Bank of America, National Association",115 West 42nd Street,,New York,NY,10036,3.113936,22024089,0.388606,5.0,9,20.357615,1
8768,"Citibank, National Association",399 Park Avenue,,New York,NY,10022,3.602696,12862500,0.33973,5.0,10,20.357615,1
8652,Bank of China,1045 Avenue Of The Americas,,New York,NY,10018,2.969544,11522011,0.403046,5.0,9,20.357615,1
8927,"JPMorgan Chase Bank, National Association",349 Fifth Avenue And 34th Street,,New York,NY,10016,2.658231,7820909,0.434177,5.0,10,20.357615,1
9209,"Wells Fargo Bank, National Association",437 Madison Avenue,,New York,NY,10022,3.422193,7128784,0.357781,5.0,10,20.357615,1
8817,First Republic Bank,1230 Avenue Of The Americas,,New York,NY,10020,3.401485,4617530,0.359852,5.0,10,20.357615,1
8684,"Capital One, National Association",57 West 57th Street,,New York,NY,10019,3.822906,4485512,0.317709,5.0,10,20.357615,1


## Top 10 banks with the greatest distance to the closest police station

In [14]:
# The ten targeted banks with the largest ClosestPSDistance, highest first.
dftop10 = dftarget.sort_values('ClosestPSDistance', ascending=False).iloc[:10]
dftop10

Unnamed: 0_level_0,Name,Address1,Address2,City,State,Zip,ClosestPSDistance,Take,PDistance,Officers1000,FFLCount,CrimeRate1000,y_pred
BankID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
7027,"NBT Bank, National Association",147 Maple Avenue,,Hobart,NY,13788,10.5,35472,0.0,0.0,2,29.427869,1
10654,"Capital One, National Association",160 Main St,,Sayville,NY,11782,10.5,60248,0.0,0.0,5,29.427869,1
10998,Jeff Bank,4864 State Route 52,,Jeffersonville,NY,12748,10.5,51408,0.0,0.0,4,29.427869,1
10790,"JPMorgan Chase Bank, National Association",645 William Floyd Pkwy,,Shirley,NY,11967,10.5,50691,0.0,0.0,1,29.427869,1
10796,"JPMorgan Chase Bank, National Association",595 Sunrise Highway,,West Babylon,NY,11704,10.5,112956,0.0,0.0,9,29.427869,1
7043,Bank of Millbrook,3263 Franklin Avenue,,Millbrook,NY,12545,10.5,57029,0.0,0.0,6,29.427869,1
10213,Northfield Bank,6420 Amboy Road,,Staten Island,NY,10309,10.5,44638,0.0,0.0,1,29.427869,1
10643,"Capital One, National Association",300 South Wellwood Avenue,,Lindenhurst,NY,11757,10.5,50006,0.0,0.0,9,29.427869,1
7028,"NBT Bank, National Association",723 Main Street,,Margaretville,NY,12455,10.5,50372,0.0,0.0,2,29.427869,1
10678,"Citibank, National Association",1198 Deer Park Avenue,,North Babylon,NY,11703,10.5,83000,0.0,0.0,5,29.427869,1


## Top 10 banks with the lowest officers rate per 1000

In [15]:
# The ten targeted banks with the smallest Officers1000 value, lowest first
# (ascending order is the sort_values default).
dftop10 = dftarget.sort_values('Officers1000').iloc[:10]
dftop10

Unnamed: 0_level_0,Name,Address1,Address2,City,State,Zip,ClosestPSDistance,Take,PDistance,Officers1000,FFLCount,CrimeRate1000,y_pred
BankID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
8974,"JPMorgan Chase Bank, National Association","2551 Broadway, 96th Street",,New York,NY,10025,5.868097,240350,0.11319,0.0,9,20.357615,1
9917,"JPMorgan Chase Bank, National Association",9059 Sutphin Boulevard,,Jamaica,NY,11435,5.560809,23801,0.143919,0.0,4,29.427869,1
9912,"JPMorgan Chase Bank, National Association",161-10 Jamaica Avenue,,Jamaica,NY,11432,5.021555,71246,0.197845,0.0,5,29.427869,1
9911,"JPMorgan Chase Bank, National Association",37-67 75th Street,,Jackson Heights,NY,11372,6.348042,117773,0.065196,0.0,10,29.427869,1
9910,"JPMorgan Chase Bank, National Association",37-15 82nd Street,,Jackson Heights,NY,11372,6.710446,166473,0.028955,0.0,9,29.427869,1
9909,"JPMorgan Chase Bank, National Association",156-33 Cross Bay Boulevard,,Howard Beach,NY,11414,8.219902,88501,0.0,0.0,4,29.427869,1
9908,"JPMorgan Chase Bank, National Association","8239 153rd Ave, Ste B",,Howard Beach,NY,11414,8.537553,33226,0.0,0.0,4,29.427869,1
9902,"JPMorgan Chase Bank, National Association",9900 Metropolitan Avenue,,Forest Hills,NY,11375,7.799695,44900,0.0,0.0,5,29.427869,1
9901,"JPMorgan Chase Bank, National Association",104-17 Queens Boulevard,,Forest Hills,NY,11375,7.399366,96835,0.0,0.0,6,29.427869,1
8337,"JPMorgan Chase Bank, National Association",3737 Hempstead Turnpike,,Levittown,NY,11756,6.625972,39794,0.037403,0.0,11,29.427869,1
