### Train and test three classifiers using scikit learn libraries/algorithms in Python
##### House Loan example, Binary classification

1. import libraries and modules
2. import dataset using pandas
3. pre-process the dataset (missing data, normalization, label encoding, imbalanced data, train/test split)
4. train models
5. test models

In [10]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split

In [11]:
loan = pd.read_csv("HouseLoan.csv")
loan.describe()

Unnamed: 0,Feat1,Feat2
count,30.0,30.0
mean,30.3,111533.333333
std,8.047917,23247.889535
min,18.0,80000.0
25%,24.25,95250.0
50%,28.0,105000.0
75%,35.0,120750.0
max,51.0,175000.0


In [12]:
# split into features (predictors) and target/label using pandas
loan_feat = loan.iloc[:,0:2] # features
loan_targ = loan.iloc[:,2] #target.label

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

loan_targLE = LabelEncoder().fit_transform(loan['Loan'])

In [13]:
RFclass = RandomForestClassifier() # tune hyperparametesusing cross-validation
NNclass = MLPClassifier()
LRclass = LogisticRegression()

In [14]:
# train test split
loanTrainX,loanTestX,loanTrainY,loanTestY = train_test_split(loan_feat, loan_targLE, test_size=0.4)

# fit training data, train models in training data
RFclass.fit(loanTrainX, loanTrainY)
NNclass.fit(loanTrainX, loanTrainY)
LRclass.fit(loanTrainX, loanTrainY)

# ground truth, test data, actual test output, historical, we have it, it exists
# carry out predictions on test dataset
RFpredLoan = RFclass.predict(loanTestX)
NNpredLoan = NNclass.predict(loanTestX)
LRpredLoan = LRclass.predict(loanTestX)

# Error metrics using roc_auc score
print('RF classifier accuracy score:', roc_auc_score(loanTestY, RFpredLoan))
print('NN classifier accuracy score:', roc_auc_score(loanTestY, NNpredLoan))
print('LR classifier accuracy score:', roc_auc_score(loanTestY, LRpredLoan))

# Error metrics using accuracy score
print('RF classifier accuracy score:', accuracy_score(loanTestY, RFpredLoan))
print('NN classifier accuracy score:', accuracy_score(loanTestY, NNpredLoan))
print('LR classifier accuracy score:', accuracy_score(loanTestY, LRpredLoan))

RF classifier accuracy score: 0.8571428571428572
NN classifier accuracy score: 0.5
LR classifier accuracy score: 0.5
RF classifier accuracy score: 0.8333333333333334
NN classifier accuracy score: 0.5833333333333334
LR classifier accuracy score: 0.4166666666666667
