In [None]:
import keras as ks
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from numpy.linalg import norm, svd
from sklearn.decomposition import PCA

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras import optimizers
from sklearn.pipeline import Pipeline


# Fixed seed: initializes NumPy's global random generator here and is reused
# as the random_state of the StratifiedKFold splitter further down.
seed = 7
np.random.seed(seed)

#function definition
def pcaAnalysis(X, lmbda=.01, tol=1e-3,maxiter=100, verbose=True):
    """Split ``X`` into a low-rank part ``A`` and a sparse part ``E``, ``X ~ A + E``.

    Each iteration (augmented-Lagrangian style):
      1. updates ``E`` by entrywise soft-thresholding with threshold ``lmbda / mu``;
      2. updates ``A`` by soft-thresholding the singular values with threshold
         ``1 / mu`` (values at or below the threshold are dropped entirely);
      3. updates the multiplier ``Y`` with the residual ``Z = X - A - E`` and
         grows the penalty ``mu`` by a factor ``rho``, capped at ``1e7`` times
         its initial value.

    Args:
        X: 2-D array-like of numbers (converted to a float ndarray).
        lmbda: weight of the sparse term; larger values keep ``E`` sparser.
        tol: stop once ``||X - A - E||_F / ||X||_F`` drops below this value.
        maxiter: upper bound on the number of iterations (at least one
            iteration is always performed for non-zero input).
        verbose: when true, print the iteration count on exit.

    Returns:
        Tuple ``(A, E)`` of float arrays with the same shape as ``X``.
    """
    X = np.asarray(X, dtype=float)
    dnorm = norm(X, 'fro')
    if dnorm == 0:
        # All-zero input: every scaling norm below would be zero and the
        # updates would divide by it. The decomposition is trivially 0 + 0.
        if verbose:
            print("Finished at iteration %d" % (0))
        return np.zeros(X.shape), np.zeros(X.shape)

    norm_two = norm(X.ravel(), 2)
    norm_inf = norm(X.ravel(), np.inf) / lmbda
    # Scale the initial multiplier by the larger of the two norms.
    Y = X / np.max([norm_two, norm_inf])
    A = np.zeros(X.shape)
    E = np.zeros(X.shape)
    mu = 1.25 / norm_two
    # Fixed ceiling for the penalty; computing it from the *current* mu inside
    # the loop would make the clamp a no-op.
    mu_bar = mu * 1e7
    rho = 1.5
    itr = 0
    while True:
        # Sparse step: entrywise shrinkage towards zero by lmbda / mu.
        Eraw = X - A + (1 / mu) * Y
        Eupdate = np.maximum(Eraw - lmbda / mu, 0) + np.minimum(Eraw + lmbda / mu, 0)

        # Low-rank step: keep only singular values strictly above 1 / mu and
        # shrink them by 1 / mu. svp must be the COUNT of such values (S is
        # sorted in descending order), not the length of S.
        U, S, V = svd(X - Eupdate + (1 / mu) * Y, full_matrices=False)
        svp = int(np.sum(S > 1 / mu))
        A = np.dot(U[:, :svp] * (S[:svp] - 1 / mu), V[:svp, :])
        E = Eupdate

        # Multiplier and penalty updates driven by the current residual.
        Z = X - A - E
        Y = Y + mu * Z
        mu = min(mu * rho, mu_bar)
        itr += 1
        if ((norm(Z, 'fro') / dnorm) < tol) or (itr >= maxiter):
            break
    if verbose:
        print("Finished at iteration %d" % (itr))
    return A, E

def neural_network():
    """Build and compile the dropout-regularized binary classifier.

    The network takes 10 input features and stacks Dense layers of 12, 14000
    and 7000 ReLU units, each preceded by 50% dropout (including dropout on
    the raw inputs), followed by dropout and a single sigmoid output unit.
    It is compiled with Adam and binary cross-entropy, tracking accuracy.

    Returns:
        The compiled, untrained Sequential model.
    """
    drop_rate = 0.5
    stack = [
        Dropout(drop_rate, input_shape=(10,)),
        Dense(12, input_dim=10, activation='relu'),
        Dropout(drop_rate),
        Dense(14000, activation='relu'),
        Dropout(drop_rate),
        Dense(7000, activation='relu'),
        Dropout(drop_rate),
        Dense(1, activation='sigmoid'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0),
        metrics=['accuracy'],
    )
    return model

# Read both CSV splits, discarding the unnamed index column written by the exporter.
dataTrain = pd.read_csv('cs-training.csv').drop(labels=['Unnamed: 0'], axis='columns')
dataTest = pd.read_csv('cs-test.csv').drop(labels=['Unnamed: 0'], axis='columns')

# Detach the label column from each frame: pop() returns it and leaves the
# frame holding only the feature columns.
targetTrain = dataTrain.pop('SeriousDlqin2yrs')
targetTest = dataTest.pop('SeriousDlqin2yrs')

#Preprocessing
# Missing feature values are replaced by 0.0 (the targets were split off
# above and are left untouched).
dataTrain = dataTrain.fillna(0.0)
dataTest = dataTest.fillna(0.0)
#scaler = StandardScaler()
#dataTrainNormalized = scaler.fit_transform(dataTrain)
#dataTestNormalized = scaler.fit_transform(dataTest)

#PCA
#dataTrainPCA = np.array(dataTrainNormalized)
#dataTestPCA = np.array(dataTestNormalized)

#pca = PCA(n_components=8)
#pca.fit(dataTrainPCA)
#PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,
#  svd_solver='auto', tol=0.0, whiten=False)

#print(pca.transform(dataTrainPCA))
#print(dataTrainPCA)
#print(dataTestPCA)
#print(np.shape(dataTestPCA))
#print(np.shape(dataTrainPCA))
#sz = 8
#C, D = pcaAnalysis(dataTrainPCA[:,:sz])
#dataTrainPCA = C+D
#C, D = pcaAnalysis(dataTestPCA[:,:sz])
#dataTestPCA = C+D
#print(np.shape(dataTestPCA))
#print(np.shape(dataTrainPCA))
#print(np.shape(dataTestPCA))
#print(np.shape(dataTestPCA))

#Cross validation data split
#from sklearn.model_selection import train_test_split

#X_train, X_test, y_train, y_test = train_test_split(dataTrainNormalized, targetTrain, test_size=0.4, random_state=0)

#K fold validation
#from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn import model_selection

#kfold = model_selection.KFold(n_splits=10, random_state=seed)
# Stratified 5-fold splitter, shuffled reproducibly with the module-level seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
# Neural Net Dropout

from keras.wrappers.scikit_learn import KerasClassifier

# Two-step pipeline: standardize the features, then train the Keras network
# built by neural_network() through its scikit-learn wrapper.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=neural_network, epochs=30, batch_size=16, verbose=1)),
]
pipeline = Pipeline(steps=estimators)


#Evaluation
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, confusion_matrix

# A confusion matrix needs one predicted label per sample. cross_val_score only
# yields one accuracy value per fold, so collect out-of-fold predictions
# instead: every sample is predicted by the model that did not train on it.
# ravel() flattens a possible (n_samples, 1) column returned by the Keras wrapper.
predictions = np.ravel(cross_val_predict(pipeline, dataTrain, targetTrain, cv=kfold))

# Per-fold accuracy, derived from the same predictions so the pipeline is not
# trained a second time. With an integer random_state, kfold.split() yields
# the same folds on every call.
scores = np.array([
    accuracy_score(targetTrain.iloc[test_idx], predictions[test_idx])
    for _, test_idx in kfold.split(dataTrain, targetTrain)
])

conf_mat = confusion_matrix(targetTrain, predictions)
print(conf_mat)

Epoch 1/30