In [201]:
import numpy as np   
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ShuffleSplit
from sklearn.neural_network import MLPClassifier
import time
import pandas as pd

In [202]:
def read_csv(f):
    """Load a semicolon-delimited CSV into a pandas DataFrame.

    Parameters
    ----------
    f : str, path-like or file-like
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        The parsed table; fields are split on ``";"``.
    """
    return pd.read_csv(f, sep=";")

In [203]:
def cross_validation_split(df, n_splits=4, test_size=.25, random_state=None):
    """Label-encode the feature columns of ``df`` and prepare random splits.

    The last column of ``df`` is treated as the target; every other column
    is a feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table with the target in the last column.
    n_splits : int, default 4
        Number of random train/test splits to generate.
    test_size : float, default .25
        Passed to ``ShuffleSplit`` as the test-set size.
    random_state : int or None, default None
        Seed forwarded to ``ShuffleSplit``. Pass an integer to make the
        splits reproducible between runs.

    Returns
    -------
    X : numpy.ndarray
        Feature matrix in which every column has been replaced by the
        output of ``LabelEncoder.fit_transform`` for that column.
    Y : numpy.ndarray
        The untouched last column of ``df``.
    rs_list : iterable
        The object returned by ``ShuffleSplit.split(X)``; the caller iterates
        it to obtain ``(train_index, test_index)`` pairs.
    """
    # Materialise the frame once instead of once per slice.
    values = df.values

    # Work on a copy: the array behind ``df.values`` may share memory with
    # the DataFrame (or be read-only), and the loop below assigns into X.
    X = values[:, :-1].copy()
    Y = values[:, -1]

    # fit() does not accept strings, so every feature column is encoded.
    # NOTE(review): numeric columns are encoded as well, which replaces
    # their values with category codes -- confirm this is intended.
    le = preprocessing.LabelEncoder()
    for col in range(X.shape[1]):
        X[:, col] = le.fit_transform(X[:, col])

    # Random resampling: each split holds out ``test_size`` of the rows.
    rs = ShuffleSplit(n_splits=n_splits, test_size=test_size,
                      random_state=random_state)
    rs_list = rs.split(X)

    return X, Y, rs_list

In [204]:
def ANN(*data):
    """Train and score an MLP classifier on every train/test split.

    For each split the features are standardised with a ``StandardScaler``
    fitted on the training rows only, a ``MLPClassifier`` with two hidden
    layers of 15 units is trained, and its accuracy on the test rows is
    printed together with the elapsed time. The mean accuracy over all
    splits and the total elapsed time are printed at the end.

    Parameters
    ----------
    *data
        Exactly three positional values ``(X, Y, data_index)``:

        * ``X`` -- feature array, indexed as ``X[train_index]``.
        * ``Y`` -- label array, indexed the same way.
        * ``data_index`` -- iterable of ``(train_index, test_index)`` pairs.

    Returns
    -------
    None
        Results are reported through ``print`` only.

    Raises
    ------
    ValueError
        If ``data`` does not contain exactly three values.
    """
    X, Y, data_index = data
    splits = list(data_index)
    score_list = []
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # supported high-resolution timer for measuring elapsed intervals.
    tic = time.perf_counter()

    for train_index, test_index in splits:

        cross_tic = time.perf_counter()

        # Standardize features by removing the mean and scaling to unit
        # variance. The scaled rows live in per-split locals: writing them
        # back into X would make later splits re-scale already-scaled rows
        # and would modify the caller's array.
        scaler = StandardScaler()
        scaler.fit(X[train_index])
        X_train = scaler.transform(X[train_index])
        X_test = scaler.transform(X[test_index])

        mlp = MLPClassifier(hidden_layer_sizes=(15,15))
        mlp.fit(X_train, Y[train_index])
        predictions = mlp.predict(X_test)

        s = accuracy_score(Y[test_index], predictions)
        score_list.append(s)
        print("The cross validation score in part is {}".format(s))
        print("The training of part costs {} s".format(time.perf_counter()-cross_tic))
        print("=====================NEXT PART=====================")

    # With no splits there is nothing to average; report nan directly
    # rather than triggering numpy's empty-mean warning.
    score = np.mean(score_list) if score_list else float("nan")
    print("\nThe prediction accuracy score of mean is {}".format(score))
    print('Time for training spent {} secs' .format(time.perf_counter()-tic))

In [205]:
def main(csv_file="C:/Users/user/Desktop/bank-additional/bank-additional-full.csv"):
    """Run the whole pipeline: load the CSV, encode/split it, train and score.

    Parameters
    ----------
    csv_file : str, optional
        Location of the semicolon-delimited dataset. The default is the
        original author's local path; pass another path to run the notebook
        on a different machine without editing this function.

    Returns
    -------
    None
        All results are printed by ``ANN``.
    """
    csv_df = read_csv(csv_file)
    X, Y, rs_list = cross_validation_split(csv_df)
    ANN(X, Y, rs_list)

In [206]:
# Entry point: run the pipeline only when this code is executed as the main
# program, not when it is imported as a module.
if __name__ == "__main__":
    main()



The cross validation score in part is 0.9143439836845684
The training of part costs 5.6262627174305635 s
The cross validation score in part is 0.91162474507138
The training of part costs 7.7395119017387515 s
The cross validation score in part is 0.9090026221229485
The training of part costs 7.722876011127937 s
The cross validation score in part is 0.9097795474410022
The training of part costs 7.448048636697877 s

The prediction accuracy score of mean is 0.9111877245799748
Time for training spent 28.536897010503708 secs
