In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn import preprocessing
#from statsmodels.api import datasets
from sklearn import datasets ## Get dataset from sklearn
import sklearn.model_selection as ms
import sklearn.metrics as sklm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import numpy.random as nr
import itertools

%matplotlib inline

In [2]:
# Load the pre-processed table and pull the target column out before it is removed.
bikesData = pd.read_csv('BikesProcessed.csv')
Labels = bikesData['BikeBuyer']

# Remove the first column by position (presumably a saved row index -- confirm against
# the CSV) and then the label column, so that only predictor columns remain.
bikesData = (
    bikesData
    .drop(columns=bikesData.columns[0])
    .drop(columns='BikeBuyer')
)

# Dense array view of the predictors for the scikit-learn calls further down.
Features = np.array(bikesData)
print(Features.shape)
bikesData.head()

(16404, 12)


Unnamed: 0,HomeOwnerFlag,NumberCarsOwned,NumberChildrenAtHome,AreaCode,Sex,Married,JobType,EducationType,Country,Income,AgeBracket,ChildrenFlag
0,1,0,0,500,1,1,0,0,1,3,0,1
1,0,1,3,500,1,0,0,0,1,3,1,1
2,1,1,3,500,1,1,0,0,1,2,1,1
3,0,1,0,500,0,0,0,0,1,2,0,0
4,1,4,5,500,0,0,0,0,1,2,0,1


In [3]:
# Enumerate every non-empty subset of the predictor columns, smallest subsets first.
# The largest subset size is taken from the frame itself rather than hard-coded, so
# the enumeration stays complete if the number of predictor columns changes.
feature_names = list(bikesData)
feature_combs = [
    comb
    for subset_size in range(1, len(feature_names) + 1)
    for comb in itertools.combinations(feature_names, subset_size)
]
print(len(feature_combs))

4095


In [4]:
# Fit a default MLPClassifier on every feature subset and record, per subset, the
# number of features plus the train/test values returned by the model's score().
size = []
train_scores = []
test_scores = []
nr.seed(1115)

# The split uses a fixed random_state and the row count is the same for every
# subset, so the train/test row indices (and therefore the label vectors) are
# identical on every iteration. Compute them once instead of once per subset.
indx = ms.train_test_split(range(bikesData.shape[0]), test_size=5000, random_state=0)
train_idx, test_idx = indx
# The indices are row positions, so select labels positionally.
y_train = np.ravel(Labels.iloc[train_idx])
y_test = np.ravel(Labels.iloc[test_idx])

for comb in feature_combs:
    comb_features = np.array(bikesData[list(comb)])
    X_train = comb_features[train_idx, :]
    X_test = comb_features[test_idx, :]

    # Rescale features: the scaler is fitted on the training rows only and then
    # applied to both partitions.
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    nn_mod = MLPClassifier(random_state=0)
    nn_mod.fit(X_train, y_train)
    train_scores.append(nn_mod.score(X_train, y_train))
    test_scores.append(nn_mod.score(X_test, y_test))
    size.append(len(comb))



In [5]:
# Collect the sweep results into one table (one row per feature subset) and save it.
midx = pd.DataFrame({
    'size': size,
    'train_scores': train_scores,
    'test_scores': test_scores,
    'combs': feature_combs,
})
midx.to_csv('NeuralNetwork.csv')

# Build copies of the data in which every case labelled 1 appears twice: the
# label-1 rows are appended once more after the complete original data.
is_positive = Labels == 1
temp_Features = np.concatenate((Features, Features[is_positive, :]), axis=0)
temp_Labels = np.concatenate((Labels, Labels[is_positive]), axis=0)

print(temp_Features.shape)
print(temp_Labels.shape)

# Rescale numeric features: fit a StandardScaler on X_train, then apply it to both
# X_train and X_test.
# NOTE(review): in the visible notebook the most recent assignments to X_train and
# X_test are inside the feature-combination loop, where both were already passed
# through a scaler fitted the same way, and both are reassigned from Features
# further down before the final model is fitted. This step therefore looks like
# leftover state from an earlier cell order -- confirm whether it is still needed.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape)
# Bare expression: only rendered when it is the last statement of a notebook cell.
X_train[:5,:]

# Fold generators for nested cross validation: `inside` is used by the grid search,
# `outside` by the outer performance estimate.
# With shuffle=True and no random_state, KFold draws its permutation from NumPy's
# global generator only when split() is called, so seeding immediately before the
# constructor does not pin the folds. Passing the seeds as random_state does, and
# makes each splitter reproducible regardless of what else consumes the global RNG.
inside = ms.KFold(n_splits=3, shuffle=True, random_state=123)
outside = ms.KFold(n_splits=3, shuffle=True, random_state=321)

## Candidate values for the two hyperparameters searched here (beta_1 and beta_2).
## Further dimensions that are currently switched off:
##   "alpha": [0.0000001, 0.000001, 0.00001], "early_stopping": [True, False]
param_grid = dict(
    beta_1=[0.95, 0.90, 0.80],
    beta_2=[0.999, 0.9, 0.8],
)

## Base network: two hidden layers of 100 units, at most 300 iterations
base_mlp = MLPClassifier(max_iter=300, hidden_layer_sizes=(100, 100))

## Wrap the network in a grid search scored on recall, using the inside folds
nr.seed(3456)
nn_clf = ms.GridSearchCV(
    estimator=base_mlp,
    param_grid=param_grid,
    scoring='recall',
    cv=inside,
    return_train_score=True,
)

## Run the search on the full data and report the selected values
nr.seed(6677)
nn_clf.fit(Features, Labels)
for hyper_name in ('beta_1', 'beta_2'):
    print(getattr(nn_clf.best_estimator_, hyper_name))

# Outer loop of the nested cross validation: nn_clf (the grid-search object) is
# evaluated on the outside folds. No `scoring` argument is passed here, so the
# values come from nn_clf's own score() method.
nr.seed(498)
cv_estimate = ms.cross_val_score(nn_clf, Features, Labels,
                                 cv = outside) # Use the outside folds

print('Mean performance metric = %4.3f' % np.mean(cv_estimate))
# Label corrected from the misspelled 'SDT' to 'STD' (standard deviation).
print('STD of the metric       = %4.3f' % np.std(cv_estimate))
print('Outcomes by cv fold')
for i, x in enumerate(cv_estimate):
    print('Fold %2d    %4.3f' % (i+1, x))

## Hold out 5000 randomly selected cases as an independent test set
nr.seed(1115)
indx = ms.train_test_split(range(Features.shape[0]), test_size=5000)
train_rows, test_rows = indx
X_train, X_test = Features[train_rows, :], Features[test_rows, :]
y_train = np.ravel(Labels[train_rows])
y_test = np.ravel(Labels[test_rows])

## Rescale both partitions with a scaler fitted on the training cases only
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape)
X_train[:5, :]

## Oversample the minority case for the training data
#y_temp = y_train[y_train == 1] 
#X_temp = X_train[y_train == 1,:]
#X_train = np.concatenate((X_train, X_temp), axis = 0)
#y_train = np.concatenate((y_train, y_temp), axis = 0) 

## Final network: same layer sizes and iteration cap as the searched model, with the
## beta_1 / beta_2 values taken from the grid search's best estimator
nr.seed(1115)
best_mlp = nn_clf.best_estimator_
nn_mod = MLPClassifier(max_iter=300,
                       hidden_layer_sizes=(100, 100),
                       beta_1=best_mlp.beta_1,
                       beta_2=best_mlp.beta_2)
nn_mod.fit(X_train, y_train)

def score_model(probs, threshold):
    """Convert class probabilities into hard 0/1 predictions.

    Parameters
    ----------
    probs : array-like, two-dimensional
        One row per case. Only column 1 is read.
    threshold : float
        A case is scored 1 when its column-1 value is strictly greater than
        ``threshold``, otherwise 0.

    Returns
    -------
    numpy.ndarray
        One-dimensional integer array with one 0/1 entry per row of ``probs``.
        An input with zero rows yields an empty integer array.
    """
    # Vectorized comparison instead of a Python-level loop; asarray lets plain
    # nested lists be passed as well as ndarrays.
    positive_prob = np.asarray(probs)[:, 1]
    return (positive_prob > threshold).astype(int)

def print_metrics(labels, probs, threshold):
    """Print a confusion matrix and summary metrics for a binary classifier.

    Class 1 is treated as the positive class and class 0 as the negative class,
    matching ``score_model``, which emits 1 when the column-1 probability exceeds
    the threshold.

    Parameters
    ----------
    labels : array-like
        True 0/1 class labels.
    probs : array-like, two-dimensional
        Per-class probabilities; column 1 is used for thresholding and for the AUC.
    threshold : float
        Cut-off passed to ``score_model``.

    Returns
    -------
    None
        The report is written to standard output.
    """
    scores = score_model(probs, threshold)

    # scikit-learn orders classes ascending by default, which would put class 0 in
    # every row/column/entry this report titles "positive". Listing class 1 first
    # makes index 0 the positive class, so the figures agree with the headings.
    # It also guarantees a 2x2 matrix when one class is absent from the inputs.
    class_order = [1, 0]
    metrics = sklm.precision_recall_fscore_support(labels, scores, labels=class_order)
    conf = sklm.confusion_matrix(labels, scores, labels=class_order)
    precision, recall, f1, support = metrics

    print('                 Confusion matrix')
    print('                 Score positive    Score negative')
    print('Actual positive    %6d' % conf[0,0] + '             %5d' % conf[0,1])
    print('Actual negative    %6d' % conf[1,0] + '             %5d' % conf[1,1])
    print('')
    print('Accuracy        %0.2f' % sklm.accuracy_score(labels, scores))
    print('AUC             %0.2f' % sklm.roc_auc_score(labels, probs[:,1]))
    # Macro averages are the unweighted mean over the two classes.
    print('Macro precision %0.2f' % ((float(precision[0]) + float(precision[1])) / 2.0))
    print('Macro recall    %0.2f' % ((float(recall[0]) + float(recall[1])) / 2.0))
    print(' ')
    print('           Positive      Negative')
    print('Num case   %6d' % support[0] + '        %6d' % support[1])
    print('Precision  %6.2f' % precision[0] + '        %6.2f' % precision[1])
    print('Recall     %6.2f' % recall[0] + '        %6.2f' % recall[1])
    print('F1         %6.2f' % f1[0] + '        %6.2f' % f1[1])
    
# Score the held-out test cases with the final model and report at a 0.5 cut-off.
probabilities = nn_mod.predict_proba(X_test)
print_metrics(labels=y_test, probs=probabilities, threshold=0.5)