In [1]:
#importing packages
import os
import random

import numpy as np
import pandas as pd
import scipy.linalg as scl
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [2]:
def sigmoid(s):
    """Logistic function 1 / (1 + exp(-s)), applied element-wise by numpy."""
    denominator = 1. + np.exp(-s)
    return 1. / denominator
def sigmoid_backprop(da,x):
    """Chain an upstream gradient through the sigmoid.

    da: gradient with respect to the sigmoid output.
    x:  pre-activation value the sigmoid was evaluated at.
    Returns da * sigmoid(x) * (1 - sigmoid(x)).
    """
    activated = sigmoid(x)
    # Multiply in the order (da * a) * (1 - a).
    scaled = da * activated
    return scaled * (1 - activated)

In [3]:
class simple:
    """Small fully connected network trained with full-batch gradient descent.

    Each layer computes ``W @ a + b`` followed by an activation; only
    'sigmoid' is implemented. ``train()`` minimises the binary cross-entropy
    between the last layer's output and ``yTrain``.

    Typical use: construct, call ``train()``, then call
    ``forward_propagation(X)`` to obtain predictions for new inputs.
    """

    def __init__(
            self,
            XTrain,
            yTrain,
            eta,
            nodes_per_layer,
            activation_per_layer,
            epochs = 10000,
            seed=99
        ):
        """Store the training data and hyper-parameters (no weights are created).

        XTrain: training inputs, one sample per row; ``XTrain.shape[1]`` is
            used as the input width.
        yTrain: training targets; stored transposed.
        eta: learning rate, multiplies the gradients in ``update()``.
        nodes_per_layer: widths of the layers after the input layer.
        activation_per_layer: activation name for each layer; its length
            determines the number of layers.
        epochs: number of iterations performed by ``train()``.
        seed: value passed to ``np.random.seed`` in ``initialize()``.
        """
        self.XTrain = XTrain
        self.yTrain = yTrain.T
        self.seed   = seed
        self.nodes_per_layer = nodes_per_layer
        self.activation_per_layer = activation_per_layer
        self.eta = eta
        self.epochs = epochs

    def initialize(
            self
        ):
        """Create ``self.parameter_values`` with random weights and biases.

        Safe to call repeatedly: the layer widths including the input layer
        are kept in ``self.layer_sizes`` and ``self.nodes_per_layer`` is left
        as the caller supplied it, so a second ``train()`` builds the same
        architecture again.
        """
        np.random.seed(self.seed)
        self.layer_sizes = np.hstack((self.XTrain.shape[1], self.nodes_per_layer))
        self.parameter_values = {}
        for idx in range(len(self.activation_per_layer)):
            layer_idx = idx + 1
            layer_input_size = self.layer_sizes[idx]
            layer_output_size = self.layer_sizes[idx + 1]
            # NOTE(review): the generator is re-seeded for every layer, so all
            # layers draw from the same random stream. Kept as-is so that
            # existing trained results stay reproducible.
            np.random.seed(self.seed)
            self.parameter_values['W' + str(layer_idx)] = np.random.randn(
                layer_output_size, layer_input_size) * 0.1
            self.parameter_values['b' + str(layer_idx)] = np.random.randn(
                layer_output_size, 1) * 0.1

    def forward_propagation(
            self,
            X
        ):
        """Run ``X`` through every layer and return the last layer's activations.

        X is transposed on entry, so samples are expected along its first axis.
        The inputs ("a<k-1>") and pre-activations ("z<k>") of each layer k are
        cached in ``self.layer_dict`` for ``backward_propagation()``; the final
        activations are also left in ``self.a_current``.

        Raises ValueError for an activation name other than 'sigmoid'.
        """
        self.layer_dict = {}
        self.a_current = X.T

        for idx, activation in enumerate(self.activation_per_layer):
            layer_idx = idx + 1
            self.a_previous = self.a_current
            self.W_current = self.parameter_values["W" + str(layer_idx)]
            self.b_current = self.parameter_values["b" + str(layer_idx)]
            self.z_current = self.W_current @ self.a_previous + self.b_current
            if activation == 'sigmoid':
                self.a_current = sigmoid(self.z_current)
            else:
                # Continuing would silently reuse the previous layer's output.
                raise ValueError(
                    "Unknown activation function: {!r}".format(activation))
            self.layer_dict["a" + str(idx)] = self.a_previous
            self.layer_dict["z" + str(layer_idx)] = self.z_current
        return self.a_current

    def backward_propagation(
            self
        ):
        """Compute ``self.gradient_values`` ("dW<k>", "db<k>") for every layer.

        Uses ``self.a_current`` and ``self.layer_dict`` from the most recent
        ``forward_propagation()`` call together with ``self.yTrain``, so that
        call must have been made on the training inputs.

        Raises ValueError for an activation name other than 'sigmoid'.
        """
        self.gradient_values = {}
        # Derivative of the binary cross-entropy with respect to the output.
        da_previous = -(np.divide(self.yTrain, self.a_current)
                        - np.divide(1 - self.yTrain, 1 - self.a_current))

        for layer_idx_previous in reversed(range(len(self.activation_per_layer))):
            layer_idx_current = layer_idx_previous + 1
            da_current = da_previous

            a_previous = self.layer_dict["a" + str(layer_idx_previous)]
            z_current = self.layer_dict["z" + str(layer_idx_current)]
            W_current = self.parameter_values["W" + str(layer_idx_current)]

            # Number of columns (samples) the gradients are averaged over.
            n = a_previous.shape[1]
            activation = self.activation_per_layer[layer_idx_previous]
            if activation == 'sigmoid':
                dz_current = sigmoid_backprop(da_current, z_current)
            else:
                # Continuing would use an undefined or stale dz_current.
                raise ValueError(
                    "Unknown activation function: {!r}".format(activation))
            dW_current = dz_current @ a_previous.T / n
            db_current = np.sum(dz_current, axis=1, keepdims=True) / n
            da_previous = W_current.T @ dz_current

            self.gradient_values["dW" + str(layer_idx_current)] = dW_current
            self.gradient_values["db" + str(layer_idx_current)] = db_current

    ### COST STUFF
    def cross_entropy(
            self,
            y_pred,
            y_real
        ):
        """Mean binary cross-entropy between predictions and targets.

        y_pred must be 2-D with samples along axis 1 (``y_pred.shape[1]`` is
        the divisor). The result is squeezed to a 0-d array.
        """
        cost = (-1. / y_pred.shape[1]) * (
            y_real @ (np.log(y_pred).T) + (1 - y_real) @ (np.log(1 - y_pred)).T)
        return np.squeeze(cost)

    def accuracy(
            self,
            y_pred,
            y_real
        ):
        """Fraction of samples whose thresholded prediction equals the target.

        Predictions above 0.5 count as class 1, all others as class 0.
        y_pred is reshaped to a single row when its first dimension is not 1,
        and y_real is reshaped to match y_pred.
        """
        if y_pred.shape[0] != 1:
            y_pred = y_pred.reshape(1, y_pred.shape[0])
        y_real = y_real.reshape(y_pred.shape)
        y_pred_classed = np.copy(y_pred)
        y_pred_classed[y_pred_classed > 0.5] = 1
        y_pred_classed[y_pred_classed <= 0.5] = 0
        return (y_pred_classed == y_real).all(axis=0).mean()

    ## UPDATE WEIGHTS AND BIASES
    def update(
            self
        ):
        """Take one gradient-descent step of size ``self.eta`` on every layer.

        The layer count comes from ``self.activation_per_layer`` (the
        instance's own configuration), not from a module-level variable.
        """
        for layer_idx in range(1, len(self.activation_per_layer) + 1):
            self.parameter_values["W" + str(layer_idx)] -= self.eta * self.gradient_values["dW" + str(layer_idx)]
            self.parameter_values["b" + str(layer_idx)] -= self.eta * self.gradient_values["db" + str(layer_idx)]

    def train(
            self
        ):
        """Initialise the weights and run ``self.epochs`` full-batch iterations.

        The cost and training accuracy of every epoch are appended to
        ``self.cost_history`` and ``self.accuracy_history``; a progress line is
        printed every 1000 epochs.
        """
        self.initialize()
        self.cost_history = []
        self.accuracy_history = []
        for i in range(self.epochs):
            if i%1000==0:
                print("Calculating epoch ",i)
            self.forward_propagation(self.XTrain)
            cost = self.cross_entropy(self.a_current, self.yTrain)
            self.cost_history.append(cost)
            accuracy = self.accuracy(self.a_current, self.yTrain)
            self.accuracy_history.append(accuracy)
            self.backward_propagation()
            self.update()

In [4]:
# number of samples in the data set
N_SAMPLES = 1000
# fraction of the samples held out as the test set
TEST_SIZE = 0.1

# make_moons is imported with the other packages in the first cell.
X, y = make_moons(n_samples = N_SAMPLES, noise=0.2, random_state=100)
X_train_moons, X_test_moons, y_train_moons, y_test_moons = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)

In [5]:
# Network architecture shared by both experiments below: the width of every
# layer after the input layer, and one activation name per layer.
nodes_per_layer = [100,50,1] # the last (output) layer must have 1 node
activation_per_layer=['sigmoid','sigmoid','sigmoid']

In [6]:
# Train the network on the moons training split with learning rate 0.01
# (epochs and seed are left at the class defaults).
Moons = simple(X_train_moons,y_train_moons,0.01,nodes_per_layer,activation_per_layer)
Moons.train()

Calculating epoch  0
Calculating epoch  1000
Calculating epoch  2000
Calculating epoch  3000
Calculating epoch  4000
Calculating epoch  5000
Calculating epoch  6000
Calculating epoch  7000
Calculating epoch  8000
Calculating epoch  9000


In [9]:
# Predictions of the trained network for the held-out moons samples.
Y_test_hat_moons = Moons.forward_propagation(X_test_moons)
# Accuracy achieved on the test set
acc_test_moons = Moons.accuracy(Y_test_hat_moons, y_test_moons)
print("Test set accuracy: {:.2f} ".format(acc_test_moons))
# Baseline: compare the true test labels with an all-zeros vector, i.e. the
# accuracy obtained by always predicting class 0.
zerot_moons = np.zeros(y_test_moons.shape[0])
acc_test_moons2 = Moons.accuracy(y_test_moons, zerot_moons)
print("Zero set accuracy: {:.2f} ".format(acc_test_moons2))

Test set accuracy: 0.82 
Zero set accuracy: 0.54 


In [10]:
def ReadData(filename='default of credit card clients.xls', trainingShare=0.5, seed=1):
    """Load the credit-card default spreadsheet and return scaled train/test arrays.

    Steps: read the Excel file, rename the target column, replace -2 by 0 in
    the PAY_i columns, shuffle the rows, one-hot encode feature columns 1, 2
    and 3, make a stratified train/test split, and standardise the
    non-one-hot columns with statistics fitted on the training part only.

    filename: path of the Excel file to read.
    trainingShare: fraction of the rows used for training; the remainder
        becomes the test set.
    seed: random state used for both the row shuffle and the split.

    Returns (XTrain_scaled, XTest_scaled, yTrain, yTest); the targets are
    2-D arrays with a single column.
    """
    nanDict = {} #this does nothing with this data set
    #read file
    df = pd.read_excel(filename,header=1,skiprows=0,index_col=0,na_values=nanDict)
    #rename last column
    df = df.rename(index=str, columns={"default payment next month": "defaultPaymentNextMonth"})
    #Replace nonsensical values in PAY_i columns with 0
    for i in [0,2,3,4,5,6]:
        col = 'PAY_{}'.format(i)
        # Assign the result back: an in-place replace on the selected column
        # is a chained assignment and does not reliably modify df.
        df[col] = df[col].replace(to_replace=-2, value=0)
    #shuffle dataset by row (the result must be kept, sample() returns a new frame)
    df = df.sample(frac=1, random_state=seed)

    # Define features and targets
    X = df.loc[:, df.columns != 'defaultPaymentNextMonth'].values
    y = df.loc[:, df.columns == 'defaultPaymentNextMonth'].values

    # Categorical variables to one-hots
    onehotencoder1 = OneHotEncoder(categories='auto')
    onehotencoder2 = OneHotEncoder(categories='auto',drop='first')

    # sets number of elements in onehot vectors automatically from data.
    # sparse_threshold=0 forces a dense array, which the slicing below needs.
    Xt = ColumnTransformer(
        [("one", onehotencoder1, [1]),("two", onehotencoder2, [2,3]),],
        remainder="passthrough",
        sparse_threshold=0
    ).fit_transform(X)

    # The encoded columns come first, followed by the passthrough columns.
    # Three input columns (1, 2, 3) are consumed by the encoders, so the
    # number of one-hot columns follows from the output width.
    n_passthrough = X.shape[1] - 3
    n_onehot = Xt.shape[1] - n_passthrough

    # Train-test split
    XTrain, XTest, yTrain, yTest = train_test_split(
        Xt, y,
        train_size=trainingShare,
        test_size=1-trainingShare,
        random_state=seed,
        stratify=y)

    #scale data, except one-hotted
    sc = StandardScaler()
    #removes mean, scales by std (fitted on the training part only)
    XTrain_scaler = sc.fit_transform(XTrain[:,n_onehot:])
    XTest_scaler = sc.transform(XTest[:,n_onehot:])
    #puts together the complete model matrix again
    XTrain_scaled = np.c_[XTrain[:,:n_onehot],XTrain_scaler]
    XTest_scaled = np.c_[XTest[:,:n_onehot],XTest_scaler]

    return XTrain_scaled,XTest_scaled,yTrain,yTest

In [11]:
# Load the credit-card data: scaled train/test features and their targets.
XTrain_cc,XTest_cc,yTrain_cc,yTest_cc = ReadData()

In [12]:
# y_tr = yTrain[np.newaxis,:]

In [13]:
# Train a network with the same architecture and learning rate (0.01) on the
# credit-card training split.
CreditCard = simple(XTrain_cc,yTrain_cc,0.01,nodes_per_layer,activation_per_layer)
CreditCard.train()

Calculating epoch  0
Calculating epoch  1000
Calculating epoch  2000
Calculating epoch  3000
Calculating epoch  4000
Calculating epoch  5000
Calculating epoch  6000
Calculating epoch  7000
Calculating epoch  8000
Calculating epoch  9000


In [20]:
# Predictions of the trained network for the held-out credit-card samples.
Y_test_hat_cc = CreditCard.forward_propagation(XTest_cc)
# Accuracy achieved on the test set (labels passed as a single row).
acc_test_cc = CreditCard.accuracy(Y_test_hat_cc, np.transpose(yTest_cc.reshape((yTest_cc.shape[0], 1))))
print("Test set accuracy: {:.2f} ".format(acc_test_cc))
# Baseline: compare the true test labels with an all-zeros vector, i.e. the
# accuracy obtained by always predicting class 0.
zerot = np.zeros(yTest_cc.shape[0])
acc_test_cc2 = CreditCard.accuracy(np.transpose(yTest_cc.reshape((yTest_cc.shape[0], 1))), np.transpose(zerot.reshape((zerot.shape[0], 1))))
print("Zero set accuracy: {:.2f} ".format(acc_test_cc2))

Test set accuracy: 0.81 
Zero set accuracy: 0.78 
