**Goal: Create a network with one hidden layer**

In [5]:
import os

import numpy as np

In [76]:
def initialize(X, layer_size=2, weight_scale=0.0, seed=None):
    '''Create the starting parameters for a network with one hidden layer.

        X = n_x by m numpy array; only its number of rows (features) is used
        layer_size = number of neurons in the hidden layer
        weight_scale = standard deviation of the Gaussian values used for the
            initial weights; the default 0.0 gives all-zero weights
        seed = optional seed for the random weights (unused when weight_scale is 0)

    Returns (w, b):
        w = list of layer_size arrays of shape (n_x, 1), one per hidden neuron,
            followed by one (layer_size, 1) array for the output neuron
        b = array of layer_size+1 biases evenly spaced over [-1, 1]; the last
            entry is the output neuron's bias'''

    n_x = X.shape[0]

    # one weight column per hidden neuron, then the output neuron's column
    shapes = [(n_x, 1)] * layer_size + [(layer_size, 1)]

    if weight_scale:
        rng = np.random.RandomState(seed)
        w = [weight_scale * rng.standard_normal(shape) for shape in shapes]
    else:
        w = [np.zeros(shape) for shape in shapes]

    # with zero weights the evenly spaced biases are the only thing that
    # makes the hidden neurons differ from one another
    b = np.linspace(-1, 1, num=layer_size+1)

    return w, b

In [77]:
# toy data set: 2 features (rows) by 3 samples (columns), every sample labelled 1
X = np.tile(np.array([1, 2, 3]), (2, 1))
Y = np.ones((3, 1), dtype=int)

In [78]:
# build the starting parameters for the toy data (default: 2 hidden neurons) and show them
w, b = initialize(X)

print(w, b)

[array([[0.],
       [0.]]), array([[0.],
       [0.]]), array([[0.],
       [0.]])] [-1.  0.  1.]


In [9]:
def sigmoid(x):
    '''Logistic function 1/(1+exp(-x)), applied elementwise.

        x = float or numpy array

    Returns the logistic of x (same shape as x for array input).'''

    decay = np.exp(-x)
    return 1/(1+decay)

In [10]:
def logloss(y, yhat, eps=1e-15):
    '''Log loss (binary cross-entropy) for each prediction.

        y = true (binary) class
        yhat = predicted class probability
        eps = probabilities are clipped to [eps, 1-eps] before taking logs

    Returns -(y*log(yhat) + (1-y)*log(1-yhat)), elementwise for array input.'''

    # without clipping, yhat of exactly 0 or 1 produces 0*log(0) = nan
    # (or an infinite loss) instead of a finite value
    yhat = np.clip(yhat, eps, 1-eps)

    return -(y*np.log(yhat)+(1-y)*np.log(1-yhat))

In [59]:
def propogate(X, Y, w, b, layer_size=2):
    '''Run one forward and backward pass through the one-hidden-layer network.

        X = n_x by m numpy array containing training data, where n_x is the number of features and m is the number of samples
        Y = array holding the m true (binary) sample classes; reshaped to m by 1 internally
        w = list of layer_size arrays of shape (n_x, 1) (hidden neurons) followed by
            one (layer_size, 1) array (output neuron)
        b = array of layer_size+1 biases, the last one belonging to the output neuron
        layer_size = number of neurons in the hidden layer

    Returns a dict with
        'dw' = list of gradients of the mean log loss, one per entry of w; the hidden
               entries have shape (1, n_x), the last one has shape (1, layer_size)
        'db' = list of layer_size+1 scalar gradients, one per bias'''

    m = X.shape[1]

    # force a column vector: a 1-D Y would silently broadcast (out-Y) to an m by m array
    Y = np.asarray(Y).reshape(m, 1)

    # forward propagation
    # hidden layer: put the per-neuron weight columns side by side so that all
    # activations come out of a single matrix product
    W_hidden = np.concatenate(w[:layer_size], axis=1)    # (n_x, layer_size)
    b_hidden = np.asarray(b[:layer_size])
    hidden = sigmoid(np.dot(X.T, W_hidden) + b_hidden)    # (m, layer_size)

    # output layer
    out = sigmoid(np.dot(hidden, w[-1]) + b[-1])    # (m, 1)

    # backward propagation
    # for a sigmoid output with log loss, d(loss)/d(output pre-activation) = out - Y
    delta_out = out - Y    # (m, 1)

    # chain rule through the output weight and the hidden sigmoid;
    # column k belongs to hidden neuron k
    delta_hidden = delta_out * np.reshape(w[-1], (1, layer_size)) * hidden * (1 - hidden)

    # average over the m samples; row k is the gradient for hidden neuron k
    dW_hidden = (1/m) * np.dot(delta_hidden.T, X.T)    # (layer_size, n_x)
    dw = [dW_hidden[k:k+1, :] for k in range(layer_size)]
    dw.append((1/m) * np.dot(delta_out.T, hidden))

    db = [(1/m) * np.sum(delta_hidden[:, k]) for k in range(layer_size)]
    db.append((1/m) * np.sum(delta_out))

    return {'dw': dw, 'db': db}

In [12]:
# gradients for the toy data at the current w and b
gradient = propogate(X, Y, w, b)

In [13]:
# show the 'dw' and 'db' lists returned by propogate
gradient

{'dw': [array([[0., 0.]]),
  array([[0., 0.]]),
  array([[-0.07232949, -0.13447071]])],
 'db': [0.0, 0.0, -0.2689414213699951]}

In [43]:
def fit(X, Y, w, b, layer_size=2, iterations=2000, learning_rate=0.01):
    '''Fit the network parameters by batch gradient descent.

        X = n_x by m array with samples in columns
        Y = m by 1 array of true binary class values
        w = list of weight arrays (hidden neurons first, output neuron last)
        b = model bias array (output neuron's bias last)
        layer_size = number of neurons in the hidden layer
        iterations = number of gradient descent steps
        learning_rate = step size applied to every gradient

    w and b are updated in place, so the caller's objects hold the fitted
    values afterwards; the returned dict refers to those same objects.

    Returns dict {'w': w, 'b': b}.'''

    n_x = X.shape[0]    # number of features

    # gradient descent loop
    for _ in range(iterations):
        gradient = propogate(X, Y, w, b, layer_size=layer_size)
        dw = gradient['dw']
        db = gradient['db']

        # hidden neurons: gradients arrive as rows, weights are stored as columns
        for k in range(layer_size):
            w[k] = w[k] - learning_rate*dw[k].reshape(n_x,1)
            b[k] = b[k] - learning_rate*db[k]

        # output neuron
        w[-1] = w[-1] - learning_rate*dw[-1].reshape(layer_size,1)
        b[-1] = b[-1] - learning_rate*db[-1]

    return {'w': w, 'b': b}

In [16]:
# fit() updates w and b in place, so later cells that reuse w and b see the fitted values
fit(X, Y, w, b)

{'w': [array([[0.16518403],
         [0.16518403]]),
  array([[0.28924198],
         [0.28924198]]),
  array([[0.50815683],
         [0.96924372]])],
 'b': array([-0.91686881,  0.15322198,  2.60037197])}

In [17]:
def class_val(x):
    '''Turn a probability x into a class label: 0 when x <= 0.5, otherwise 1.'''
    return 0 if x <= 0.5 else 1

# elementwise version of class_val for numpy arrays
vclass_val = np.vectorize(class_val)

In [22]:
def predict(X, w, b, layer_size=2):
    '''Classify samples with the one-hidden-layer network.

        X = array with n_x rows, samples in columns
        w = list of hidden-neuron weight columns followed by the output weight column
        b = bias array, output neuron's bias last
        layer_size = number of neurons in the hidden layer

    Returns the class (0 or 1) chosen for each sample, one row per sample.'''

    samples = X.T

    # hidden layer: one activation column per neuron, placed side by side
    activations = [sigmoid(np.dot(samples, w[k]) + b[k]) for k in range(layer_size)]
    hidden = np.concatenate(activations, axis=1)

    # output layer
    probability = sigmoid(np.dot(hidden, w[-1]) + b[-1])

    return vclass_val(probability)

In [23]:
# predicted class for each of the three toy samples
predict(X, w, b)

array([[1],
       [1],
       [1]])

In [44]:
def LogModel(X_train, Y_train, X_test, Y_test, layer_size=2, iterations=2000, learning_rate=0.01):
    '''Fit the one-hidden-layer network on training data and score it on test data.

        X_train = n_x by m array with feature vectors in columns
        Y_train = m by 1 array of true binary class for each training sample
        X_test = n_x by any number array with feature vectors in columns
        Y_test = true binary class for each test sample (reshaped to a column internally)
        layer_size = positive integer, number of neurons in the hidden layer
        iterations = positive integer, number of iterations for gradient descent
        learning_rate = positive float, learning rate used for gradient descent

    Prints percentage of correct predictions on testing data using the model fit by gradient descent.
    Returns model parameters as a dict with keys 'w' and 'b'.'''

    w, b = initialize(X_train, layer_size=layer_size)

    parameters = fit(X_train, Y_train, w, b, layer_size=layer_size, iterations=iterations, learning_rate=learning_rate)
    w = parameters['w']
    b = parameters['b']

    predictions = predict(X_test, w, b, layer_size=layer_size)

    # compare like with like: a 1-D Y_test would broadcast against the column of
    # predictions into a square array and give a meaningless average
    Y_test = np.asarray(Y_test).reshape(predictions.shape)

    # predictions are 0/1, so for 0/1 labels the mean absolute difference is the error rate
    model_accuracy = 100-np.average(np.abs(Y_test-predictions))*100

    print('Model accuracy: {:.4f}%'.format(model_accuracy))

    return parameters

In [47]:
# tiny hand-made data set (2 features, 3 samples); the same values are used for training and testing
X_train = np.array([[1., 2., -1.], [3., 4., -3.2]])
Y_train = np.array([[1], [0], [1]])
X_test = X_train.copy()
Y_test = Y_train.copy()

LogModel(X_train, Y_train, X_test, Y_test, iterations=2000)

Model accuracy: 66.6667%


{'w': [array([[0.22710844],
         [0.39658963]]),
  array([[0.28710373],
         [0.55467815]]),
  array([[-0.44534983],
         [-0.78345754]])],
 'b': array([-0.99513974, -0.08545368,  1.21543515])}

In [26]:
# gradients on the hand-made data; w and b are the notebook-level arrays from the
# earlier cells, not the parameters returned by the LogModel call above
propogate(X_train, Y_train, w, b)

{'dw': [array([[0.08261021, 0.16552093]]),
  array([[0.07363465, 0.14954918]]),
  array([[0.16197719, 0.27118063]])],
 'db': [0.03896272928411102, 0.0314154078861343, 0.29951223854595815]}

In [27]:
# run gradient descent on the hand-made data starting from the current w and b
# (both are updated in place by fit)
fit(X_train, Y_train, w, b)

{'w': [array([[-0.16982817],
         [-0.54385937]]),
  array([[-0.24610115],
         [-0.73762346]]),
  array([[0.59105422],
         [1.10245521]])],
 'b': array([-0.94309061,  0.05547752,  0.36230521])}

In [71]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [72]:
# Location of the training CSV. The fallback is the original machine-specific path;
# set the EXERCISE_03_TRAIN_CSV environment variable to point at your own copy.
train_location = os.environ.get(
    'EXERCISE_03_TRAIN_CSV',
    '/Users/connorodell/Documents/Data_Science/learning/exercise_03_train.csv',
)

train_df = pd.read_csv(train_location)

In [73]:
# Clean data. Details can be found in other notebook
train_df = train_df.dropna()

# Correct days so that all are spelled out
train_df['x35'] = train_df['x35'].map(lambda x: 'wednesday' if x=='wed' else 'thursday' if (x=='thur' or x=='thurday') else 'friday' if x=='fri' else x)

# Correct sept. to Sept and Dev to Dec in column x68
train_df['x68'] = train_df['x68'].map(lambda x: 'Jan' if x=='January' else 'Sept' if x=='sept.' else 'Dec' if x=='Dev' else x)

# Transform columns x34, x35, x68, and x93 to dummy variables
train_df = pd.get_dummies(train_df, columns=['x34', 'x35', 'x68', 'x93'])

# Transform columns x41 and x45 to floats
train_df['x41'] = train_df['x41'].map(lambda x: x.lstrip('$'))
train_df['x41'] = pd.to_numeric(train_df['x41'])

train_df['x45'] = train_df['x45'].map(lambda x: x.rstrip('%'))
train_df['x45'] = pd.to_numeric(train_df['x45'])

In [74]:
# split data into train/test sets
X_train, X_test, Y_train, Y_test = train_test_split(train_df.drop('y', axis=1), train_df['y'], test_size=0.2, random_state=42)

# scale data using the standard scaler in sklearn
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# Get numpy columns for Y
Y_train = Y_train.values.reshape(len(Y_train),1)
Y_test = Y_test.values.reshape(len(Y_test),1)

In [61]:
# run new model
# note that samples are in rows of X_train and X_test, so the transposes are fed into the models
# (layer_size, iterations and learning_rate are left at LogModel's defaults: 2, 2000 and 0.01)
params = LogModel(X_train.T, Y_train, X_test.T, Y_test)

Model accuracy: 79.6837%


In [136]:
# show the fitted parameters (this prints every weight array in full)
params

{'w': [array([[ 0.0453657 ],
         [-0.07802928],
         [-0.06925341],
         [ 0.07898884],
         [ 0.0074387 ],
         [ 0.05845059],
         [-0.00113127],
         [-0.00445091],
         [ 0.00860571],
         [-0.00560125],
         [ 0.06763096],
         [ 0.00282689],
         [ 0.01105655],
         [-0.00404737],
         [ 0.00541511],
         [ 0.00475953],
         [ 0.00372391],
         [ 0.00542627],
         [-0.00199364],
         [-0.00551417],
         [ 0.05904367],
         [ 0.07177388],
         [ 0.07340211],
         [-0.00522769],
         [-0.0013216 ],
         [ 0.00217425],
         [-0.00435343],
         [ 0.00491317],
         [-0.0024353 ],
         [ 0.00616591],
         [ 0.01263308],
         [ 0.00644607],
         [-0.00139472],
         [-0.07094399],
         [-0.00074028],
         [ 0.15146677],
         [-0.00885969],
         [ 0.00231525],
         [-0.06928928],
         [ 0.13768633],
         [ 0.00484384],
         [ 

In [60]:
# same data with three hidden neurons instead of the default two
params = LogModel(X_train.T, Y_train, X_test.T, Y_test, layer_size=3)

Model accuracy: 79.6837%


In [62]:
# sweep the hidden-layer width from 2 to 21 neurons and report the accuracy of each fit
for n_hidden in range(2, 22):
    print('Number of hidden neurons = ', n_hidden)
    params = LogModel(X_train.T, Y_train, X_test.T, Y_test, layer_size=n_hidden)

Number of hidden neurons =  2
Model accuracy: 79.6837%
Number of hidden neurons =  3
Model accuracy: 79.6837%
Number of hidden neurons =  4
Model accuracy: 79.6837%
Number of hidden neurons =  5
Model accuracy: 79.6837%
Number of hidden neurons =  6
Model accuracy: 79.6837%
Number of hidden neurons =  7
Model accuracy: 79.6837%
Number of hidden neurons =  8
Model accuracy: 79.6837%
Number of hidden neurons =  9
Model accuracy: 79.6837%
Number of hidden neurons =  10
Model accuracy: 79.6837%
Number of hidden neurons =  11
Model accuracy: 79.6837%
Number of hidden neurons =  12
Model accuracy: 79.6837%
Number of hidden neurons =  13
Model accuracy: 79.6837%
Number of hidden neurons =  14
Model accuracy: 79.6837%
Number of hidden neurons =  15
Model accuracy: 79.6837%
Number of hidden neurons =  16
Model accuracy: 79.6837%
Number of hidden neurons =  17
Model accuracy: 79.6837%
Number of hidden neurons =  18
Model accuracy: 79.6837%
Number of hidden neurons =  19
Model accuracy: 79.6837%


Notes:

* The program I wrote works in the sense that it runs without any errors
* By the results above, it is clear that there is something wrong with my network, i.e., adding neurons has no impact on model accuracy.
* Let's continue on to the next week of the course to see if I can find out how to fix my model

In [63]:
# try a much wider hidden layer (60 neurons)
params = LogModel(X_train.T, Y_train, X_test.T, Y_test, layer_size=60)

Model accuracy: 79.6837%


Note: Increasing the number of neurons in the hidden layer doesn't seem to have any effect.

In [65]:
# X_train holds samples in rows here, so this is (number of samples, number of features)
X_train.shape

(31363, 126)

In [66]:
# hidden layer as wide as the input: 126 neurons, one per feature column of X_train
params = LogModel(X_train.T, Y_train, X_test.T, Y_test, layer_size=126)

Model accuracy: 79.6837%


In [75]:
# back to the two-neuron hidden layer
params = LogModel(X_train.T, Y_train, X_test.T, Y_test, layer_size=2)

Model accuracy: 79.6837%
