In [3]:
import time
import datetime
from sys import maxsize
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy import stats
from sklearn.linear_model import Lasso
from sklearn import preprocessing, svm
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import precision_recall_fscore_support
from IPython.display import display, HTML

# Utilities

In [9]:
"""
Sign of learningRate determines whether to use gradient ascent/descent:
Negative learningRates = descent, positive = ascent
"""
def SGO(x, y, seed, gradientFunc, learningRate, error, maxIter):
    x = np.array(x)
    y = np.array(y)

    # Reshape 1-D arrays to column format
    # if len(x.shape) == 1:
    #     x = x.reshape(-1, 1)

    N = len(x)
    currentError = maxsize
    lastError = 0
    beta = np.array(seed)
#     print('SGO weight shape:', beta.shape)
    i = 0

    while i < maxIter:
        gradient = gradientFunc(beta, x, y)
#         yPredicted = x.dot(beta)
#         sqErrGradient = np.array(np.dot(x.T, (yPredicted - y)) / N)
        beta += learningRate * gradient
#         currentError = np.sum(np.square(y - yPredicted)) / N
        currentError = np.sum(gradient)
#         print(currentError)

        if abs(lastError - currentError) < error:
            break
        lastError = currentError
        i += 1

    return beta

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Neural Network

In [120]:
class NeuralNetwork:
    """
    inputNeurons: number of input neurons
    hiddenLayerShape: (number of layers, number of neurons per layer) tuple
    outputNeurons: number of output neurons
    """
    def __init__(self, inputNeurons, hiddenLayerShape, outputNeurons, learnRate=1e-3, decay=0):
        self.inputNeurons = inputNeurons + 1
        self.hiddenLayerShape = hiddenLayerShape
        self.outputNeurons = outputNeurons
        hiddenLayers, hiddenNeurons = hiddenLayerShape
        
        self.a_input = np.ones(self.inputNeurons, dtype=float)
        self.a_hidden = np.ones(shape=(hiddenLayers, hiddenNeurons), dtype=float)
        self.a_output = np.ones(self.outputNeurons, dtype=float)
        
        self.w_input = np.random.randn(self.inputNeurons, self.hiddenLayerShape[1])
        print('Input weights:', self.w_input)
        self.w_hidden = np.random.randn(hiddenLayers, hiddenNeurons, hiddenNeurons)
        print('Hidden weights:', self.w_hidden)
        self.w_output = np.random.randn(self.hiddenLayerShape[1], self.outputNeurons)
        print('Output weights:', self.w_output)
        
        self.learnRate = learnRate
        self.decay = decay
    
    def fit(self, x, y):
        self._feedForward(x[0])
    
    def predict(self, x):
        pass
    
    def score(self, x, y):
        accuracy = 0
        precision = 0
        recall = 0
        f1 = 0
        
        return accuracy, precision, recall, f1
    
    def _feedForward(self, x):
        if type(x) != np.ndarray:
            x = np.array(x)
        
        self.a_input[:-1] = x
#         for i in range(len(self.a_input) - 1):
#             self.a_input[i] = self.w_input[i]
        print('Input activation:', self.a_input)
        hiddenLayers, hiddenNeurons = self.hiddenLayerShape
        print(self.w_hidden[0])
        
        if hiddenLayers > 0:
#             self.a_hidden[0] = sigmoid(self.a_input * self.w_input)
            for j in range(hiddenNeurons):
                activation = 0.0
                
                for i in range(self.inputNeurons):
                    activation += self.a_input[i] * self.w_input[i][j]
                self.a_hidden[0][j] = sigmoid(activation)
        
            for layer in range(1, hiddenLayers):
                print('Setting activations for layer', layer)
                for j in range(hiddenNeurons):
                    activation = 0.0

                    for i in range(hiddenNeurons):
#                         print('A_Hidden[{}][{}] len={}'.format(layer, i, len(self.a_hidden[layer][i])))
                        activation += self.a_hidden[layer][i] * self.w_hidden[layer][i][j]
                    self.a_hidden[layer][j] = sigmoid(activation)
        print('Hidden activations:', self.a_hidden)
        print('Output weights:', self.w_output)
        print('Final hidden activations:', self.a_hidden[-1][0])
#         print('Final hidden activation weighted sums:', np.sum())
        print('w_output[0]:', self.w_output[0])
        
        for k in range(self.outputNeurons):
            activation = 0.0
            
#             self.a_output = sigmoid(np.sum(self.a_hidden[-1] * self.w_output[:][k], axis=0))
            for j in range(hiddenNeurons):
                activation += self.a_hidden[-1][j] * self.w_output[j][k]
            
            self.a_output[k] = sigmoid(activation)
        print('Output activations:', self.a_output)
    
    def _backPropagate(self, x, y):
        pass

# Predicting Malignancy of Breast Cancer Cases
## Source: [UCI ML Repository](https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29)

In [121]:
breastCancerDf = pd.read_csv('data/breast-cancer-wisconsin.csv')
breastCancerDf.loc[breastCancerDf['Class'] == 2, 'Class'] = 1
breastCancerDf.loc[breastCancerDf['Class'] == 4, 'Class'] = -1
# breastCancerDf.drop('BareNuclei', axis=1)
breastCancerDf.drop(breastCancerDf[breastCancerDf['BareNuclei'] == '?'].index, inplace=True)

yColumn = 'Class'
xColumns = [col for col in breastCancerDf.columns
            if col != 'ID' and col != yColumn
           and col != 'BareNuclei']
display(breastCancerDf[xColumns + [yColumn]].head())

print('% malignant', len(breastCancerDf['Class'].loc[breastCancerDf['Class'] == 1]) / len(breastCancerDf['Class']))

# Split into test and training sets
xTrain, xTest, yTrain, yTest = train_test_split(breastCancerDf[xColumns].as_matrix(),
                                               breastCancerDf[yColumn].as_matrix(),
                                               test_size=1/3, random_state=int(time.time()))
# np.random.seed(524)
# trainProportion = 0.8
# trainMask = np.random.rand(len(breastCancerDf)) < trainProportion
# cancerTrainingDf = breastCancerDf[trainMask]
# cancerTestDf = breastCancerDf[~trainMask].reset_index()
# print('Total # cancer samples: {}, training samples: {}, test samples: {}'.format(
#     len(breastCancerDf), len(cancerTrainingDf), len(cancerTestDf)))

Unnamed: 0,ClumpThickness,CellSizeUniformity,CellShapeUniformity,MarginalAdhesion,SingleEpithelialCellSize,BlandChromatin,NormalNucleoli,Mitoses,Class
0,5,1,1,1,2,3,1,1,1
1,5,4,4,5,7,3,2,1,1
2,3,1,1,1,2,3,1,1,1
3,6,8,8,1,3,3,7,1,1
4,4,1,1,3,2,3,1,1,1


% malignant 0.6500732064421669


In [122]:
nn = NeuralNetwork(8, (2, 4), 2)
nn.fit(xTrain, yTrain)
accuracy, precision, recall, f1 = nn.score(xTest, yTest)
print('Accuracy: {:.4f}, precision: {:.4f}, recall: {:.4f}, F1: {:.4f}'.format(
        accuracy, precision, recall, f1))

Input weights: [[-0.65723777 -1.48285539 -2.31676589 -0.33302864]
 [ 0.3389728   0.90484448 -1.4442845   1.01552641]
 [ 0.68492251 -0.84750782  0.62040414 -0.20199639]
 [ 1.11589366 -0.20005506 -1.32576808 -0.05333017]
 [ 1.20309489  0.61670023 -0.51493997 -1.1730109 ]
 [ 0.72548942  0.40987306  1.24665182  0.21724455]
 [-1.57138025 -1.42061527  0.10590772  0.99210366]
 [ 0.88785547  0.62044674  0.94314673 -1.5090613 ]
 [-1.18062996  0.61696249 -1.18600178 -2.34024603]]
Hidden weights: [[[-0.28732711 -0.3898716  -0.01068194 -0.08406513]
  [-0.34183996 -1.05861583  0.66384636  0.82351035]
  [ 0.3420864   0.65909418  2.16150419  0.2393999 ]
  [ 0.14276957  1.23747873 -0.18438391  1.16203792]]

 [[ 0.54286474 -0.278951    1.17058078  1.90065995]
  [ 0.54249537 -1.60578429  0.62640042 -0.30540814]
  [-0.63986537 -0.4449977   0.51676026  0.75540616]
  [-1.11510852  0.85266544 -3.20206272 -0.15306086]]]
Output weights: [[-1.15422994  0.0243604 ]
 [-0.14977086 -0.1938041 ]
 [-1.21364844 -0.46