# Setup & Data Processing

### Import Packages

In [0]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from tqdm import tqdm_notebook
import warnings
warnings.filterwarnings('ignore')
import sklearn.datasets
%matplotlib inline
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

### Use a built-in dataset - scikit-learn Breast Cancer Dataset

In [2]:
# Load the bundled breast-cancer dataset; keep the raw arrays around as x / y.
cancerVals = sklearn.datasets.load_breast_cancer()
x, y = cancerVals.data, cancerVals.target

# Build the table with the target attached as a 'cancer' column, pull that
# column out as the label Series, then keep only the feature columns in df.
df = pd.DataFrame(x, columns=cancerVals.feature_names).assign(cancer=y)
label = df['cancer']
df = df.drop(columns='cancer')

df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


### Split into Train and Validation Data

In [0]:
# Hold out 10% of the rows for validation. The split is stratified on the
# label and uses a fixed random_state so it is repeatable across runs.
X_train, X_validate, Y_train, Y_validate = train_test_split(
    df,
    label,
    test_size=0.1,
    random_state=3,
    stratify=label,
)

# Class Sigmoid Neuron - Compressed Version

In [0]:
class SigmoidNeuronCompressed:
    """A single sigmoid neuron trained with full-batch gradient descent.

    ``fit`` inspects the targets: when every target is 0 or 1 the neuron is
    trained with the cross-entropy gradient (``type_ == 'class'``); otherwise
    it is trained with the squared-error gradient (``type_ == 'reg'``).

    Attributes set by ``fit``:
        w      -- weight row vector of shape ``(1, n_features)``
        b      -- bias term
        type_  -- ``'class'`` or ``'reg'``
    ``predict`` additionally stores ``threshold`` when it binarises.
    """

    def __init__(self):
        # Placeholders only; fit() replaces w and b with properly shaped values.
        self.w, self.b = [0], 0
        # None until fit() decides between 'class' and 'reg'.
        self.type_ = None

    def f(self, x):
        """Return the sigmoid activation 1 / (1 + exp(-(w.x + b))) for one sample ``x``."""
        return 1.0 / (1.0 + np.exp(-(np.dot(self.w, x) + self.b)))

    def _delta_reg(self, x, y):
        """Squared-error derivative w.r.t. the pre-activation: (f - y) * f * (1 - f)."""
        p = self.f(x)  # evaluate the sigmoid once instead of three times
        return (p - y) * p * (1 - p)

    def _delta_class(self, x, y):
        """Cross-entropy derivative w.r.t. the pre-activation: f when y == 0, else f - 1."""
        p = self.f(x)
        return p if y == 0 else (p - 1)

    def grad_w_reg(self, x, y):
        """Squared-error gradient w.r.t. the weights for one sample."""
        return self._delta_reg(x, y) * x

    def grad_b_reg(self, x, y):
        """Squared-error gradient w.r.t. the bias for one sample."""
        return self._delta_reg(x, y)

    def grad_w_class(self, x, y):
        """Cross-entropy gradient w.r.t. the weights for one sample."""
        return self._delta_class(x, y) * x

    def grad_b_class(self, x, y):
        """Cross-entropy gradient w.r.t. the bias for one sample."""
        return self._delta_class(x, y)

    def fit(self, X_train, Y_train, epochs=1, learning_rate=1):
        """Train the neuron; returns None.

        Parameters:
            X_train       -- 2-D samples (DataFrame, ndarray or nested list)
            Y_train       -- one target per sample (Series, ndarray or list)
            epochs        -- number of full passes over the data
            learning_rate -- step size applied to the summed gradients

        Weights and bias are reset to zero on every call. Each epoch sums the
        per-sample gradients over the whole training set and applies a single
        update.
        """
        # Convert each input independently, so e.g. an ndarray X with a Series
        # Y (or plain lists) is handled as well as the DataFrame/Series pair.
        X, Y = np.asarray(X_train), np.asarray(Y_train)

        # Targets made up solely of 0s and 1s select the classification gradient.
        self.type_ = 'class' if np.isin(Y, (0, 1)).all() else 'reg'
        self.w, self.b = np.zeros((1, X.shape[1])), 0

        delta_of = self._delta_class if self.type_ == 'class' else self._delta_reg
        for _ in range(epochs):
            dw, db = 0, 0
            for x, y in zip(X, Y):
                delta = delta_of(x, y)  # shared by the weight and bias gradients
                dw, db = dw + delta * x, db + delta
            self.w = self.w - (learning_rate * dw)
            self.b = self.b - (learning_rate * db)

    def predict(self, X_validate, binarise=False, Threshold=-1):
        """Return the neuron's output for every row of ``X_validate``.

        By default the raw sigmoid outputs are returned as an array with one
        row per sample. When ``binarise`` is true AND the neuron was fitted as
        a regressor, the outputs are thresholded into a flat 0/1 integer array:
        ``Threshold == -1`` (the default) means "use the mean prediction",
        any other value is used as the cut-off directly. The cut-off used is
        stored in ``self.threshold``. ``binarise`` has no effect on a neuron
        fitted as a classifier.
        """
        X = np.asarray(X_validate)
        Y_pred = [self.f(x) for x in X]

        if binarise and self.type_ == 'reg':
            # np.mean works on the list of per-sample outputs; the previous
            # `Y_pred.mean` raised AttributeError because lists have no .mean.
            self.threshold = np.mean(Y_pred) if Threshold == -1 else Threshold
            return (np.array(Y_pred) >= self.threshold).astype('int').ravel()
        return np.array(Y_pred)

# Comparison To Logistic Regression

### Import Logistic Regression from SKLearn

In [0]:
from sklearn.linear_model import LogisticRegression

### Instantiate Both Classes

In [0]:
# Baseline scikit-learn model and the hand-written neuron, both with default construction.
logreg, sn = LogisticRegression(), SigmoidNeuronCompressed()

### Fit Both Models on the **Same Data**

In [0]:
# Train both models on the identical training split.
logreg.fit(X_train, Y_train)
# The neuron is fitted last: its fit() returns None, so the cell shows no output.
sn.fit(
    X_train,
    Y_train,
    epochs=250,
    learning_rate=0.015,
)

### Predict For Both Models

In [0]:
# Score the held-out validation rows with each fitted model.
Y_pred_logistic, Y_pred_sigmoid = (
    logreg.predict(X_validate),
    sn.predict(X_validate),
)

### Compare Accuracies For Both Models

In [9]:
# Compare accuracies (as percentages); each score is computed once and reused.
acc_sigmoid = accuracy_score(Y_pred_sigmoid , Y_validate)*100
acc_logistic = accuracy_score(Y_pred_logistic, Y_validate)*100

print ('   Accuracy of sigmoid neuron   = ', acc_sigmoid, '%')
print ('Accuracy of logistic regression = ', acc_logistic, '%')
print ('    Net increase in accuracy    = ', acc_sigmoid - acc_logistic , '%')

   Accuracy of sigmoid neuron   =  94.73684210526315 %
Accuracy of logistic regression =  89.47368421052632 %
    Net increase in accuracy    =  5.263157894736835 %


# The accuracy of the logistic regression model is **89.47368421052632 %** <br>
# The accuracy of the sigmoid neuron model is **94.73684210526315 %** <br>
# The net increase in accuracy (sigmoid neuron over logistic regression) is **5.263157894736835 %** <br>