In [1]:
# implementing logistic regression

In [20]:
# importing libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# logistic regression by sklearn
from sklearn.linear_model import LogisticRegression

In [28]:
# importing dataset

dataset = pd.read_excel('Machine Learning Algos.xlsx',sheet_name='Logistic Regression').head(12)
dataset

Unnamed: 0,Height (cm),Label,sig,f(X),error,WF,y_pred,b,-36.8424613773466,Unnamed: 9
0,120.0,0.0,6e-06,-5.421616e-07,6e-06,4.369884e-09,0.0,w,0.20687,
1,124.0,0.0,1.4e-05,-1.240214e-06,1.4e-05,2.362882e-08,0.0,examples,12.0,
2,125.0,0.0,1.7e-05,-1.52524e-06,1.7e-05,3.602563e-08,0.0,lr,0.02,
3,128.0,0.0,3.2e-05,-2.837008e-06,3.2e-05,1.276288e-07,0.0,nb,-36.828097,-36.842461
4,130.0,0.0,4.8e-05,-4.290814e-06,4.8e-05,2.96505e-07,0.0,nw,0.247338,0.20687
5,129.0,0.0,3.9e-05,-3.488996e-06,3.9e-05,1.94538e-07,0.0,,,
6,180.0,1.0,0.5973,-0.9081088,-0.4027,-17.43524,1.0,,,
7,185.0,1.0,0.80668,-0.7776001,-0.19332,-5.577327,1.0,,,
8,190.0,1.0,0.921502,-0.7198051,-0.078498,-1.078862,1.0,,,
9,195.0,1.0,0.97061,-0.6972565,-0.02939,-0.1634803,1.0,,,


In [29]:
# selecting features
features = dataset['Height (cm)']
# selecting labels
labels = dataset['Label']

In [30]:
# logistic regression by sklearn
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
logreg.fit(features.values.reshape(-1,1), labels)
logreg.coef_, logreg.intercept_

(array([[0.24473587]]), array([-38.31130303]))

In [31]:
# sklearn cost function
from sklearn.metrics import log_loss
log_loss(labels, logreg.predict_proba(features.values.reshape(-1,1)))

0.0007565415791482275

In [32]:
# implementing logistic regression from scratch
def sigmoid(x):
    return 1/(1+np.exp(-x))



In [33]:
def cost_function(features, labels, weights):
    m = len(labels)
    predictions = sigmoid(np.dot(features, weights))
    cost = (1/m) * np.sum(-labels*np.log(predictions) - (1-labels)*np.log(1-predictions))
    return cost

In [34]:
def update_weights(features, labels, weights, lr):
    m = len(labels)
    predictions = sigmoid(np.dot(features, weights))
    gradient = np.dot(features.T, predictions-labels)
    weights -= lr * gradient
    return weights

In [35]:
def train(features, labels, weights, lr, iters):
    cost_history = []
    for i in range(iters):
        weights = update_weights(features, labels, weights, lr)
        cost = cost_function(features, labels, weights)
        cost_history.append(cost)
    return weights, cost_history

In [36]:
# training the model
weights = np.zeros(2)
features = np.c_[np.ones(len(features)), features]
weights, cost_history = train(features, labels, weights, 0.01, 10000)

  cost = (1/m) * np.sum(-labels*np.log(predictions) - (1-labels)*np.log(1-predictions))
  return 1/(1+np.exp(-x))


In [37]:
weights

array([-41.27559218,   0.26498447])

In [38]:
cost_history[-1]

0.0004457758460319275

In [39]:
# predicting the labels
def predict(features, weights):
    z = np.dot(features, weights)
    return sigmoid(z)

predictions = predict(features, weights)
predictions

array([7.65526188e-05, 2.20913635e-04, 2.87921904e-04, 6.37333532e-04,
       1.08227084e-03, 8.30546762e-04, 9.98376608e-01, 9.99567949e-01,
       9.99885114e-01, 9.99969458e-01, 9.99991881e-01, 9.99997842e-01])

In [40]:
# applying threshold
predictions = np.where(predictions > 0.5, 1, 0)
predictions

array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])

In [41]:
# creating a dataframe of all the predictions
df = pd.DataFrame({'features': features[:,1], 'labels': labels, 'sigmoid': predict(features, weights), 'error': labels-predictions, 'y_pred': predictions})
df

Unnamed: 0,features,labels,sigmoid,error,y_pred
0,120.0,0.0,7.7e-05,0.0,0
1,124.0,0.0,0.000221,0.0,0
2,125.0,0.0,0.000288,0.0,0
3,128.0,0.0,0.000637,0.0,0
4,130.0,0.0,0.001082,0.0,0
5,129.0,0.0,0.000831,0.0,0
6,180.0,1.0,0.998377,0.0,1
7,185.0,1.0,0.999568,0.0,1
8,190.0,1.0,0.999885,0.0,1
9,195.0,1.0,0.999969,0.0,1


In [42]:
df = pd.DataFrame({'features': features[:,1]})

In [43]:
df

Unnamed: 0,features
0,120.0
1,124.0
2,125.0
3,128.0
4,130.0
5,129.0
6,180.0
7,185.0
8,190.0
9,195.0


In [44]:
df = pd.DataFrame({ 'labels': labels,})

In [45]:
df

Unnamed: 0,labels
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
5,0.0
6,1.0
7,1.0
8,1.0
9,1.0


In [46]:
df = pd.DataFrame({ 'sigmoid': predict(features, weights)})

In [47]:
df

Unnamed: 0,sigmoid
0,7.7e-05
1,0.000221
2,0.000288
3,0.000637
4,0.001082
5,0.000831
6,0.998377
7,0.999568
8,0.999885
9,0.999969
