In [255]:
# implementing logistic regression

In [256]:
# importing libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [257]:
# importing dataset

dataset = pd.read_excel('Machine Learning Algos.xlsx',sheet_name='Logistic Regression').head(12)
dataset.head()

Unnamed: 0,Height (cm),Label,sig,f(X),error,WF,y_pred,b,-36.8424613773466,Unnamed: 9
0,120.0,0.0,6e-06,-5.421616e-07,6e-06,4.369884e-09,0.0,w,0.20687,
1,124.0,0.0,1.4e-05,-1.240214e-06,1.4e-05,2.362882e-08,0.0,examples,12.0,
2,125.0,0.0,1.7e-05,-1.52524e-06,1.7e-05,3.602563e-08,0.0,lr,0.02,
3,128.0,0.0,3.2e-05,-2.837008e-06,3.2e-05,1.276288e-07,0.0,nb,-36.828097,-36.842461
4,130.0,0.0,4.8e-05,-4.290814e-06,4.8e-05,2.96505e-07,0.0,nw,0.247338,0.20687


In [258]:
# selecting features
features = dataset['Height (cm)']

# selecting labels
labels = dataset['Label']

In [259]:
# logistic regression by sklearn
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
logreg.fit(features.values.reshape(-1,1), labels)
logreg.coef_, logreg.intercept_

(array([[0.24473587]]), array([-38.31130303]))

In [260]:
# sklearn cost function
from sklearn.metrics import log_loss
log_loss(labels, logreg.predict_proba(features.values.reshape(-1,1)))

0.0007565415791482554

In [261]:
# implementing logistic regression from scratch
def sigmoid(x):
    return 1/(1+np.exp(-x))

def cost_function(features, labels, weights):
    m = len(labels)
    predictions = sigmoid(np.dot(features, weights))
    cost = (1/m) * np.sum(-labels*np.log(predictions) - (1-labels)*np.log(1-predictions))
    return cost

def update_weights(features, labels, weights, lr):
    m = len(labels)
    predictions = sigmoid(np.dot(features, weights))
    gradient = np.dot(features.T, predictions-labels)
    weights -= lr * gradient
    return weights

def train(features, labels, weights, lr, iters):
    cost_history = []
    for i in range(iters):
        weights = update_weights(features, labels, weights, lr)
        cost = cost_function(features, labels, weights)
        cost_history.append(cost)
    return weights, cost_history

# training the model
weights = np.zeros(2)
features = np.c_[np.ones(len(features)), features]
weights, cost_history = train(features, labels, weights, 0.01, 10000)

  cost = (1/m) * np.sum(-labels*np.log(predictions) - (1-labels)*np.log(1-predictions))
  return 1/(1+np.exp(-x))


In [262]:
weights

array([-41.27559218,   0.26498447])

In [263]:
cost_history[-1]

0.0004457758460319275

In [264]:
# predicting the labels
def predict(features, weights):
    z = np.dot(features, weights)
    return sigmoid(z)

predictions = predict(features, weights)
predictions

array([7.65526188e-05, 2.20913635e-04, 2.87921904e-04, 6.37333532e-04,
       1.08227084e-03, 8.30546762e-04, 9.98376608e-01, 9.99567949e-01,
       9.99885114e-01, 9.99969458e-01, 9.99991881e-01, 9.99997842e-01])

In [265]:
# applying threshold
predictions = np.where(predictions > 0.5, 1, 0)
predictions

array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])

In [266]:
# creating a dataframe of all the predictions
df = pd.DataFrame({'features': features[:,1], 'labels': labels, 'sigmoid': predict(features, weights), 'error': labels-predictions, 'y_pred': predictions})
df

Unnamed: 0,features,labels,sigmoid,error,y_pred
0,120.0,0.0,7.7e-05,0.0,0
1,124.0,0.0,0.000221,0.0,0
2,125.0,0.0,0.000288,0.0,0
3,128.0,0.0,0.000637,0.0,0
4,130.0,0.0,0.001082,0.0,0
5,129.0,0.0,0.000831,0.0,0
6,180.0,1.0,0.998377,0.0,1
7,185.0,1.0,0.999568,0.0,1
8,190.0,1.0,0.999885,0.0,1
9,195.0,1.0,0.999969,0.0,1


In [None]:
# appending the column in the data frame