In [None]:
#https://medium.com/@IwriteDSblog/gradient-descent-for-logistics-regression-in-python-18e033775082

In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt

In [2]:
def generateXvector(X):
    """ Taking the original independent variables matrix and add a row of 1 which corresponds to x_0
        Parameters:
          X:  independent variables matrix
        Return value: the matrix that contains all the values in the dataset, not include the outcomes variables. 
    """    
    vectorX = np.c_[np.ones((len(X), 1)), X]
    return vectorX

def theta_init(X):
    """ Generate an initial value of vector θ from the original independent variables matrix
         Parameters:
          X:  independent variables matrix
        Return value: a vector of theta filled with initial guess
    """
    theta = np.random.randn(len(X[0])+1, 1)
    return theta
def sigmoid_function(X):
    """ Calculate the sigmoid value of the inputs
         Parameters:
          X:  values
        Return value: the sigmoid value
    """
    return 1/(1+math.e**(-X))

In [3]:
def Logistics_Regression(X,y,learningrate, iterations):
    """ Find the Logistics regression model for the data set
         Parameters:
          X: independent variables matrix
          y: dependent variables matrix
          learningrate: learningrate of Gradient Descent
          iterations: the number of iterations
        Return value: the final theta vector and the plot of cost function
    """
    
    y_new = np.reshape(y, (len(y), 1))   
    cost_lst = []
    vectorX = generateXvector(X)
    theta = theta_init(X)
    m = len(X)
    print(f"vectorX {vectorX} y {y}")
    print(f"vectorX.shape {vectorX.shape} y_new.shape {y_new.shape}")
    for i in range(iterations):
        gradients = 2/m * vectorX.T.dot(sigmoid_function(vectorX.dot(theta)) - y_new)
        print(f"grad.shape {gradients.shape} w.shape {theta.shape}")
        theta = theta - learningrate * gradients
        y_pred = sigmoid_function(vectorX.dot(theta))
        cost_value = - np.sum(np.dot(y_new.T,np.log(y_pred)+ np.dot((1-y_new).T,np.log(1-y_pred)))) /(len(y_pred))
 #Calculate the loss for each training instance
        cost_lst.append(cost_value)
    plt.plot(np.arange(1,iterations),cost_lst[1:], color = 'red')
    plt.title('Cost function Graph')
    plt.xlabel('Number of iterations')
    plt.ylabel('Cost')
    return theta

In [4]:
from sklearn import datasets
iris = datasets.load_iris()
X = iris["data"]
y = (iris["target"] == 0).astype(np.int) #return 1 if Iris Versicolor, else 0.

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = (iris["target"] == 0).astype(np.int) #return 1 if Iris Versicolor, else 0.


In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [6]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0, penalty = 'none')
classifier.fit(X_train, y_train)
classifier.intercept_, classifier.coef_

(array([-11.07402312]),
 array([[ -1.32289075,   4.23503694, -10.11887281,  -9.22137322]]))

In [7]:
y_pred = classifier.predict(X_test)
from sklearn.metrics import confusion_matrix, accuracy_score
accuracy_score(y_test, y_pred)

1.0

In [8]:
def column(matrix, i):
    """ Returning all the values in a specific columns
         Parameters:
          X: the input matrix
          i: the column
     Return value: an array with desired column
    """
    return [row[i] for row in matrix]

def accuracy_LR(X,y,learningrate, iteration,X_test, y_test):
    """ Returning the accuracy score for a training model
    
    """
    ideal = Logistics_Regression(X,y,learningrate, iteration)
    hypo_line = ideal[0]
    for i in range(1,len(ideal)):
        hypo_line = hypo_line + ideal[i]*column(X_test,i-1)
    logistic_function = sigmoid_function(hypo_line)
    for i in range(len(logistic_function)):
        if logistic_function[i] >= 0.5:
            logistic_function[i] = 1
        else:
            logistic_function[i] = 0
    last1 = np.concatenate((logistic_function.reshape(len(logistic_function),1), y_test.reshape(len(y_test),1)),1)
    count = 0
    for i in range(len(y_test)):
        if last1[i][0] == last1[i][1]:
            count = count+1
    acc = count/(len(y_test))
    return acc

In [9]:
Logistics_Regression(X_train,y_train, 1, 2) #1000000

vectorX [[ 1.          0.61303014  0.10850105  0.94751783  0.736072  ]
 [ 1.         -0.56776627 -0.12400121  0.38491447  0.34752959]
 [ 1.         -0.80392556  1.03851009 -1.30289562 -1.33615415]
 [ 1.          0.25879121 -0.12400121  0.60995581  0.736072  ]
 [ 1.          0.61303014 -0.58900572  1.00377816  1.25412853]
 [ 1.         -0.80392556 -0.82150798  0.04735245  0.21801546]
 [ 1.         -0.21352735  1.73601687 -1.19037495 -1.20664002]
 [ 1.          0.14071157 -0.82150798  0.72247648  0.47704373]
 [ 1.          0.02263193 -0.12400121  0.21613346  0.34752959]
 [ 1.         -0.09544771 -1.05401024  0.10361279 -0.04101281]
 [ 1.          1.0853487  -0.12400121  0.94751783  1.1246144 ]
 [ 1.         -1.39432376  0.34100331 -1.41541629 -1.33615415]
 [ 1.          1.20342834  0.10850105  0.72247648  1.38364267]
 [ 1.         -1.04008484  1.03851009 -1.24663528 -0.81809761]
 [ 1.         -0.56776627  1.50351461 -1.30289562 -1.33615415]
 [ 1.         -1.04008484 -2.4490238  -0.177688

array([[-0.33194485],
       [-0.18047227],
       [ 0.36660262],
       [-1.50001835],
       [-0.95738733]])

In [10]:
accuracy_LR(X_train,y_train, 1, 2,X_test, y_test) #1000000

vectorX [[ 1.          0.61303014  0.10850105  0.94751783  0.736072  ]
 [ 1.         -0.56776627 -0.12400121  0.38491447  0.34752959]
 [ 1.         -0.80392556  1.03851009 -1.30289562 -1.33615415]
 [ 1.          0.25879121 -0.12400121  0.60995581  0.736072  ]
 [ 1.          0.61303014 -0.58900572  1.00377816  1.25412853]
 [ 1.         -0.80392556 -0.82150798  0.04735245  0.21801546]
 [ 1.         -0.21352735  1.73601687 -1.19037495 -1.20664002]
 [ 1.          0.14071157 -0.82150798  0.72247648  0.47704373]
 [ 1.          0.02263193 -0.12400121  0.21613346  0.34752959]
 [ 1.         -0.09544771 -1.05401024  0.10361279 -0.04101281]
 [ 1.          1.0853487  -0.12400121  0.94751783  1.1246144 ]
 [ 1.         -1.39432376  0.34100331 -1.41541629 -1.33615415]
 [ 1.          1.20342834  0.10850105  0.72247648  1.38364267]
 [ 1.         -1.04008484  1.03851009 -1.24663528 -0.81809761]
 [ 1.         -0.56776627  1.50351461 -1.30289562 -1.33615415]
 [ 1.         -1.04008484 -2.4490238  -0.177688

1.0