Here is a jupyter notebook using Gradient Descent to solve Logisitcs Regression problem. 

The objective function of Logistics Regression with Ridge Regularization is 
$$
f(x) = \frac{1}{n} \sum_{i=1}^n [\log (1 + \exp(x^{\intercal} A_{i*})) - b_i x^{\intercal} A_{i*}] + \lambda \|x\|^2,
$$
where $A \in \mathbb{R}^{n \times d}$ is the design matrix, $b \in \mathbb{R}^n$ is the response vector and $\lambda$ is the regularization parameter. 

In [1]:
import numpy as np
import matplotlib.pyplot as plt

## Sigmoid function 

In [2]:
def sigmoid(Z):
    return 1/(1+ np.e**(-Z))

## Logistic Regression objective function 

In [40]:
def logistic_regression_obj(x,A,b,labd):
    val = 0
    for i in range(A.shape[0]): 
        val += np.log(1.0 + np.exp(np.dot(A[i,:],x))) - b[i]*np.dot(A[i,:],x)
    return val/A.shape[0] + labd*np.linalg.norm(x)

In [41]:
logistic_regression_obj(x,A,b, labd)

array([5.09011107])

## Derivative of objective function 

In [44]:
A[1][2]

0.04918273950573948

In [47]:
def dev_LR_obj(x,A,b,labd):
    dev = []
    for j in range(A.shape[1]):
        val = 0
        for i in range(A.shape[0]):
            val += 1./A.shape[0] * 1.0/(1.0 + np.exp(np.dot(A[i,:],x))) * np.exp(np.dot(A[i,:],x)) * A[i][j] - b[i]*A[i][j]
        dev.append(val + 2*labd*x[j])
    return dev

In [48]:
dev_LR_obj(x,A,b,labd)

[array([0.75029534]),
 array([0.49166339]),
 array([1.84644406]),
 array([1.91282282]),
 array([0.23874081]),
 array([3.3679231]),
 array([-1.2741634]),
 array([-1.66146868]),
 array([3.69558166]),
 array([0.79389051]),
 array([2.48792327]),
 array([2.72071337]),
 array([2.71007612]),
 array([0.00028901]),
 array([-1.6899758]),
 array([3.45344412]),
 array([2.72529991]),
 array([-0.9452044]),
 array([2.30906382]),
 array([-0.7958489]),
 array([-2.14246137]),
 array([2.06439462]),
 array([3.34869303]),
 array([2.17930866]),
 array([5.25850366]),
 array([0.29041011]),
 array([1.18183777]),
 array([2.25411891]),
 array([1.70890018]),
 array([1.82036467]),
 array([1.89156581]),
 array([-2.53005367]),
 array([5.27648972]),
 array([0.64758218]),
 array([0.373211]),
 array([3.05864634]),
 array([-1.89871082]),
 array([-1.29176595]),
 array([2.9558284]),
 array([1.08880151]),
 array([1.47235963]),
 array([-1.05774074]),
 array([-2.90224749]),
 array([0.42047158]),
 array([1.36660685]),
 array(

## Initialize x

In [19]:
x = np.ones((d,1))

In [21]:
print(np.dot(A[393,:],x))

[-0.70230438]


## Generate A, b

* A is independtly sampled from d-dimensional Gaussian distribution with mean 0 and covariacen 0.1. 
* b_i is Bernoulli random variable with probability sigmoid(x^T A[i,:])

In [16]:
mu ,sigma =0,  0.1
n ,d = 1000,  2000
A = np.random.normal(mu, sigma, (n,d))

In [7]:
labd = 1/np.sqrt(n)

In [8]:
labd

0.03162277660168379

In [27]:
p = []
for i in range(n):
    p.append(sigmoid(np.dot(A[i,:], x)))

In [28]:
p

[array([0.99878817]),
 array([0.61093938]),
 array([0.96237336]),
 array([0.39503644]),
 array([0.10199433]),
 array([0.0061012]),
 array([0.99775124]),
 array([0.01325193]),
 array([0.00658003]),
 array([0.0050287]),
 array([0.00383306]),
 array([0.99726438]),
 array([0.0279244]),
 array([0.88551132]),
 array([0.05766193]),
 array([0.25124574]),
 array([0.04326004]),
 array([0.42831061]),
 array([0.9580129]),
 array([0.97163911]),
 array([0.15969765]),
 array([0.00786031]),
 array([0.47904593]),
 array([0.98188131]),
 array([0.29731906]),
 array([7.10593044e-05]),
 array([0.95539707]),
 array([0.07956942]),
 array([0.99971656]),
 array([4.51288717e-06]),
 array([0.60229354]),
 array([0.83517987]),
 array([0.279076]),
 array([0.11182289]),
 array([0.99698163]),
 array([0.97565657]),
 array([0.02290552]),
 array([0.00010584]),
 array([0.03411246]),
 array([0.00362132]),
 array([0.30525639]),
 array([0.97160253]),
 array([0.5093592]),
 array([0.97945508]),
 array([0.99999933]),
 array([0

In [29]:
b = np.random.binomial(1, p, size = (n,1))

In [30]:
b

array([[1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
    

## Training 

In [None]:
val_trend = []
learning_rate = 0.01
for epoch in range(1000):
    dx = dev_LR_obj(x,A,b,labd)
    x = x - np.dot(learning_rate, dx) 
    
    if epoch % 100 == 0:
        func_val = logistic_regression_obj(x,A,b,labd)
        val_trend.append(func_val)
        print('function val after %d epochs is %1.8f' % (epoch,func_val ))

function val after 0 epochs is 4.95680673
