In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
import cvxpy as cp

## Generate the random Dataset


In [2]:
# We first generate a random dataset with number of features (m = 10) and number of instances (n = 100)
# We also generate a random label vector y \in {-1,1}

n = 100 # Number of instances
m = 10  # Number of Features

X = np.random.rand(n,m)
y = np.random.rand(n) # n-dimensional vector
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m) # m-dimensional vector
print(y)
print(X)

[ 1  1 -1  1 -1 -1 -1 -1  1 -1  1  1 -1 -1 -1  1  1 -1  1 -1  1  1 -1  1
 -1  1  1  1  1 -1 -1 -1  1  1 -1 -1  1 -1  1 -1  1 -1 -1 -1 -1  1  1  1
  1 -1  1  1 -1 -1  1  1 -1  1 -1 -1 -1  1  1 -1  1 -1 -1 -1 -1  1  1  1
 -1 -1  1  1 -1 -1  1  1 -1  1 -1  1  1  1  1 -1 -1 -1  1  1 -1  1 -1 -1
  1 -1 -1 -1]
[[0.91473138 0.0650588  0.90545463 0.43170181 0.52135365 0.60782222
  0.33897151 0.48266951 0.81108744 0.2931116 ]
 [0.26836888 0.50158593 0.67506033 0.18902072 0.58672815 0.75235711
  0.74166072 0.61554244 0.53500966 0.68622984]
 [0.73340325 0.60494264 0.86859463 0.23673276 0.36851718 0.22172385
  0.46184412 0.30289697 0.86241353 0.01536816]
 [0.553998   0.96629304 0.07847826 0.91835454 0.21326236 0.50735612
  0.93592945 0.50347157 0.69831463 0.0999497 ]
 [0.82634161 0.91237282 0.77895022 0.3638494  0.28488727 0.0014513
  0.12328917 0.1437361  0.76102399 0.80935997]
 [0.20602647 0.39765355 0.85582931 0.70943579 0.89609137 0.29166501
  0.94477585 0.96633106 0.5959912  0.43769565]
 [0.3

## An Implementation of the Logistic Loss 


In [3]:
def LogisticLossNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    f = 0.0
    (n, m) = X.shape
    y_pred = np.zeros(n)
    # Cost function
    for i in range(n):
        for j in range(m):
            y_pred[i] += w[j] * X[i][j] 
            
        f += np.log(1 + np.exp(-y[i] * y_pred[i]))
        
    for j in range(m):
        f += (lam * w[j] * w[j]) / 2
    
    # Gradient
    g = np.zeros(m)
    for k in range(m):
        for i in range(n):
            g[k] += -1 * y[i] * X[i][k] / (1 + np.exp(y_pred[i] * y[i]))
            
        g[k] += lam * w[k]
    
    return [f, g]     

In [4]:
start = time.time()
[f,g] = LogisticLossNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 0.004483461380004883
Function value = 122.274444023278
Printing Gradient:
[19.72058111 21.83922336 23.09055854 22.32111737 21.92809695 17.94364089
 23.12795594 23.24772995 20.40605657 19.31804168]


## An Implementation of the Least Squares 


In [5]:
def LeastSquaresNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    f = 0.0
    (n, m) = X.shape
    y_pred = np.zeros(n)
    # Cost function
    for i in range(n):
        for j in range(m):
            y_pred[i] += w[j] * X[i][j] 
            
        f += (y[i] - y_pred[i]) ** 2
        
    for j in range(m):
        f += (lam * w[j] * w[j]) / 2
    
    # Gradient
    g = np.zeros(m)
    for k in range(m):
        for i in range(n):
            g[k] += 2 * (y_pred[i] - y[i]) * X[i][k]
            
        g[k] += lam * w[k]
        
    return [f, g]     

In [6]:
start = time.time()
[f,g] = LeastSquaresNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 0.002254486083984375
Function value = 623.0566842351129
Printing Gradient:
[224.28352049 248.06363597 238.69580447 236.65993001 234.35444649
 228.31882478 254.21298782 262.03529849 231.68054094 228.04989163]


## An Implementation of the Hinge Loss 

In [7]:
def HingeLossNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    f = 0.0
    (n, m) = X.shape
    y_pred = np.zeros(n)
    # Cost function
    for i in range(n):
        for j in range(m):
            y_pred[i] += w[j] * X[i][j] 
            
        f += max(0, 1 - y[i] * y_pred[i])
        
    for j in range(m):
        f += (lam * w[j] * w[j]) / 2
    
    # Gradient
    g = np.zeros(m)
    for k in range(m):
        for i in range(n):
            if y_pred[i]*y[i] <= 1:
                g[k] += -1 * y[i] * X[i][k]
            
        g[k] += lam * w[k]
    
    return [f, g]

In [8]:
start = time.time()
[f,g] = HingeLossNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 0.0007905960083007812
Function value = 161.9737114674038
Printing Gradient:
[24.7473981  26.60377429 28.52150374 27.55426854 27.0813504  22.41112162
 28.50749887 28.44352916 25.40888073 23.55496323]


## Scalability of the code

In [9]:
n = 100
m = 10000

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

start = time.time()
[f,g] = LogisticLossNaive(w,X,y,1)
end = time.time()
print("Logistic Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = LeastSquaresNaive(w,X,y,1)
end = time.time()
print("Least Square")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = HingeLossNaive(w,X,y,1)
end = time.time()
print("Hinge Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

  f += np.log(1 + np.exp(-y[i] * y_pred[i]))
  g[k] += -1 * y[i] * X[i][k] / (1 + np.exp(y_pred[i] * y[i]))


Logistic Loss
Time Taken = 3.4032998085021973
Function value = [inf]
Printing Gradient:
[24.91565548 25.48367699 26.50079266 ... 23.93407991 24.09286659
 24.81819486]
Least Square
Time Taken = 2.146094560623169
Function value = [6.4104154e+08]
Printing Gradient:
[262635.84102936 262042.14901381 257794.55571874 ... 246854.21417646
 260135.42399998 271424.67119628]
Hinge Loss
Time Taken = 2.1042685508728027
Function value = [123291.48085078]
Printing Gradient:
[24.91565548 25.48367699 26.50079266 ... 23.93407991 24.09286659
 24.81819486]


## Implement a vectorized version 

In [10]:
def LogisticLossVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    y_pred = X @ w
    f = np.sum(np.log(1 + np.exp(-y_pred * y))) + lam * np.linalg.norm(w) / 2
    g = - X.T @ (y / (1 + np.exp(y * y_pred))) + lam * w
    return [f, g]     

In [11]:
def LeastSquaresVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    y_pred = X @ w
    f = np.sum((y_pred - y) ** 2) + lam * np.linalg.norm(w) / 2
    g = 2 * X.T @ (y_pred - y) + lam * w
    return [f, g]     

In [12]:
def HingeLossVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    y_pred = X @ w
    f = np.sum((1 >= y_pred * y) * (1 - y_pred*y)) + lam * np.linalg.norm(w) / 2
    g = - X.T @ ((1 >= y_pred * y) * y) + lam * w
    return [f, g]

In [13]:
n = 100
m = 10000

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

start = time.time()
[f,g] = LogisticLossVec(w,X,y,1)
end = time.time()
print("Logistic Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = LeastSquaresVec(w,X,y,1)
end = time.time()
print("Least Square")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = HingeLossVec(w,X,y,1)
end = time.time()
print("Hinge Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Logistic Loss
Time Taken = 0.013580322265625
Function value = inf
Printing Gradient:
[[ 0.11293931  0.11293931 55.37390849 ...  0.11293931 55.37390849
  55.37390849]
 [ 0.15236677  0.15236677 51.32776752 ...  0.15236677 51.32776752
  51.32776752]
 [ 0.13967895  0.13967895 52.65474957 ...  0.13967895 52.65474957
  52.65474957]
 ...
 [ 0.07231739  0.07231739 47.94471807 ...  0.07231739 47.94471807
  47.94471807]
 [ 0.17878555  0.17878555 46.34448554 ...  0.17878555 46.34448554
  46.34448554]
 [ 0.32927288  0.32927288 50.57995194 ...  0.32927288 50.57995194
  50.57995194]]
Least Square
Time Taken = 0.010885238647460938
Function value = 62308794146.3032
Printing Gradient:
[[275753.57521969 275753.57521969 275974.61909643 ... 275753.57521969
  275974.61909643 275974.61909643]
 [255328.39499895 255328.39499895 255533.09660197 ... 255328.39499895
  255533.09660197 255533.09660197]
 [262161.56248494 262161.56248494 262371.62276742 ... 262161.56248494
  262371.62276742 262371.62276742]
 ...
 [2

  f = np.sum(np.log(1 + np.exp(-y_pred * y))) + lam * np.linalg.norm(w) / 2
  g = - X.T @ (y / (1 + np.exp(y * y_pred))) + lam * w


## Lets us code the above Loss Fuctions in CVXPY!

CVXPY is an open source Python-embedded modeling language for convex optimization problems. Link: https://www.cvxpy.org/

In [None]:
def LogisticLossCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    expression = cp.sum(cp.log(1 + cp.exp(cp.multiply(-y, X @ w)))) + lam * cp.norm(w, 2) / 2
    Problem = cp.Problem(cp.Minimize(expression))
    f = expression.value
    return [f, g]

In [None]:
def LeastSquaresCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    expression = cp.sum_squares(X @ w - y) + lam * cp.norm(w, 2) / 2
    Problem = cp.Problem(cp.Minimize(expression))
    f = expression.value
    return [f, g]

In [None]:
def HingeLossCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    expression = cp.sum(cp.pos(1 - cp.multiply(y, X @ w))) + lam * cp.norm(w, 2) / 2
    Problem = cp.Problem(cp.Minimize(expression))
    f = expression.value
    return [f, g]

In [None]:
import numpy as np
n = 100
m = 10

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

start = time.time()
[f1,g1] = LogisticLossCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value Naive = " + str(f1))
print("Printing Gradient Naive:")
print(g1)

start = time.time()
[f2,g2] = LeastSquaresCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value For = " + str(f2))
print("Printing Gradient For:")
print(g2)

start = time.time()
[f2,g2] = HingeLossCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value For = " + str(f2))
print("Printing Gradient For:")
print(g2)

## Compare the losses with Graph



In [None]:
def LogisticLossFun(w, X, y, lam):
    return error_ll

def LeastSquaresFun(w, X, y, lam):
    return error_ls

def HingeLossFun(w, X, y, lam):
    return error_hl

def plot_errors(error_ll, error_ls, error_hl, num):
    plt.plot(num, error_ll, label="Logistic Loss")
    plt.plot(num, error_ls, label="Least Squares")
    plt.plot(num, error_hl, label="Hinge Loss")
    plt.show()
    return

In [None]:
n = 100
m = 10000

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

error_ll = LogisticLossFun(w,X,y,1)
error_ls = LeastSquaresFun(w,X,y,1)
error_hl = HingeLossFun(w,X,y,1)
plot_errors(error_ll, error_ls, error_hl, 100)