In [17]:
import numpy as np
import time
import matplotlib.pyplot as plt
import cvxpy as cp

## Generate the random Dataset


In [18]:
# We first generate a random dataset with number of features (m = 10) and number of instances (n = 100)
# We also generate a random label vector y \in {-1,1}

n = 100 # Number of instances
m = 10  # Number of Features 

X = np.random.rand(n,m) 
y = np.random.rand(n) # n-dimensional vector
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1) # m-dimensional vector
print(y)
print(X)

[ 1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1  1 -1  1 -1 -1  1 -1  1 -1 -1 -1  1 -1
 -1  1 -1  1 -1  1 -1 -1  1  1  1 -1  1  1 -1 -1  1 -1  1 -1 -1  1  1  1
  1 -1 -1  1 -1 -1 -1  1  1 -1 -1  1 -1  1  1 -1  1 -1  1  1  1  1 -1 -1
 -1  1 -1 -1  1  1  1 -1 -1  1  1 -1 -1  1 -1  1 -1 -1  1  1  1 -1 -1 -1
 -1 -1  1  1]
[[0.03464071 0.5596133  0.80104921 0.11227437 0.67873981 0.24390919
  0.74389977 0.77176574 0.0469     0.69355279]
 [0.10514694 0.48771796 0.60912142 0.74569407 0.2249291  0.24216052
  0.35572776 0.73829658 0.74767324 0.63374237]
 [0.53761657 0.48154364 0.8454833  0.28321559 0.30278682 0.41910136
  0.57233946 0.54014636 0.27344629 0.30513814]
 [0.2437397  0.48836195 0.26845281 0.85772027 0.77281827 0.40305602
  0.9900017  0.78947151 0.38301982 0.31218903]
 [0.3890894  0.99608747 0.94081578 0.30821005 0.37089578 0.92860704
  0.84374602 0.24163896 0.42747421 0.79130684]
 [0.1580376  0.69377947 0.08193603 0.38738429 0.59409362 0.62704508
  0.58639908 0.1702625  0.9532205  0.55068155]
 [0.

## An Implementation of the Logistic Loss 


In [19]:
def LogisticLossNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    f = 0.0
    (n, m) = X.shape
    y_pred = np.zeros(n)
    # Cost function
    for i in range(n):
        for j in range(m):
            y_pred[i] += w[j] * X[i][j] 
            
        f += np.log(1 + np.exp(-y[i] * y_pred[i]))
        
    for j in range(m):
        f += (lam * w[j] * w[j]) / 2
    
    # Gradient
    g = np.zeros(m)
    for k in range(m):
        for i in range(n):
            g[k] += -1 * y[i] * X[i][k] / (1 + np.exp(y_pred[i] * y[i]))
            
        g[k] += lam * w[k]
    
    return [f, g]     

In [20]:
start = time.time()
[f,g] = LogisticLossNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 0.0058286190032958984
Function value = [148.47457196]
Printing Gradient:
[20.99227956 23.60082748 20.27172267 24.89235895 21.80355854 25.90878363
 23.42307349 22.70935056 25.27119556 26.05699708]


## An Implementation of the Least Squares 


In [21]:
def LeastSquaresNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    f = 0.0
    (n, m) = X.shape
    y_pred = np.zeros(n)
    # Cost function
    for i in range(n):
        for j in range(m):
            y_pred[i] += w[j] * X[i][j] 
            
        f += (y[i] - y_pred[i]) ** 2
        
    for j in range(m):
        f += (lam * w[j] * w[j]) / 2
    
    # Gradient
    g = np.zeros(m)
    for k in range(m):
        for i in range(n):
            g[k] += 2 * (y_pred[i] - y[i]) * X[i][k]
            
        g[k] += lam * w[k]
        
    return [f, g]     

In [22]:
start = time.time()
[f,g] = LeastSquaresNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 0.0027070045471191406
Function value = [824.90240215]
Printing Gradient:
[257.4672939  264.68828218 253.29646101 268.65462663 246.57578303
 295.96194524 274.58732448 266.3442941  288.37065213 282.44703588]


## An Implementation of the Hinge Loss 

In [26]:
def HingeLossNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    f = 0.0
    (n, m) = X.shape
    y_pred = np.zeros(n)
    # Cost function
    for i in range(n):
        for j in range(m):
            y_pred[i] += w[j] * X[i][j] 
            
        f += max(0, 1 - y[i] * y_pred[i])
        
    for j in range(m):
        f += (lam * w[j] * w[j]) / 2
    
    # Gradient
    g = np.zeros(m)
    for k in range(m):
        for i in range(n):
            if y_pred[i]*y[i] <= 1:
                g[k] += -1 * y[i] * X[i][k]
            
        g[k] += lam * w[k]
    
    return [f, g]

In [27]:
start = time.time()
[f,g] = HingeLossNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 2.1474955081939697
Function value = [138367.64287142]
Printing Gradient:
[30.19117072 26.20545841 24.32432151 ... 27.32422749 32.55625335
 26.52044547]


## Scalability of the code

In [28]:
n = 100
m = 10000

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

start = time.time()
[f,g] = LogisticLossNaive(w,X,y,1)
end = time.time()
print("Logistic Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = LeastSquaresNaive(w,X,y,1)
end = time.time()
print("Least Square")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = HingeLossNaive(w,X,y,1)
end = time.time()
print("Hinge Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

  f += np.log(1 + np.exp(-y[i] * y_pred[i]))
  g[k] += -1 * y[i] * X[i][k] / (1 + np.exp(y_pred[i] * y[i]))


Logistic Loss
Time Taken = 3.445894718170166
Function value = [inf]
Printing Gradient:
[24.97999543 29.78695387 23.55741636 ... 24.72331878 25.67714269
 25.28994668]
Least Square
Time Taken = 2.1516599655151367
Function value = [6.13284004e+08]
Printing Gradient:
[247737.12050586 273182.89237057 234231.6264131  ... 249177.673977
 247526.57480356 257983.57958173]
Hinge Loss
Time Taken = 2.102675437927246
Function value = [125493.21623385]
Printing Gradient:
[24.97999543 29.78695387 23.55741636 ... 24.72331878 25.67714269
 25.28994668]


## Implement a vectorized version 

In [51]:
def LogisticLossVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    y_pred = X @ w
    f = np.sum(np.log(1 + np.exp(-y_pred * y))) + lam * np.linalg.norm(w) / 2
    g = - X.T @ (y / (1 + np.exp(y * y_pred))) + lam * w
    return [f, g]     

In [52]:
def LeastSquaresVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    y_pred = X @ w
    f = np.linalg.norm(y_pred - y) + lam * np.linalg.norm(w) / 2
    g = 2 * X.T @ (y_pred - y) + lam * w
    return [f, g]     

In [53]:
def HingeLossVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    y_pred = X @ w
    f = np.sum((1 >= y_pred * y) * (1 - y_pred*y)) + lam * np.linalg.norm(w) / 2
    g = - X.T @ ((1 >= y_pred * y) * y) + lam * w
    return [f, g]

In [54]:
n = 100
m = 10000

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

start = time.time()
[f,g] = LogisticLossVec(w,X,y,1)
end = time.time()
print("Logistic Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = LeastSquaresVec(w,X,y,1)
end = time.time()
print("Least Square")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = HingeLossVec(w,X,y,1)
end = time.time()
print("Hinge Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Logistic Loss
Time Taken = 0.011443853378295898
Function value = inf
Printing Gradient:
[[ 0.61002058 51.91568763  0.61002058 ...  0.61002058  0.61002058
  51.91568763]
 [ 0.44569461 55.35143704  0.44569461 ...  0.44569461  0.44569461
  55.35143704]
 [ 0.75720532 44.1527623   0.75720532 ...  0.75720532  0.75720532
  44.1527623 ]
 ...
 [ 0.1681764  46.52332579  0.1681764  ...  0.1681764   0.1681764
  46.52332579]
 [ 0.42350058 51.4038948   0.42350058 ...  0.42350058  0.42350058
  51.4038948 ]
 [ 0.56448345 48.80009498  0.56448345 ...  0.56448345  0.56448345
  48.80009498]]
Least Square
Time Taken = 0.010295867919921875
Function value = 249996.4678035193
Printing Gradient:
[[256271.37863064 256476.60129884 256271.37863064 ... 256271.37863064
  256271.37863064 256476.60129884]
 [274402.18268715 274621.80565688 274402.18268715 ... 274402.18268715
  274402.18268715 274621.80565688]
 [216910.20202959 217083.78425753 216910.20202959 ... 216910.20202959
  216910.20202959 217083.78425753]
 ...


  f = np.sum(np.log(1 + np.exp(-y_pred * y))) + lam * np.linalg.norm(w) / 2
  g = - X.T @ (y / (1 + np.exp(y * y_pred))) + lam * w


## Lets us code the above Loss Fuctions in CVXPY!

CVXPY is an open source Python-embedded modeling language for convex optimization problems. Link: https://www.cvxpy.org/

In [None]:
def LogisticLossCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    return [f, g]

In [None]:
def LeastSquaresCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    
    return [f, g]

In [None]:
def HingeLossCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    return [f, g]

In [None]:
import numpy as np
n = 100
m = 10

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

start = time.time()
[f1,g1] = LogisticLossCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value Naive = " + str(f1))
print("Printing Gradient Naive:")
print(g1)

start = time.time()
[f2,g2] = LeastSquaresCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value For = " + str(f2))
print("Printing Gradient For:")
print(g2)

start = time.time()
[f2,g2] = HingeLossCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value For = " + str(f2))
print("Printing Gradient For:")
print(g2)

## Compare the losses with Graph



In [None]:
def LogisticLossFun(w, X, y, lam):
    return error_ll

def LeastSquaresFun(w, X, y, lam):
    return error_ls

def HingeLossFun(w, X, y, lam):
    return error_hl

def plot_errors(error_ll, error_ls, error_hl, num):
    plt.plot(num, error_ll, label="Logistic Loss")
    plt.plot(num, error_ls, label="Least Squares")
    plt.plot(num, error_hl, label="Hinge Loss")
    plt.show()
    return

In [None]:
n = 100
m = 10000

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

error_ll = LogisticLossFun(w,X,y,1)
error_ls = LeastSquaresFun(w,X,y,1)
error_hl = HingeLossFun(w,X,y,1)
plot_errors(error_ll, error_ls, error_hl, 100)