In [77]:
import math, copy
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDRegressor, SGDClassifier

my_ID = 400132290
np.random.seed(my_ID)
np.set_printoptions(precision=2)# reduced display precision on numpy arrays

In [78]:
data = load_breast_cancer()
x_data = pd.DataFrame(data.data, columns=data.feature_names)
y_data = data.target
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=my_ID)

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

print(f"X Shape: {x_train.shape}, X Type:{type(x_train)})")
#print(x_train)
print(f"y Shape: {y_train.shape}, y Type:{type(y_train)})")
#print(y_train)

X Shape: (455, 30), X Type:<class 'numpy.ndarray'>)
y Shape: (455,), y Type:<class 'numpy.ndarray'>)


**to do**
- my version of Batch and SGD
- compute missclassification rate
- F1 ?
- plot precision/recall PR curve and ROC curve
- something about B???

In [79]:
# sklearn
# batch
log_reg =LogisticRegression(random_state=my_ID)
log_reg.fit(x_train, y_train)
print(f"number of iterations completed: {log_reg.n_iter_}")
log_reg_b = log_reg.intercept_
log_reg_w = log_reg.coef_
print(f"model parameters (batch): w: {log_reg_w}, b:{log_reg_b}")

predictions = log_reg.predict(x_test)
print(f"Prediction on training set (batch):\n{predictions[:10]}" )
print(f"Target values (batch)\n{y_test[:10]}")

# SGD
log_reg = SGDClassifier(random_state=my_ID)
log_reg.fit(x_train, y_train)
print(f"number of iterations completed: {log_reg.n_iter_}")
log_reg_b = log_reg.intercept_
log_reg_w = log_reg.coef_
print(f"model parameters (SGD): w: {log_reg_w}, b:{log_reg_b}")

predictions = log_reg.predict(x_test)
print(f"Prediction on training set (SGD):\n{predictions[:10]}" )
# predictions_round = [0 if num < 0.5 else 1 for num in predictions]
# print(f"Prediction on training set rounded (SGD):\n{predictions_round[:10]}" )
print(f"Target values (SGD)\n{y_test[:10]}")

number of iterations completed: [32]
model parameters (batch): w: [-0.03], b:[[-0.48 -0.46 -0.47 -0.5  -0.09  0.56 -0.9  -1.    0.17  0.31 -1.11  0.12
  -0.72 -0.87 -0.33  0.59  0.2  -0.24  0.49  0.48 -0.89 -1.28 -0.76 -0.87
  -0.75  0.08 -1.08 -0.71 -0.94 -0.21]]
Prediction on training set (batch):
[0 1 0 1 1 1 1 1 1 1]
Target values (batch)
[0 1 0 1 1 1 1 1 1 1]
number of iterations completed: 36
model parameters (SGD): w: [-3.19], b:[[-1.85  0.52 -1.86 -1.75 -1.46  9.83 -6.44 -6.09  2.91 -2.29 -9.15  2.19
  -2.9  -8.67 -3.8  -1.04  9.36 -4.68  3.9   4.85 -4.52 -9.59 -2.7  -5.87
  -1.1   0.76 -8.23 -1.82 -6.26  0.27]]
Prediction on training set (SGD):
[0 1 0 1 1 1 1 1 1 1]
Target values (SGD)
[0 1 0 1 1 1 1 1 1 1]


In [80]:
def sigmoid(z):
    """
    Compute the sigmoid of z

    Args:
        z (ndarray): A scalar, numpy array of any size.

    Returns:
        g (ndarray): sigmoid(z), with the same shape as z
         
    """

    g = 1/(1+np.exp(-z))
   
    return g
def compute_cost_logistic(X, y, w, b):
    """
    Computes cost

    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters  
      b (scalar)       : model parameter
      
    Returns:
      cost (scalar): cost
    """

    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        z_i = np.dot(X[i],w) + b
        f_wb_i = sigmoid(z_i)
        cost +=  -y[i]*np.log(f_wb_i) - (1-y[i])*np.log(1-f_wb_i)
             
    cost = cost / m
    return cost

def compute_gradient_logistic(X, y, w, b): 
    """
    Computes the gradient for linear regression 
 
    Args:
      X (ndarray (m,n): Data, m examples with n features
      y (ndarray (m,)): target values
      w (ndarray (n,)): model parameters  
      b (scalar)      : model parameter
    Returns
      dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w. 
      dj_db (scalar)      : The gradient of the cost w.r.t. the parameter b. 
    """
    m,n = X.shape
    dj_dw = np.zeros((n,))                           #(n,)
    dj_db = 0.

    for i in range(m):
        f_wb_i = sigmoid(np.dot(X[i],w) + b)          #(n,)(n,)=scalar
        err_i  = f_wb_i  - y[i]                       #scalar
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err_i * X[i,j]      #scalar
        dj_db = dj_db + err_i
    dj_dw = dj_dw/m                                   #(n,)
    dj_db = dj_db/m                                   #scalar
        
    return dj_db, dj_dw  

def gradient_descent(X, y, w_in, b_in, alpha, num_iters): 
    """
    Performs batch gradient descent
    
    Args:
      X (ndarray (m,n)   : Data, m examples with n features
      y (ndarray (m,))   : target values
      w_in (ndarray (n,)): Initial values of model parameters  
      b_in (scalar)      : Initial values of model parameter
      alpha (float)      : Learning rate
      num_iters (scalar) : number of iterations to run gradient descent
      
    Returns:
      w (ndarray (n,))   : Updated values of parameters
      b (scalar)         : Updated value of parameter 
    """
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  #avoid modifying global w within function
    b = b_in
    
    for i in range(num_iters):
        # Calculate the gradient and update the parameters
        dj_db, dj_dw = compute_gradient_logistic(X, y, w, b)   

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw               
        b = b - alpha * dj_db               
      
        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            J_history.append( compute_cost_logistic(X, y, w, b) )

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")
        
    return w, b, J_history         #return final w,b and J history for graphing

def predict(X, w, b): 
    """
    Predict whether the label is 0 or 1 using learned logistic
    regression parameters w
    
    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      w : (ndarray Shape (n,))  values of parameters of the model      
      b : (scalar)              value of bias parameter of the model

    Returns:
      p : (ndarray (m,)) The predictions for X using a threshold at 0.5
    """
    # number of training examples
    m, n = X.shape   
    p = np.zeros(m)
    J_history = []
   
    ### START CODE HERE ### 
    # Loop over each example
    for i in range(m):   
        f_wb_i = sigmoid(np.dot(X[i],w) +b)
        
        if f_wb_i >= 0.5:
            p[i] = 1
        else:
            p[i] = 0
        
    ### END CODE HERE ### 
    return p

In [85]:
w_tmp  = np.zeros_like(x_train[0])
b_tmp  = 0.
alph = 0.1
iters = 1000

w_out, b_out, _ = gradient_descent(x_train, y_train, w_tmp, b_tmp, alph, iters)

p = predict(x_train, w_out, b_out)
print(f'Output of predict: shape {p.shape}')
print('Train Accuracy: %f'%(np.mean(p == y_train) * 100))
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")
print(f"model parameters (batch): w: {log_reg_w}, b:{log_reg_b}")

Iteration    0: Cost 0.5199155304053966   
Iteration  100: Cost 0.10250037689934736   
Iteration  200: Cost 0.08375939652798764   
Iteration  300: Cost 0.0753040303767322   
Iteration  400: Cost 0.07019446185843572   
Iteration  500: Cost 0.06666817616987233   
Iteration  600: Cost 0.06404283717825121   
Iteration  700: Cost 0.061990257412144874   
Iteration  800: Cost 0.06032955531009869   
Iteration  900: Cost 0.05895127743229119   
Output of predict: shape (455,)
Train Accuracy: 98.901099

updated parameters: w:[-0.6  -0.68 -0.58 -0.6  -0.23  0.17 -0.66 -0.75  0.06  0.47 -0.82  0.01
 -0.62 -0.65 -0.19  0.5   0.06 -0.14  0.41  0.47 -0.84 -1.05 -0.77 -0.78
 -0.79 -0.13 -0.79 -0.76 -0.69 -0.14], b:0.2254637406154479
model parameters (batch): w: [[-1.85  0.52 -1.86 -1.75 -1.46  9.83 -6.44 -6.09  2.91 -2.29 -9.15  2.19
  -2.9  -8.67 -3.8  -1.04  9.36 -4.68  3.9   4.85 -4.52 -9.59 -2.7  -5.87
  -1.1   0.76 -8.23 -1.82 -6.26  0.27]], b:[-3.19]
