In [1]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold
import autograd.numpy as np
from autograd import grad
                                                                           
np.random.seed(100)


In [2]:
def cross_entropy_loss(X, y, beta ):
  if X.shape[0] != len(y):
    raise TypeError("The number of datapoints must match between X and y")
  else:
    n = len(y)
    y = y.reshape(-1,1)
    Lambda = 0.001  # l2 regularization weight
    p = np.exp(X @ beta)/ (1+ np.exp(X @ beta))
    cost = np.sum((y* np.log(p) + (1-y)*np.log(1-p))) + Lambda*(np.sum(beta**2))
  return cost


def design_matrix(x_train, x_test):
  # Introduce bias and polynomials, if necessary
  poly = PolynomialFeatures(degree = 1)
  X_train= poly.fit_transform(x_train)
  X_test= poly.fit_transform(x_test)

  # Z-score the design matrix except the bias
  mean = np.mean(X_train[:,1:] , axis=0)
  std = np.std(X_train[:,1:] , axis=0) 

  X_train[:,1:] = X_train[:,1:] - mean / std
  X_test[:,1:] = X_test[:,1:] - mean / std

  return X_train , X_test


def simple_SGD(X_train_scaled, y_train, beta ,lr_eta = 0.1):
  if X_train_scaled.shape[0] != len(y_train):
    raise TypeError("The number of datapoints must match between X and y")
  else:
    n = len(y_train)  #number of datapoints
    M = 5  #size of each minibatch
    m = int(n/M) #number of minibatches
    n_epochs = 100 #number of epochs
    batch_indices= {}

    kf= KFold(n_splits = m)
    for k,(_, test_index) in enumerate(kf.split(X_train_scaled)):
      batch_indices[k] = test_index
          
    for epoch in range(1,n_epochs+1):
      for i in range(m):
        chosen_batch = np.random.randint(m)

        #Pick minibatch corresponding to 'chosen_batch'
        X_train_minibatch = X_train_scaled[batch_indices[chosen_batch ]]
        y_train_minibatch = y_train[batch_indices[chosen_batch ]]     

        #Compute the gradient using the data in minibatch Bk
        # Compute gradients using grad
        #p = np.exp(X_train_minibatch @ beta) / (1 + np.exp(X_train_minibatch @ beta))
        #gradients = - (X_train_minibatch.T @ (y_train_minibatch.reshape(-1,1) - p.reshape(-1,1)) + (2*0.001*beta) )

        gradients = grad(cross_entropy_loss,2)
        
        #Compute new suggestion for beta
        beta -= lr_eta*gradients(X_train_minibatch, y_train_minibatch, beta)

  return beta







## Part g) Logistic Regression code (own)

In [3]:
# Load the data
cancer = load_breast_cancer()
X, y =cancer.data,cancer.target

## Cross-validation based Logistic regression
ksplits = 5
kfold = KFold(n_splits = ksplits )
lr_eta = [1e-1]

est_test_acc = np.zeros((len(lr_eta), ksplits ))

for n_lr,lr in  enumerate(lr_eta):

  ## split the data into 5 folds and evaluate performance 
  for fold, (train_ind, test_ind) in enumerate(kfold.split(X)):
    print('Fold', fold+1)
    X_train , X_test = X[train_ind], X[test_ind]
    y_train, y_test = y[train_ind], y[test_ind]

    ## Create polynomials, if necessary and z-scale data
    X_train_scaled, X_test_scaled  = design_matrix(X_train, X_test)
    
    # Randomly initialize the beta values
    beta = np.random.randn(X_train_scaled.shape[1],1)

    # optimize betas using simple_sgd
    beta_opt =  simple_SGD(X_train_scaled, y_train, beta, lr_eta = lr)

    print('Cross Entropy loss:',cross_entropy_loss(X_train_scaled, y_train, beta_opt))
    
    # find accuracy on test set with beta_opt
    y_pred = X_test_scaled @ beta_opt
    
    est_test_acc[n_lr , fold] = np.mean(y_pred== y_test)



print(est_test_acc)



Fold 1


  return f_raw(*args, **kwargs)
  return f_raw(*args, **kwargs)
  return f_raw(*args, **kwargs)
  return f_raw(*args, **kwargs)
  defvjp(anp.log,    lambda ans, x : lambda g: g / x)
  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))


Cross Entropy loss: nan
Fold 2


  return f_raw(*args, **kwargs)
  return f_raw(*args, **kwargs)
  defvjp(anp.log,    lambda ans, x : lambda g: g / x)


Cross Entropy loss: nan
Fold 3


  return f_raw(*args, **kwargs)
  defvjp(anp.log,    lambda ans, x : lambda g: g / x)
  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))
  defvjp(anp.exp,    lambda ans, x : lambda g: ans * g)


Cross Entropy loss: nan
Fold 4


  return f_raw(*args, **kwargs)
  return f_raw(*args, **kwargs)
  defvjp(anp.log,    lambda ans, x : lambda g: g / x)
  defvjp(anp.log,    lambda ans, x : lambda g: g / x)
  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))
  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))


Cross Entropy loss: nan
Fold 5


  return f_raw(*args, **kwargs)
  return f_raw(*args, **kwargs)
  defvjp(anp.log,    lambda ans, x : lambda g: g / x)


Cross Entropy loss: nan
[[0. 0. 0. 0. 0.]]


## Part g) Scikit Learn's Logistic Regression

In [4]:
# Logistic Regression
logreg = LogisticRegression(solver='lbfgs', max_iter= 1000)
logreg.fit(X_train_scaled, y_train)
print("Test set accuracy with Logistic Regression: {:.2f}".format(logreg.score(X_test_scaled,y_test)))


Test set accuracy with Logistic Regression: 0.95
