In [12]:
# Six basic method implementations, as described above in step 2: we want you to implement and use the methods
# we have seen in class and in the labs. You will need to provide working implementations of the functions in Table 1.
# If you have not finished them during the labs, you should start by implementing the first ones to have a working
# toolbox before diving into the dataset.

#Return type: Note that all functions should return: (w, loss), which is the last weight vector of the method, 
#and the corresponding loss value (cost function). Note that while in previous labs you might have kept track of 
#all encountered w for iterative methods, here we only want the last one.

In [13]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

In [14]:
def compute_MSE(y, tx, w):
    """Return the halved mean squared error of the linear prediction tx.w.

    The value is ||y - tx.w||^2 / (2 * len(y)).
    """
    residual = y - np.dot(tx, w)
    squared_norm = np.linalg.norm(residual) ** 2
    return squared_norm / (2 * len(y))

In [15]:
def least_squares(y, tx, n_max):
    """Fit least squares through the normal equations after a power expansion.

    Parameters
    ----------
    y : array
        Targets, one per row of `tx`.
    tx : 2-D array
        Design matrix. Its first column is dropped to obtain the features that
        are raised to higher powers (presumably that column is the constant
        offset term -- TODO confirm against the caller).
    n_max : int
        Powers 2, 3, ..., n_max - 1 of those features are appended to `tx` as
        extra columns. With n_max <= 2 nothing is appended.

    Returns
    -------
    (mse, w_opt) : the `compute_MSE` value of the fit and the weight vector,
        in that order.
    """
    raw_features = np.delete(tx, 0, 1)

    # np.concatenate takes a *sequence* of arrays plus an axis and returns a
    # new array; the expanded matrix is rebound locally so the caller's `tx`
    # is left untouched.
    power_columns = [np.power(raw_features, power) for power in range(2, n_max)]
    if power_columns:
        tx = np.concatenate([tx] + power_columns, axis=1)

    # Normal equations: (tx^T tx) w = tx^T y
    tx_t = np.transpose(tx)
    w_opt = np.linalg.solve(np.matmul(tx_t, tx), np.dot(tx_t, y))
    mse = compute_MSE(y, tx, w_opt)

    return mse, w_opt

In [16]:
def ridge_regression(y, tx, n_max, lambda_):
    """Fit ridge regression through the regularized normal equations.

    The weights minimise ``compute_MSE(y, tx, w) + lambda_ * ||w||^2``. Because
    `compute_MSE` scales the squared error by 1 / (2N), setting the gradient to
    zero gives ``(tx^T tx + 2 N lambda_ I) w = tx^T y`` with N = len(y).

    Parameters
    ----------
    y : array
        Targets, one per row of `tx`.
    tx : 2-D array
        Design matrix. Its first column is dropped to obtain the features that
        are raised to higher powers (presumably that column is the constant
        offset term -- TODO confirm against the caller).
    n_max : int
        Powers 2, 3, ..., n_max - 1 of those features are appended to `tx` as
        extra columns. With n_max <= 2 nothing is appended.
    lambda_ : float
        Regularization strength.

    Returns
    -------
    (mse, w_opt) : the unregularized `compute_MSE` value of the fit and the
        weight vector, in that order.
    """
    raw_features = np.delete(tx, 0, 1)

    # Build the expanded design matrix as a new array (caller's `tx` is not
    # modified); np.concatenate needs a sequence of arrays and an axis.
    power_columns = [np.power(raw_features, power) for power in range(2, n_max)]
    if power_columns:
        tx = np.concatenate([tx] + power_columns, axis=1)

    n_samples = len(y)
    n_features = np.shape(tx)[1]
    # NOTE(review): the 2 * N factor follows from compute_MSE's 1/(2N) scaling;
    # confirm this is the intended lambda_ convention.
    # The identity must match tx^T tx, i.e. be (n_features x n_features).
    penalty = 2 * n_samples * lambda_ * np.identity(n_features)

    tx_t = np.transpose(tx)
    w_opt = np.linalg.solve(np.matmul(tx_t, tx) + penalty, np.dot(tx_t, y))
    mse = compute_MSE(y, tx, w_opt)

    return mse, w_opt

In [17]:
def sigmoid(t):
    """Logistic function 1 / (1 + exp(-t)).

    Maps `t` to a value between 0 and 1 so that it can be read as a
    probability.
    """
    denominator = 1 + np.exp(-t)
    return 1.0 / denominator

In [18]:
def calculate_loss(y, tx, w):
    """Negative log-likelihood of the logistic model.

    Computes ``-[y^T log(s) + (1 - y)^T log(1 - s)]`` with
    ``s = 1 / (1 + exp(-tx.w))``.

    Parameters
    ----------
    y : numpy array of targets (the formula treats them as 0/1 labels).
    tx : design matrix.
    w : weight vector.

    Returns
    -------
    The loss with single-dimensional entries squeezed out of its shape.
    """
    z = np.dot(tx, w)
    # log(s) = -log(1 + exp(-z)) and log(1 - s) = -log(1 + exp(z)).
    # np.logaddexp evaluates these without overflow, so saturated predictions
    # do not produce log(0) = -inf.
    log_prob = -np.logaddexp(0, -z)
    log_one_minus_prob = -np.logaddexp(0, z)
    log_likelihood = np.dot(y.T, log_prob) + np.dot((1 - y).T, log_one_minus_prob)
    return np.squeeze(-log_likelihood)

In [19]:
def calculate_gradient(y, tx, w):
    """Gradient of the logistic negative log-likelihood with respect to `w`.

    Returns ``tx^T (sigmoid(tx.w) - y)``.
    """
    # Only `np` is imported in this notebook, so dot must be qualified.
    prediction = sigmoid(np.dot(tx, w))
    gradient = np.dot(tx.T, prediction - y)
    return gradient

In [20]:
def learning_by_gradient_descent(y, tx, w, gamma):
    """Perform one gradient-descent step of logistic regression.

    Returns the updated weights together with the loss evaluated at those
    updated weights, as (w, loss).
    """
    step = gamma * calculate_gradient(y, tx, w)
    updated_w = w - step
    return updated_w, calculate_loss(y, tx, updated_w)

In [None]:
def logistic_regression(y, tx, w):
    """Return (loss, gradient) of logistic regression at `w`.

    Both values come from the helper functions calculate_loss and
    calculate_gradient.
    """
    return calculate_loss(y, tx, w), calculate_gradient(y, tx, w)

In [8]:
def regularized_logistic_regression(y, tx, w, lambda_):
    """Return (loss, gradient) of L2-regularized logistic regression at `w`.

    loss     = calculate_loss(y, tx, w) + lambda_ * w^T w
    gradient = calculate_gradient(y, tx, w) + 2 * lambda_ * w
    """
    # Only `np` is imported in this notebook, so dot must be qualified.
    penalty = lambda_ * np.squeeze(np.dot(w.T, w))
    loss = calculate_loss(y, tx, w) + penalty
    gradient = calculate_gradient(y, tx, w) + 2 * lambda_ * w
    return loss, gradient

In [9]:
def learning_by_regularized_gradient(y, tx, w, gamma, lambda_):
    """Perform one gradient-descent step of regularized logistic regression.

    Returns
    -------
    (loss, w) : the regularized loss evaluated at the incoming weights (before
        the step) and the updated weights, in that order.
    """
    # The helper defined in this notebook is regularized_logistic_regression.
    loss, gradient = regularized_logistic_regression(y, tx, w, lambda_)
    # Build a new array instead of `w -= ...` so the caller's weight vector is
    # not modified in place.
    w = w - gamma * gradient
    return loss, w

In [21]:
def build_k_indices(y, k_fold, seed):
    """Split shuffled row indices of `y` into `k_fold` equally sized groups.

    The global NumPy random generator is seeded with `seed` before shuffling.
    Each group holds int(rows / k_fold) indices; leftover rows are dropped.
    """
    n_rows = y.shape[0]
    fold_size = int(n_rows / k_fold)
    np.random.seed(seed)
    shuffled = np.random.permutation(n_rows)

    folds = []
    for fold in range(k_fold):
        start = fold * fold_size
        folds.append(shuffled[start:start + fold_size])
    return np.array(folds)

In [None]:
def build_poly(x, degree):
    """Polynomial basis expansion of `x` for powers j = 0 up to j = degree.

    Parameters
    ----------
    x : 1-D array-like of samples (a single-column 2-D array is also accepted).
    degree : int
        Highest power to include.

    Returns
    -------
    Float array of shape (len(x), degree + 1) whose entry [i, j] is x[i] ** j.

    Raises
    ------
    ValueError
        If `x` is not one-dimensional (or a single column).
    """
    x = np.asarray(x, dtype=float)
    if x.ndim == 2 and x.shape[1] == 1:
        x = x[:, 0]
    if x.ndim != 1:
        raise ValueError("build_poly expects a one-dimensional input")

    # Broadcasting a column of samples against a row of exponents fills the
    # whole matrix in one vectorized call instead of a Python double loop.
    exponents = np.arange(degree + 1)
    return np.power(x[:, np.newaxis], exponents)

In [None]:
def cross_validation(y, x, k_indices, k, lambda_, degree):
    """Return the train and test RMSE of ridge regression for one fold.

    Parameters
    ----------
    y, x : arrays of targets and inputs, indexed by the entries of `k_indices`.
    k_indices : 2-D array of row indices, one row per fold.
    k : int
        Index of the fold held out for testing; all other folds form the
        training set.
    lambda_ : float
        Ridge regularization strength.
    degree : int
        Degree of the polynomial expansion applied by build_poly.

    Returns
    -------
    (loss_train, loss_test) : sqrt(2 * compute_MSE) on each split.
    """
    # Fold k is held out for testing; the remaining folds are merged.
    test_indice = k_indices[k]
    keep_fold = np.arange(k_indices.shape[0]) != k
    train_indice = k_indices[keep_fold].reshape(-1)

    y_test, y_train = y[test_indice], y[train_indice]
    x_test, x_train = x[test_indice], x[train_indice]

    # Polynomial expansion of both splits.
    tx_train = build_poly(x_train, degree)
    tx_test = build_poly(x_test, degree)

    # build_poly already produced every power column, so n_max=2 asks
    # ridge_regression to append none. It returns (mse, weights).
    _, w = ridge_regression(y_train, tx_train, 2, lambda_)

    # compute_MSE is ||e||^2 / (2N), so sqrt(2 * MSE) is the RMSE.
    loss_train = np.sqrt(2 * compute_MSE(y_train, tx_train, w))
    loss_test = np.sqrt(2 * compute_MSE(y_test, tx_test, w))

    return loss_train, loss_test