In [0]:
"""
DESCRIPTION OF IMPLEMENTATION:

Since ridge regression was designed to handle singular feature matrices we don't have to worry about singular matrices and we can used the closed form solution 
for the minimal weight vector.  I build weight vectors over the range of lambda's 0-100 and the combination of these weight vectors form the model.  Once we 
calculate the model we calculate the prediction on the next set  over the range of lambda's and finally calculate the RMSE the same way so we get RMSE values over 
the entire range of lambda values.  This is done for 5-folds and the averages are compared as well as the individual RMSE values over the range of lambda's.

I include the equations used in the LateX doc towards the end
"""

'\nThis is just a rough skeleton to assist you in understanding the process flow. \nModify it to meet the requirements of the questions. \n\nPut the description paragraph here\nDESCRIPTION OF IMPLEMENTATION:\n\nImplementation of Ridge Regression without using gradient descent \nas a optimality condition\n\n\n'

In [0]:
# Used to upload the IRISfeat.csv and IRISlabel.csv files
from google.colab import files
uploaded = files.upload()

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [0]:
# Grab data
data = pd.read_csv('Housing.csv')

In [0]:
def get_next_train_valid(X_shuffled, y_shuffled, itr):
    """
    Return one validation set and concatenate the rest to 
    use as a training set
    """
    val_x, val_y = X_shuffled[itr], y_shuffled[itr][:,None]
    
    training_x = np.empty((0,X_shuffled[0].shape[1]))
    training_y = np.empty((0,1))
    
    for k,v in X_shuffled.items():
      if k != itr:
        training_x = np.concatenate((training_x,v),axis=0)
    
    for k,v in y_shuffled.items():
      if k != itr:
        training_y = np.concatenate((training_y,v.reshape(v.shape[0],1)),axis=0)
        
        
    return training_x, training_y, val_x, val_y

In [0]:
def train(X_train, y_train,lam):
    """
    Not using gradient descent so we take gradient of loss function and 
    find weights that set it equal to 0. w = (lamdaI + (X^T)X)^-1 X^Ty which
    is the closed form solution
    """
    weights = []
 
    xTx = np.dot( np.transpose(X_train), X_train )
    lam_I = np.identity(xTx.shape[0]) * lam

    inv_term = np.linalg.inv(lam_I + xTx)
    out_term = np.dot( np.transpose(X_train), y_train )

    w = np.dot(inv_term, out_term)
    weights.append(w)

    intercept = w[0]
    
    return weights, intercept

In [0]:
def predict(X_valid, model, intercept):
    """
    Prediction function that multiplies validation
    set by the set of weight vectors
    """
    
    y_predictions = []
    for w in model:
      y_pred = np.dot(X_valid, w)
      y_predictions.append(y_pred)
    
    return y_predictions

In [0]:
# Convert to numpy matrix [X:y] were X is features matrix and y is label vector
data_np = data.values

# Shuffle entries
np.random.shuffle(data_np)

In [0]:
"""
Overall logic.
"""

# Partition data into k folds
k = 5

X_shuffled = dict([(i,feat) for i,feat in enumerate(np.array_split(data_np[:,:-1],k))])
y_shuffled = dict([(i,feat) for i,feat in enumerate(np.array_split(data_np[:,-1],k))])

# Loop through the folds
RMSEs_Avgs = []
RMSEs_lambdas = []
for i in range(0,k):
  
  # Get training and validation sets
  x_train, y_train, x_test, y_test = get_next_train_valid(X_shuffled, y_shuffled, i)
  
  # Train to get the model for prediction
  lam = 10
  model, intercept = train(x_train, y_train, lam)
  
  # Predict y values
  y_predictions = predict(x_test, model,intercept)
  
  # Computer RMSE over the lambdas
  RMSEs = []
  for p in y_predictions:
    # Calculate rmse over the predictions
    rmse = np.sqrt(np.sum(np.square(y_test - p)) / len(y_test))
    RMSEs.append(rmse)
   
  # Store the average rmse for this k-fold
  avg = sum(RMSEs) / len(RMSEs)
  RMSEs_Avgs.append(avg)
  
  # Store the rmse values across all lambdas for this k-fold
  RMSEs_lambdas.append(RMSEs)


In [62]:
'''
The code below is used to plot and calculate some statistics for the previous algorithm
'''

'\nThe code below is used to plot and calculate some statistics for the previous algorithm\n'

In [63]:
RMSEs_Avgs

[5.8347519596086155,
 4.853322966452454,
 4.709014443334091,
 5.331529879001799,
 4.423681428645843]

In [64]:
sum(RMSEs_Avgs) / len(RMSEs_Avgs)

5.0304601354085605