In [None]:
# TODO
# - Clean up the deprecated functions at the end of this notebook

In [63]:
# PENALTY FUNCTIONS - SOME EXAMPLES

# multiplier is a positive scaling factor (> 0) that determines the slope (steepness) of the penalty

# Linear Penalty Function
def linearPenalty(x, multiplier=1):
    """Return the error ``x`` scaled by ``multiplier`` (a straight line through the origin)."""
    scaled = x * multiplier
    return scaled

# Flipped/Inverse Linear Penalty Function
def invLinearPenalty(x, multiplier=1):
    """Return the negated error ``-x`` scaled by ``multiplier`` (a downward-sloping line)."""
    flipped = -x
    return flipped * multiplier

# Linear for negative x and zero for positive x
def leftLinearPenalty(x, multiplier=1):
    """Penalize only negative errors: ``-x * multiplier`` when ``x < 0``, otherwise 0."""
    return -x * multiplier if x < 0 else 0
    
# Linear for positive x and zero for negative x
def rightLinearPenalty(x, multiplier=1):
    """Penalize only non-negative errors: 0 when ``x < 0``, otherwise ``x * multiplier``."""
    # Guard clause: anything left of zero carries no penalty.
    if x < 0:
        return 0
    return x * multiplier

# V shape penalty
def VPenalty(x, multiplier=1):
    """Symmetric V-shaped penalty: ``|x| * multiplier``.

    Parameters
    ----------
    x : number
        The error value.
    multiplier : number, default 1
        Slope applied to BOTH arms of the V.

    Returns
    -------
    number
        ``-x * multiplier`` for negative ``x`` and ``x * multiplier`` otherwise.
    """
    # The previous implementation returned a bare ``x`` on the non-negative
    # branch, so the right arm ignored ``multiplier`` and the V was lopsided.
    return abs(x) * multiplier
    
# Inverted V shape penalty
def invertedVPenalty(x, multiplier=1):
    """Upside-down V: ``x * multiplier`` for negative ``x``, ``-x * multiplier`` otherwise."""
    # Pick the non-positive "arm" first, then apply the slope once.
    arm = x if x < 0 else -x
    return arm * multiplier
    
# Positive parabola penalty
def squaredPenalty(x, multiplier=1):
    """Upward-opening parabola: the squared error scaled by ``multiplier``."""
    squared = x ** 2
    return squared * multiplier

# Inverted parabola penalty
def invertedSquaredPenalty(x, multiplier=1):
    """Downward-opening parabola: the negated squared error scaled by ``multiplier``."""
    squared = x ** 2
    return -squared * multiplier

# Non-linear penalty
def nonLinearPenalty(x, multiplier=1):
    """Cubic polynomial penalty: ``(x + x**2 + x**3) * multiplier``.

    Parameters
    ----------
    x : number
        The error value.
    multiplier : number, default 1
        Scaling factor applied to the whole polynomial.

    Returns
    -------
    number
        The scaled polynomial value.
    """
    # ``multiplier`` used to be accepted but silently ignored; it is now
    # applied like in every other penalty function. With the default of 1
    # the result is unchanged.
    return (x + x**2 + x**3) * multiplier

In [111]:
# Registry mapping each penalty function object to the human-readable
# label used as the plot title in penaltyPlot.
penaltyFunctions = dict([
    (linearPenalty, "Linear Penalty"),
    (invLinearPenalty, "Inverse Linear Penalty"),
    (leftLinearPenalty, "Left-Linear Penalty"),
    (rightLinearPenalty, "Right-Linear Penalty"),
    (VPenalty, "V Penalty"),
    (invertedVPenalty, "Inverted-V Penalty"),
    (squaredPenalty, "Squared Penalty"),
    (invertedSquaredPenalty, "Inverted Squared Penalty"),
    (nonLinearPenalty, "Non-Linear Penalty"),
])

In [112]:
# Given a list of error values, plot the penalty function
# Error = Predicted value - Actual value (the same sign convention as the plot's x-axis label and the
# penalty calculation). It is always along a single dimension because the output is always a single
# column in a dataset.
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline
import seaborn as sns

# Plot the penalty function for a given list of error values and a given penalty function
def penaltyPlot(errorList, penaltyFunction):
    """Plot ``penaltyFunction`` over the range spanned by ``errorList``.

    Parameters
    ----------
    errorList : sequence of numbers
        Error values; only their minimum and maximum are used, as the
        endpoints of the x-axis.
    penaltyFunction : callable
        Called once per sample point with a single positional argument.

    Notes
    -----
    Relies on the notebook-level names ``np``, ``plt`` and
    ``penaltyFunctions``. The figure is created as a side effect; nothing is
    returned.
    """
    # Set up the x-axis
    num_points = 200
    x = np.linspace(min(errorList), max(errorList), num_points)

    # Use the registered label when there is one; otherwise fall back to the
    # function's own name so that ad-hoc penalty functions can be plotted
    # without first being added to the registry (this used to raise KeyError).
    title = penaltyFunctions.get(
        penaltyFunction, getattr(penaltyFunction, "__name__", "Penalty")
    )

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.set(xlabel='Predicted Value - Actual Value', ylabel='Penalty', title=title)
    # Draw the coordinate axes through the origin for reference.
    ax.axvline(x=0, color='black')
    ax.axhline(y=0, color='black')
    ax.plot(x, [penaltyFunction(value) for value in x])

In [85]:
#penaltyPlot([1,2,3,4,5], squaredPenalty)

In [87]:
# Load up the packages to investigate the data
import numpy as np
import pandas as pd

In [103]:
# Add a column of ones to the first column of a dataframe
# and turn it into a matrix
def df_addOnes(dataFrame):
    """Return the frame's values as an ``np.matrix`` with a leading column of ones.

    Parameters
    ----------
    dataFrame : pandas.DataFrame or pandas.Series
        Feature data with m rows. A 1-D input (e.g. a Series) is treated as a
        single feature column.

    Returns
    -------
    numpy.matrix
        Matrix of shape (m, n + 1) whose first column is all ones (the
        intercept term) followed by the n original columns.
    """
    vals = np.asarray(dataFrame.values)
    if vals.ndim == 1:
        # A single feature supplied as a flat array: make it an m x 1 column.
        vals = vals.reshape(-1, 1)

    # Stack the ones column next to the data. The previous zip()-based
    # construction paired each 1.0 with a whole row *array*, which yields a
    # ragged nested sequence as soon as there is more than one feature.
    ones_column = np.ones((vals.shape[0], 1))
    feature_matrix = np.matrix(np.hstack((ones_column, vals)))

    return feature_matrix

In [104]:
# Making it easy to calculate the total penalty over the entire dataset
def penalty(df_features, df_output, paramater_value_list, penalty_function):
    """Normalized total penalty of a linear model over the whole dataset.

    Parameters
    ----------
    df_features : pandas.DataFrame
        Feature columns, m rows, WITHOUT the column of ones (it is added here
        via ``df_addOnes``).
    df_output : pandas.DataFrame, pandas.Series or array-like
        Target values, one per row. Anything exposing ``.values`` or directly
        convertible by numpy is accepted; it is coerced to an m x 1 column.
    paramater_value_list : sequence or matrix
        Parameters w0, w1, ..., wn (n = number of feature columns), as a flat
        list or as a row/column matrix.
    penalty_function : callable
        Called once per row with that row's scalar error
        (predicted - actual); must return a number.

    Returns
    -------
    number
        ``sum(penalty_function(error_i)) / (2 * m)``.

    Raises
    ------
    ValueError
        If ``df_features`` has no rows.
    """
    num_rows = len(df_features)
    if num_rows == 0:
        raise ValueError("penalty() requires at least one row of data")

    # Add the leading column of ones so w0 acts as the intercept.
    feature_matrix = df_addOnes(df_features)

    # Force the targets into an m x 1 column. A 1-D input (e.g. a Series)
    # would otherwise become a 1 x m matrix and the subtraction below would
    # silently broadcast to an m x m matrix of meaningless "errors".
    raw_output = getattr(df_output, "values", df_output)
    output_matrix = np.matrix(np.asarray(raw_output)).reshape(-1, 1)

    # Same for the parameters: accept a list, a row or a column.
    parameter_matrix = np.matrix(paramater_value_list).reshape(-1, 1)

    # Difference between the predicted and the actual value (m x 1).
    error = (feature_matrix * parameter_matrix) - output_matrix

    # Hand plain scalars to the penalty function. Iterating the matrix
    # directly yields 1 x 1 matrices, and piecewise penalties that return a
    # scalar 0 on one branch then produce a list mixing matrices and scalars
    # that np.sum cannot reduce reliably.
    penaltyPerOutput = [penalty_function(err) for err in np.asarray(error).ravel()]

    # totalPenalty is the sum of the penalties of each row of the dataset
    totalPenalty = np.sum(penaltyPerOutput)

    # The penalty of getting it wrong is 1/2m of the totalPenalty
    totalPenaltyNorm = totalPenalty / (2 * num_rows)

    return totalPenaltyNorm

In [108]:
# Implement Gradient Descent
def gradientDescent(X, y, W, alpha, iters, penaltyFunction):
    """Run ``iters`` rounds of batch gradient descent on a linear model.

    Parameters
    ----------
    X : the original feature dataframe (no column of ones; ``df_addOnes``
        adds it here).
    y : target values, one per row; also forwarded unchanged to ``penalty``.
    W : n x 1 matrix of starting parameters (indexed as ``W[j, 0]``).
    alpha : step size.
    iters : number of iterations.
    penaltyFunction : forwarded to ``penalty`` to compute the recorded cost.
        The update step itself does not reference it: every parameter is
        moved by ``alpha / m * sum(error * feature_column)``.

    Returns
    -------
    (W, cost) : the parameters after the last iteration and an array holding
        the cost recorded after each iteration.
    """
    # X with the leading column of ones, as a matrix.
    feature_matrix = df_addOnes(X)

    # Scratch space reused on every iteration.
    num_params = len(W)
    column_sums = np.zeros(shape=(num_params, 1))
    scaled_steps = np.zeros(shape=(num_params, 1))
    next_W = np.matrix(np.zeros(W.shape))
    cost = np.zeros(iters)

    for iteration in range(iters):
        # Errors are computed once, before any parameter is touched, so all
        # parameters are updated from the same snapshot.
        error = (feature_matrix * W) - y

        for j in range(len(W)):
            # Error vector times the j-th feature column, summed over rows.
            column_sums[j] = np.sum(np.multiply(error, feature_matrix[:, j]))

            # Scale by the step size and average over the number of rows.
            scaled_steps[j] = column_sums[j] * alpha / len(feature_matrix)

            next_W[j, 0] = W[j, 0] - scaled_steps[j]

        W = next_W

        # penalty() works from the original dataframe and takes the
        # parameters as a row, hence W.T.
        cost[iteration] = penalty(X, y, W.T, penaltyFunction)

    return W, cost

In [None]:
##### DEPRECATED #####
# Add a column of ones to the first column of a dataframe
# and turn it into a matrix
def df_addOnes_old(dataFrame):
    """DEPRECATED. Return the frame's values as an ``np.matrix`` with a leading column of ones.

    Parameters
    ----------
    dataFrame : pandas.DataFrame or pandas.Series
        Feature data with m rows. A 1-D input is treated as one column.

    Returns
    -------
    numpy.matrix
        Matrix of shape (m, n + 1); the first column is all ones.
    """
    vals = np.asarray(dataFrame.values)
    if vals.ndim == 1:
        # A single feature supplied as a flat array: make it an m x 1 column.
        vals = vals.reshape(-1, 1)

    # The previous zip()-based construction paired each 1.0 with a whole row
    # array, giving a ragged nested sequence for more than one feature.
    ones_column = np.ones((vals.shape[0], 1))
    feature_matrix = np.matrix(np.hstack((ones_column, vals)))

    return feature_matrix

In [None]:
##### DEPRECATED ####
# Making it easy to calculate the total penalty over the entire dataset
def penalty_old(df_features, df_output, paramater_value_list):
    """DEPRECATED. Normalized squared-error penalty over the whole dataset.

    Parameters
    ----------
    df_features : dataframe of the features (no column of ones added).
    df_output : dataframe of the output column (target variable); its
        ``.values`` are used.
    paramater_value_list : the parameters w0, w1, ..., wn where n is the
        number of columns in df_features.

    Returns
    -------
    The sum of squared errors divided by ``2 * len(df_features)``.
    """
    # Build the three operands: features with a leading column of ones,
    # the targets, and the parameters transposed into a column.
    augmented_features = df_addOnes(df_features)
    targets = np.matrix(df_output.values)
    weights = np.matrix(paramater_value_list).T

    # Predicted minus actual, via matrix multiplication and subtraction.
    residuals = (augmented_features * weights) - targets

    # Element-wise square of each row's error, then total over all rows.
    squared_residuals = np.power(residuals, 2)
    sum_of_squares = np.sum(squared_residuals)

    # Normalize by 2m, m being the number of rows in df_features.
    row_count = len(df_features)
    return sum_of_squares / (2 * row_count)

In [None]:
#### DEPRECATED ####
# Implement Gradient Descent
def gradientDescent_old(X, y, W, alpha, iters):
    """DEPRECATED. Batch gradient descent with the squared-error cost.

    Parameters
    ----------
    X : the original feature dataframe (no column of ones; ``df_addOnes``
        adds it here).
    y : target values, one per row; also forwarded to ``penalty_old``.
    W : n x 1 matrix of starting parameters (indexed as ``W[j, 0]``).
    alpha : step size.
    iters : number of iterations.

    Returns
    -------
    (W, cost) : the parameters after the last iteration and an array holding
        the cost recorded after each iteration.
    """
    # NOTE: X is the original dataframe -- add a column of ones and make it a matrix
    feature_matrix = df_addOnes(X)

    # Keep track of everything
    sumError = np.zeros(shape=(len(W), 1))
    sumErrorNorm = np.zeros(shape=(len(W), 1))
    temp = np.matrix(np.zeros(W.shape))
    cost = np.zeros(iters)

    for i in range(iters):
        # Errors are computed once per iteration, before any parameter changes.
        error = (feature_matrix * W) - y

        for j in range(len(W)):
            # Multiply the error vector by the appropriate column of the feature_matrix and sum it
            sumError[j] = np.sum(np.multiply(error, feature_matrix[:, j]))

            # Normalize the sumError using alpha and m
            sumErrorNorm[j] = np.divide(np.multiply(sumError[j], alpha), len(feature_matrix))

            temp[j, 0] = W[j, 0] - sumErrorNorm[j]

        W = temp

        # Use the original dataframe in this call; the penalty function takes
        # the parameters as a row, hence W.T.
        # This used to call penalty(X, y, W.T,), but penalty() now requires a
        # penalty_function argument, so that call raised TypeError. The
        # three-argument squared-error version is penalty_old().
        cost[i] = penalty_old(X, y, W.T)

    # Return the value of W after iters iterations of gradient descent
    # and the cost recorded for each iteration
    return W, cost

In [None]:
#### DEPRECATED ####
import matplotlib.pyplot as plt
import numpy as np

# Generate the data for a plot with slope = 1
x = np.linspace(-500000, 500000, 500)
y = np.linspace(-500000, 500000, 500)

# Set up the plot area: a 4 x 2 grid, each panel with its own y-axis
fig, axes_grid = plt.subplots(figsize=(15, 15), nrows=4, ncols=2, sharey=False)
(ax11, ax12), (ax21, ax22), (ax31, ax32), (ax41, ax42) = axes_grid


# Piecewise shapes used by rows 2 and 3 of the grid
def f21(x):
    """Linear for negative x, zero otherwise."""
    return -x if x < 0 else 0


def f22(x):
    """Zero for negative x, linear otherwise."""
    return 0 if x < 0 else x


def f31(x):
    """V shape."""
    return -x if x < 0 else x


def f32(x):
    """Inverted V shape."""
    return x if x < 0 else -x


# Generate the plots
ax11.plot(x, y)
ax12.plot(x, -y)
ax21.plot(x, [f21(value) for value in x])
ax22.plot(x, [f22(value) for value in x])
ax31.plot(x, [f31(value) for value in x])
ax32.plot(x, [f32(value) for value in x])
ax41.plot(x, y**2)
ax42.plot(x, -y**2)

fig.suptitle('A Variety of Penalty Functions', fontweight='bold')

# Decorate every panel: (axes, is it in the left column?, clamp the y-range?)
# Only the left column carries the 'Penalty' y-label, and only rows 2 and 3
# have their y-range pinned to the same span as the x-axis.
_panels = (
    (ax11, True, False),
    (ax12, False, False),
    (ax21, True, True),
    (ax22, False, True),
    (ax31, True, True),
    (ax32, False, True),
    (ax41, True, False),
    (ax42, False, False),
)
for _ax, _left_column, _clamp_y in _panels:
    _ax.set(xlabel='Predicted Value - Actual Value')
    if _left_column:
        _ax.set(ylabel='Penalty')
    # Reference lines through the origin
    _ax.axvline(x=0, color='black')
    _ax.axhline(y=0, color='black')
    if _clamp_y:
        _ax.set_ylim([-500000, 500000])