## Setup

In [2]:
import numpy as np
import pandas as pd

# Toy data set of three observations. `weight` is passed as x (the predictor)
# and `height` as y (the observed value) to the residual helpers below.
weight = [0.5, 2.3, 2.9]
height = [1.4, 1.9, 3.2]

# Exponent applied to each residual in the loss; forwarded to the derivative
# helpers on every gradient-descent step.
power_for_residual_loss_func = 2

def sq_residuals(x,y,intercept,slope):
    """Return the squared residual of every (x, y) pair for a straight line.

    The line predicts ``intercept + slope * x``; a residual is the observed
    ``y`` minus that prediction.

    Args:
        x: iterable of predictor values.
        y: iterable of observed values, paired with ``x`` through ``zip``
            (surplus items in the longer iterable are ignored).
        intercept: intercept of the candidate line.
        slope: slope of the candidate line.

    Returns:
        A list holding one squared residual per pair, in input order.
    """
    residuals = [
        observed - (intercept + slope * predictor)
        for predictor, observed in zip(x, y)
    ]
    return [residual ** 2 for residual in residuals]

def derivatives_sq_residuals_with_respect_intercept(x,y,intercept,slope,power_for_residual_loss_func):
    """Per-observation derivative of the residual loss w.r.t. the intercept.

    With residual ``r = y - (intercept + slope * x)`` and loss term ``r ** p``
    (``p = power_for_residual_loss_func``), the chain rule gives
    ``p * r ** (p - 1) * (-1)``, because ``d r / d intercept = -1``.

    The ``r ** (p - 1)`` factor was previously written as plain ``r``, which
    is only the correct derivative for ``p == 2``. Results for ``p == 2`` are
    unchanged.

    Args:
        x: iterable of predictor values.
        y: iterable of observed values, paired with ``x`` through ``zip``.
        intercept: current intercept of the line.
        slope: current slope of the line.
        power_for_residual_loss_func: exponent ``p`` of the residual in the
            loss. An integer exponent is assumed; a fractional exponent with
            a negative residual would produce a complex number.

    Returns:
        A list with one derivative value per pair, in input order.
    """
    power = power_for_residual_loss_func
    return [
        power * (observed - (intercept + slope * predictor)) ** (power - 1) * -1
        for predictor, observed in zip(x, y)
    ]

def derivatives_sq_residuals_with_respect_slope(x,y,intercept,slope,power_for_residual_loss_func):
    """Per-observation derivative of the residual loss w.r.t. the slope.

    With residual ``r = y - (intercept + slope * x)`` and loss term ``r ** p``
    (``p = power_for_residual_loss_func``), the chain rule gives
    ``p * r ** (p - 1) * (-x)``, because ``d r / d slope = -x``.

    The ``r ** (p - 1)`` factor was previously written as plain ``r``, which
    is only the correct derivative for ``p == 2``. Results for ``p == 2`` are
    unchanged.

    Args:
        x: iterable of predictor values.
        y: iterable of observed values, paired with ``x`` through ``zip``.
        intercept: current intercept of the line.
        slope: current slope of the line.
        power_for_residual_loss_func: exponent ``p`` of the residual in the
            loss. An integer exponent is assumed; a fractional exponent with
            a negative residual would produce a complex number.

    Returns:
        A list with one derivative value per pair, in input order.
    """
    power = power_for_residual_loss_func
    return [
        power * (observed - (intercept + slope * predictor)) ** (power - 1) * (predictor * -1)
        for predictor, observed in zip(x, y)
    ]

# Settings for the learning-rate sweep.
MAX_ITERATIONS = 999    # hard cap on gradient-descent updates per learning rate
STEP_TOLERANCE = 0.001  # stop once both step sizes are smaller than this

# np.arange with a fractional step builds its values with floating-point
# arithmetic, so an element can differ from its decimal literal by a rounding
# error. Rounding to the step's three decimals keeps the recorded `lrate`
# values dependable for equality filters and group labels.
learning_rates = np.round(np.arange(0.01,0.075,0.005), 3)

# One record per update step: [lrate, iteration, slope, intercept, loss].
outputs = []
for lrate in learning_rates:
    # Every learning rate starts from the same initial line.
    intercept = 0
    slope = 1
    # NOTE(review): a run whose steps never shrink below STEP_TOLERANCE uses
    # all MAX_ITERATIONS and records every one of those rows.
    for i in range(MAX_ITERATIONS):
        # Gradient of the summed loss = sum of the per-observation derivatives.
        deriv_sum_sqres_intercept = sum(derivatives_sq_residuals_with_respect_intercept(weight,height,intercept,slope,power_for_residual_loss_func))
        deriv_sum_sqres_slope = sum(derivatives_sq_residuals_with_respect_slope(weight,height,intercept,slope,power_for_residual_loss_func))

        # Apply Learning Rate to calculate new Step Size
        step_size_intercept = deriv_sum_sqres_intercept*lrate
        step_size_slope = deriv_sum_sqres_slope*lrate

        # Calculate new Intercept and Slope from Step Size and previous values
        intercept -= step_size_intercept
        slope -= step_size_slope

        # Loss evaluated at the updated parameters.
        sum_sq_resids = sum(sq_residuals(weight,height,intercept,slope))

        # Record
        outputs.append([lrate,i,slope,intercept,sum_sq_resids])

        # End early if all step sizes become very small
        if abs(step_size_intercept) < STEP_TOLERANCE and abs(step_size_slope) < STEP_TOLERANCE:
            break



## All Results

In [8]:
# Raise the display cap so long result tables are not truncated.
pd.set_option("display.max_rows", 9999)

# One row per recorded update step across the whole learning-rate sweep.
results = pd.DataFrame(
    outputs,
    columns=["lrate", "i", "slope", "intercept", "loss"],
)

# For each learning rate keep the row with the largest iteration number,
# i.e. the state in which that run stopped.
results.loc[results.groupby("lrate")["i"].idxmax()]

Unnamed: 0,lrate,i,slope,intercept,loss
206,0.01,206,0.6778085,0.8623845,0.4496481
368,0.015,161,0.6651472,0.8921019,0.4467797
502,0.02,133,0.6589453,0.9066586,0.4458098
617,0.025,114,0.6551898,0.9154732,0.4453616
718,0.03,100,0.652738,0.9212277,0.4451257
808,0.035,89,0.6511028,0.9250659,0.4449931
890,0.04,81,0.6495871,0.9286232,0.444888
965,0.045,74,0.6485969,0.9309475,0.4448286
1034,0.05,68,0.6478754,0.9326408,0.4447899
1098,0.055,63,0.6472554,0.9340959,0.4447597


## Using Learning Rate 0.01

In [5]:
# Last recorded iteration of the run that used learning rate 0.01.
results.loc[results["lrate"] == 0.010].iloc[-1]

lrate          0.010000
i            206.000000
slope          0.677809
intercept      0.862385
loss           0.449648
Name: 206, dtype: float64

## Best Learning Rate based on Loss

In [6]:
# Row with the smallest recorded loss over every learning rate and iteration.
# Series.idxmin returns an index *label*, so the lookup has to be label-based
# (.loc). Positional .iloc only agrees while `results` keeps its default
# 0..n-1 index and would pick the wrong row after any filter, sort or reindex.
results.loc[results['loss'].idxmin()]

lrate          0.060000
i            123.000000
slope          0.640597
intercept      0.948371
loss           0.444620
Name: 1222, dtype: float64