# ID Block

- Author: Eric Zair
- File: my_gradient_decent_implementation.py.ipynb
- Date 02/11/2020

## Program Description

From scratch I will create my own linear regression model and implementing my own gradient decent. This model will map the price of a given car to the expected value by only using mileage as a value. In otherwords, I will find the relation of milage to the price of a used car.

In [154]:
import turicreate
from math import sqrt
from matplotlib import pyplot as plt

## First We Need To Add Our Data File

In [155]:
used_car_sales = turicreate.SFrame('../data/usedCar.csv')

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,int,int,str,int,str,str,int,str,str,str,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


## Split Testing & Training Data

### 80% Training, 20% Testing

In [156]:
training_data, test_data, = used_car_sales.random_split(.8, seed=0)

In [157]:
# Our trainging data (if you want to see it!).
print(training_data.show())

None


## Now We Need To Parse Out The Important Data

### This important data: TotalCost & Mileage

In [158]:
# Since we are using the training data to teach our model, we want to parse needed fields form that.

# This is our Y value.
training_total_cost_data = training_data['total.cost']
training_mileage_data = training_data['mileage']

In [159]:
# If you wanna see what the data contains.
print("training_total_cost_data:", training_total_cost_data)
print("training_mileage_data:", training_mileage_data)



training_total_cost_data: [4037, 4662, 4459, 4279, 4472, 5256, 5522, 3194, 4332, 4249, 4834, 6886, 4520, 5583, 5083, 4402, 6794, 4964, 3924, 5203, 4241, 5510, 5921, 4026, 4031, 4569, 5590, 4824, 4095, 4302, 3196, 4977, 4391, 4570, 4970, 4431, 5558, 4218, 4601, 4321, 4636, 4817, 3539, 7063, 4402, 4349, 3558, 5060, 4710, 5620, 3882, 5580, 5080, 3991, 5193, 5577, 4370, 3891, 6101, 3157, 6430, 7578, 3553, 4326, 5531, 3501, 6395, 4014, 5728, 5992, 3328, 5551, 6169, 5344, 5538, 4702, 4755, 4362, 4487, 4959, 4872, 5312, 4446, 3907, 3406, 4967, 3644, 4651, 3763, 4892, 5653, 6593, 4332, 4932, 5060, 5921, 4497, 4787, 4797, 3332, ... ]
training_mileage_data: [67341, 69384, 58239, 58999, 47234, 64674, 92879, 67197, 98403, 87183, 58701, 72823, 79729, 91034, 51411, 71264, 98214, 69802, 96330, 85846, 69417, 63035, 65893, 71165, 102645, 77427, 103323, 76213, 89709, 92049, 88600, 81929, 65530, 75830, 93738, 93768, 106102, 74021, 100837, 109049, 58519, 107040, 76569, 72809, 80060, 72431, 84965, 56757, 7

In [160]:
# If you wanna see what data we will be using in our training data for total.cost.
print("training_total_cost_data:", training_total_cost_data, end='\n\n')
print("training_mileage_data:", training_mileage_data)



training_total_cost_data: [4037, 4662, 4459, 4279, 4472, 5256, 5522, 3194, 4332, 4249, 4834, 6886, 4520, 5583, 5083, 4402, 6794, 4964, 3924, 5203, 4241, 5510, 5921, 4026, 4031, 4569, 5590, 4824, 4095, 4302, 3196, 4977, 4391, 4570, 4970, 4431, 5558, 4218, 4601, 4321, 4636, 4817, 3539, 7063, 4402, 4349, 3558, 5060, 4710, 5620, 3882, 5580, 5080, 3991, 5193, 5577, 4370, 3891, 6101, 3157, 6430, 7578, 3553, 4326, 5531, 3501, 6395, 4014, 5728, 5992, 3328, 5551, 6169, 5344, 5538, 4702, 4755, 4362, 4487, 4959, 4872, 5312, 4446, 3907, 3406, 4967, 3644, 4651, 3763, 4892, 5653, 6593, 4332, 4932, 5060, 5921, 4497, 4787, 4797, 3332, ... ]

training_mileage_data: [67341, 69384, 58239, 58999, 47234, 64674, 92879, 67197, 98403, 87183, 58701, 72823, 79729, 91034, 51411, 71264, 98214, 69802, 96330, 85846, 69417, 63035, 65893, 71165, 102645, 77427, 103323, 76213, 89709, 92049, 88600, 81929, 65530, 75830, 93738, 93768, 106102, 74021, 100837, 109049, 58519, 107040, 76569, 72809, 80060, 72431, 84965, 56757, 

## Implementation Of Gradient Decent

In [161]:


def gradient_descent_step(x_value_sframe, y_value_sframe, intercept=0, slope=0, step_size=0.0001):
    """ MORE LATER"""
    n = float(len(x_value_sframe))
    magnitude = 1
    tolarance = 0.01
    while magnitude >= tolarance:
        # y_hat is our expected value.
        # y_hat = (slope * x_value_sframe) + intercept
        error = (y_value_sframe - (intercept + slope * x_value_sframe)) ** 2
        # Okay, our error tells us how off we are with our current value Bigger error == BAD :)
        print("y_value_sframe:", y_value_sframe, end='\n\n')        
        # for i in range(len(x_value_sframe)):
        #     error += (y_value_sframe[i] - (slope * x_value_sframe[i] + intercept)) ** 2
        # sum_of_error = error / n
        
        sum_of_error = error.sum()
        sum_of_slope_error = x_value_sframe.sum() * sum_of_error

        print("\nSUM_OF_ERROR:", sum_of_error, "\n\n")
        print("\nError:", error, end='\n\n')
        

        # intercept_gradient = 0
        # slope_gradient = 0
        # for x_i in range(len(x_value_sframe)):
        #     x_i = x_value_sframe[i]
        #     y_i = y_value_sframe[i]  
            
            # Now that we have our sum of error, we must adjust our error.
            
        intercept_adjustment = - 2 * slope * step_size * sum_of_error
        intercept += intercept_adjustment

        slope_adjustment = -2 * step_size * sum_of_error_slope
        slope += slope_adjustment

        mag = sqrt(sum_of_error ** 2 + sum_of_error ** 2)

            # intercept_gradient += -(2/n) * (y_i - (slope * x_i + intercept))
            # slope_gradient += -(2/n) * x_i * (y_i - (slope * x_i + intercept))

        print("sum_of_slope_error:", slope, end='\n\n')
        slope_adjustment = 2 * step_size * sum_of_slope_error
        slope += slope_adjustment

        print("sum_of_error^2:", sum_of_error ** 2, end='\n\n')

        # print("sum_of_slope_error^2:", sum_of_slope_error ** 2, end='\n\n')
        # new_intercept_gradient = intercept - (step_size * intercept_gradient)
        # new_slope_gradient = slope - (step_size * slope_gradient)
    return new_slope_gradient, new_intercept_gradient


def gradient_descent(x_value_sframe, y_value_sframe, intercept=0, slope=0, step_size=0.001):
        new_intercept = intercept
        new_slope = slope
        for i in range(len(x_value_sframe)):
            new_intercept, new_slope = gradient_descent_step(x_value_sframe, y_value_sframe)
        return new_intercept, new_slope


In [162]:
coefficients = gradient_descent(training_mileage_data, training_total_cost_data)
print("Intercept:", coefficients[0], "\nSlope:", coefficients[1])

y_value_sframe: [4037, 4662, 4459, 4279, 4472, 5256, 5522, 3194, 4332, 4249, 4834, 6886, 4520, 5583, 5083, 4402, 6794, 4964, 3924, 5203, 4241, 5510, 5921, 4026, 4031, 4569, 5590, 4824, 4095, 4302, 3196, 4977, 4391, 4570, 4970, 4431, 5558, 4218, 4601, 4321, 4636, 4817, 3539, 7063, 4402, 4349, 3558, 5060, 4710, 5620, 3882, 5580, 5080, 3991, 5193, 5577, 4370, 3891, 6101, 3157, 6430, 7578, 3553, 4326, 5531, 3501, 6395, 4014, 5728, 5992, 3328, 5551, 6169, 5344, 5538, 4702, 4755, 4362, 4487, 4959, 4872, 5312, 4446, 3907, 3406, 4967, 3644, 4651, 3763, 4892, 5653, 6593, 4332, 4932, 5060, 5921, 4497, 4787, 4797, 3332, ... ]


SUM_OF_ERROR: 349519849840.0 



Error: [16297369.0, 21734244.0, 19882681.0, 18309841.0, 19998784.0, 27625536.0, 30492484.0, 10201636.0, 18766224.0, 18054001.0, 23367556.0, 47416996.0, 20430400.0, 31169889.0, 25836889.0, 19377604.0, 46158436.0, 24641296.0, 15397776.0, 27071209.0, 17986081.0, 30360100.0, 35058241.0, 16208676.0, 16248961.0, 20875761.0, 31248100.0, 23270976.0

NameError: name 'sum_of_error_slope' is not defined