# Gradient Descent

In the following code, we generate a set of data points along a line

In [None]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

In [None]:
# here's a function that generates linear data
def genLinearData(x, slope, intercept):
    """
    Evaluate the line y = slope * x + intercept.

    Parameters:
        x - value(s) at which to evaluate the line
        slope - slope of the line
        intercept - y-intercept of the line

    Returns
        The y value(s) on the line for the given x
    """
    scaled = slope * x
    return scaled + intercept

# and here's a function that adds Gaussian noise to a set of values
def addGaussianNoise(data, mu, sigma):
    """
    Add independent Gaussian noise to every element of data.

    Parameters:
        data - the values to add noise to (scalar, sequence, or array of any shape)
        mu - the mean of the Gaussian
        sigma - the standard deviation of the Gaussian

    Returns
        data plus one noise sample per element; data itself is not modified
    """
    # Sample with the full shape of data instead of len(data): scalars and
    # multi-dimensional arrays then get one independent draw per element,
    # while 1-D input draws exactly the same samples as before.
    noise = np.random.normal(mu, sigma, np.shape(data))
    return data + noise

In [None]:
# generate a noise-free data set for linear regression
numPoints = 500 # number of points

# numPoints evenly spaced x values from -2 to 2
x = np.linspace(-2,2,numPoints)

# "true" y values on the line y = 5x - 1, based on the slope, intercept, and x values
y = genLinearData(x, slope=5, intercept=-1)



Plot the data

In [None]:
# draw the noise-free points as a connected line
plt.plot(x,y)

Next, we add some Gaussian noise to the data

In [None]:
# add Gaussian noise to the true y values
# the two positional arguments are, in order:
# mu = 0
# sigma = 5
yNoisy = addGaussianNoise(y, 0, 5)

Plot the noisy data

In [None]:
# draw the noisy points as a connected line
plt.plot(x,yNoisy)

## Task One: Gradient Descent

In [None]:
# gradient descent
def gd(X, Y, startM, startB, numIters, epsilon, lRate):
    """
    Gradient descent for linear regression (y = m*x + b) using mean squared error.
    Uses all of the data in each iteration.

    This is a template stub: the body is left for you to implement, so the
    function currently returns None.

    Returns
        The number of iterations run
        The final value of lRate
        The final prediction for the slope
        The final prediction for the intercept

        NOTE(review): the calls to gd in this notebook unpack only three
        values (iters, myM, myB), i.e. without lRate - confirm which return
        signature is intended and make this list and the callers agree.

    Parameters:
        X - input data
        Y - true output data
        startM - starting value for the line slope
        startB - starting value for the line intercept
        numIters - maximum number of iterations to run
        epsilon - Stopping condition: change in loss function from iteration to iteration
        lRate - learning rate
    """
    # your code here

### Run implementation of Gradient Descent

Generate data using the following parameters:

* x range: -2 to 2
* 500 points
* slope = 5
* intercept = -1

Gaussian noise:
* mu = 0
* sigma = 5

Run your implementation of gradient descent on data generated with the parameters listed above.

Start with values 
* slope = -12
* intercept = 0
* max iterations = 1000
* epsilon = .01
* learning rate = 0.1

In [None]:
# set the seed before generating the data and running your algorithm
np.random.seed(553)

numPoints = 500
# evenly spaced x values
x = np.linspace(-2,2,numPoints)
# noise-free y values on the line y = 5x - 1
y = genLinearData(x, slope=5, intercept=-1)
# noisy observations (mu = 0, sigma = 5); the duplicated "yNoisy = yNoisy =" target was removed
yNoisy = addGaussianNoise(y, mu=0, sigma=5)


In [None]:
# time a single run of full-batch gradient descent on the noisy data
# (gd must return the iteration count, slope and intercept for this unpacking to work)
start = time.time()
iters, myM, myB = gd(x, yNoisy, startM=-12, startB=0, numIters=1000, epsilon=.01, lRate=0.1)
end = time.time()

# report elapsed wall-clock seconds, the fitted slope/intercept and the iteration count
print('time: ', round(end - start,4))
print('m: ', round(myM,2), 
      'b: ', round(myB,2),
      'iters: ', iters)

## Task Two: Gradient Descent with Bold Driver

Return:

* the number of iterations actually run

* the final learning rate

* the predicted value for the slope

* the predicted value for the intercept


In [None]:
def gdBoldDriver(X, Y, startM, startB, numIters, epsilon, lRate):
    """
    Gradient descent for linear regression (y = m*x + b) using mean squared error.
    Uses all of the data in each iteration.
    Implements the Bold Driver algorithm, i.e. the learning rate is adjusted
    while the algorithm runs, which is why the final learning rate is returned.

    This is a template stub: the body is left for you to implement, so the
    function currently returns None.

    Returns
        The number of iterations run
        The final learning rate
        The final prediction for the slope
        The final prediction for the intercept

    Parameters:
        X - input data
        Y - true output data
        startM - starting value for the line slope
        startB - starting value for the line intercept
        numIters - maximum number of iterations to run
        epsilon - Stopping condition: change in loss function from iteration to iteration
        lRate - starting learning rate
    """

    # your code here

### Run implementation of Gradient Descent with Bold Driver
using the same parameters as last time

In [None]:
# time a single run of gradient descent with Bold Driver on the noisy data
start = time.time()
iters, lRate, myM, myB = gdBoldDriver(x, yNoisy, startM=-12, startB=0, numIters=1000, epsilon=.01, lRate=0.1)
end = time.time()

# report elapsed wall-clock seconds and the results; 'lambda' is the final learning rate
print('time: ', round(end - start,4))
print('m: ', round(myM,2), 
      'b: ', round(myB,2),
      'iters: ', iters, 
      'lambda: ', round(lRate,4))

## Task Three: Mini-batch Gradient Descent

Implement mini-batch Gradient Descent for linear regression, using a parameterized batch size at each iteration. Also, add a flag that indicates if Bold Driver should be used.

Return:

* the number of iterations actually run

* the final learning rate

* the predicted value for the slope

* the predicted value for the intercept



In [None]:
def gdMiniBatch(X, Y, startM, startB, numIters, epsilon, lRate, batchSize, withBD):
    """
    Mini batch gradient descent for linear regression (y = m*x + b) using mean squared error.
    Uses a random selection of batchSize data points in each iteration.

    This is a partially filled template: only the batch-selection snippet is
    provided, so the function currently returns None.

    Returns
        The number of iterations run
        The final learning rate
        The final prediction for the slope
        The final prediction for the intercept

    Parameters:
        X - input data
        Y - true output data
        startM - starting value for the line slope
        startB - starting value for the line intercept
        numIters - maximum number of iterations to run
        epsilon - Stopping condition: change in loss function from iteration to iteration
        lRate - starting learning rate
        batchSize - the number of points to use at each iteration
        withBD - flag set to 1 if Bold Driver should be used
    """
    # your code here

    
    # dataset size
    n = len(X)

    # randomly select a batch
    # this code gives you a set of indexes you can use on X and Y 
    # replace=False draws batchSize distinct indexes from 0..n-1
    # NOTE(review): as placed, the batch is drawn only once per call; the
    # docstring says a random selection is used in each iteration, so this
    # line presumably belongs inside the iteration loop - confirm
    thisBatch = np.random.choice(n, batchSize, replace=False)

    # your code here
    


### Run implementation of Mini-batch Gradient Descent

using the same parameters as last time.

In [None]:
# since this approach is stochastic, set the seed again, so we all get the same answers
np.random.seed(553)

# keep track of values across runs
# (running sums over the runs that were not misses, plus a count of the misses)
totalTime = 0
totalIters = 0
totalLRate = 0
totalM = 0
totalB = 0
totalMisses = 0

# number of independent runs of the algorithm
numTimes = 100
# NOTE(review): the loop variable `iter` shadows the builtin iter() and is not used in the body
for iter in range(numTimes):
    start = time.time()
    iters, lRate, myM, myB = gdMiniBatch(x, yNoisy, startM=-12, startB=0, numIters=1000, \
                                         epsilon=.01, lRate=0.1, batchSize = 10, withBD=0)
    end = time.time()

    # a run counted as a miss is left out of every running sum
    # (template hole: the condition itself still has to be written)
    if # your code here. How do you know if there was a miss?
        totalMisses += 1
    else:
        totalTime += end - start
        totalIters += iters
        totalLRate += lRate
        totalM += myM
        totalB += myB


# averages over the runs that were not misses
# NOTE(review): numTimes-totalMisses is 0 when every run is a miss, which raises ZeroDivisionError here
print('time: ', round(totalTime/(numTimes-totalMisses),4))
print('m: ', round(totalM/(numTimes-totalMisses),2), 
      'b: ', round(totalB/(numTimes-totalMisses),2),
      'iters: ', round(totalIters/(numTimes-totalMisses),0), 
      'lambda: ', round(totalLRate/(numTimes-totalMisses),4),
      'total misses: ', totalMisses)

### Run Mini-Batch Gradient Descent **with Bold Driver**, using a batch size of 10.

In [None]:
# since this approach is stochastic, set the seed again, so we all get the same answers
np.random.seed(553)

# NOTE(review): nothing in this cell resets the totals or calls gdMiniBatch, so as
# written the prints below repeat whatever an earlier cell left in these variables;
# the repeated-run loop with withBD=1 and batchSize=10 presumably belongs here - confirm
print('time: ', round(totalTime/(numTimes-totalMisses),4))
print('m: ', round(totalM/(numTimes-totalMisses),2), 
      'b: ', round(totalB/(numTimes-totalMisses),2),
      'iters: ', round(totalIters/(numTimes-totalMisses),0), 
      'lambda: ', round(totalLRate/(numTimes-totalMisses),4),
      'total misses: ', totalMisses)

## Task Four: Stochastic Gradient Descent

Stochastic Gradient Descent is just mini-batch GD, run with a batch size of 1.


In [None]:
# since this approach is stochastic, set the seed again, so we all get the same answers
np.random.seed(553)

# your code here
# (until the run loop is written, the totals printed below are whatever an earlier cell left behind)

# averages over the runs that were not misses
print('time: ', round(totalTime/(numTimes-totalMisses),4))
print('m: ', round(totalM/(numTimes-totalMisses),2), 
      'b: ', round(totalB/(numTimes-totalMisses),2),
      'iters: ', round(totalIters/(numTimes-totalMisses),0), 
      'lambda: ', round(totalLRate/(numTimes-totalMisses),4),
      'total misses: ', totalMisses)

### Run implementation of Stochastic Gradient Descent with Bold Driver

Again, run the algorithm 100 times and report back averages.


In [None]:
# since this approach is stochastic, set the seed again, so we all get the same answers
np.random.seed(553)

# NOTE(review): nothing in this cell resets the totals or calls gdMiniBatch, so as
# written the prints below repeat whatever an earlier cell left in these variables;
# the repeated-run loop with batchSize=1 and withBD=1 presumably belongs here - confirm
# (the learning rate is rounded to 6 places here, 4 places in the other cells)
print('time: ', round(totalTime/(numTimes-totalMisses),4))
print('m: ', round(totalM/(numTimes-totalMisses),2), 
      'b: ', round(totalB/(numTimes-totalMisses),2),
      'iters: ', round(totalIters/(numTimes-totalMisses),0), 
      'lambda: ', round(totalLRate/(numTimes-totalMisses),6),
      'total misses: ', totalMisses)

## Task Five: Unknown data
### Read in the data

In [None]:
# pandas is already imported as pd in the first cell; repeated so this cell can run on its own
import pandas as pd
# NOTE(review): google.colab is presumably only importable inside Google Colab - confirm for other environments
from google.colab import files

# you may get an error if you do not have 3rd party cookies enabled
# you may use the files pane on the left to upload the file instead
files.upload()

# read GDdata.csv without treating its first row as a header and label the columns newX and newY
df = pd.read_csv('GDdata.csv',header=None, names=['newX', 'newY'])
# bare expression so the notebook displays the DataFrame
df

In [None]:
# scatter plot of the loaded data: newX on the horizontal axis, newY on the vertical axis
plt.scatter(df['newX'],df['newY'])

Start the algorithm with:

* slope = -1
* intercept = -4
* learning rate 0.01
* max iterations 10000

### Run Gradient Descent on the data

In [None]:
# time a single run of full-batch gradient descent on the loaded data
# (gd must return the iteration count, slope and intercept for this unpacking to work)
start = time.time()
iters, myM, myB = gd(df['newX'], df['newY'], startM=-1, startB=-4, numIters=10000, epsilon=.01, lRate=.01)
end = time.time()

# report elapsed wall-clock seconds, the fitted slope/intercept and the iteration count
print('time: ', round(end - start,4))
print('m: ', round(myM,2), 
      'b: ', round(myB,2),
      'iters: ', iters)

### Run Gradient Descent with Bold Driver on the data

In [None]:
# time a single run of gradient descent with Bold Driver on the loaded data
# NOTE(review): numIters=500 here, while the stated starting values for this task
# say max iterations 10000 (and the plain gd call uses 10000) - confirm this is intended
start = time.time()
iters, lRate, myM, myB = gdBoldDriver(df['newX'], df['newY'], startM=-1, startB=-4, numIters=500, epsilon=.01, lRate=0.01)
end = time.time()
# report elapsed wall-clock seconds and the results; 'lambda' is the final learning rate
print('time: ', round(end - start,4))
print('m: ', round(myM,2), 
      'b: ', round(myB,2),
      'iters: ', iters, 
      'lambda: ', round(lRate,4))

### Run Mini-batch Gradient Descent on the data

Run the algorithm 50 times and report the average results


In [None]:
# since this approach is stochastic, set the seed again, so we all get the same answers
np.random.seed(553)

# your code here
# (set up the running totals and the repeated-run loop around the timed call below)
start = time.time()

# NOTE(review): batchSize = 1 makes this stochastic gradient descent rather than
# mini-batch, and numIters=1000 differs from the stated max iterations of 10000 - confirm
iters, lRate, myM, myB = gdMiniBatch(df['newX'], df['newY'], startM=-1, startB=-4, numIters=1000, \
                                     epsilon=.01, lRate=0.01, batchSize = 1, withBD=0)
end = time.time()
# your code here
# (classify the run as a miss or add its results to the totals)


# averages over the runs that were not misses
print('misses: ', totalMisses)
print('time: ', round(totalTime/(numTimes-totalMisses),4))
# 'iters' reports the average iteration count, like the other averaged cells,
# instead of the iteration count of the last run only
print('m: ', round(totalM/(numTimes-totalMisses),2), 
      'b: ', round(totalB/(numTimes-totalMisses),2),
      'iters: ', round(totalIters/(numTimes-totalMisses),0), 
      'lambda: ', round(totalLRate/(numTimes-totalMisses),4))

### Run Mini-batch Gradient Descent with Bold Driver on the data

Run the algorithm 50 times and report the average results

In [None]:
# since this approach is stochastic, set the seed again, so we all get the same answers
np.random.seed(553)


# your code here
# (set up the running totals and the repeated-run loop around the timed call below)

# NOTE(review): batchSize = 1 makes this stochastic gradient descent rather than
# mini-batch, and numIters=1000 differs from the stated max iterations of 10000 - confirm
start = time.time()
iters, lRate, myM, myB = gdMiniBatch(df['newX'], df['newY'], startM=-1, startB=-4, numIters=1000, \
                                     epsilon=.01, lRate=0.01, batchSize = 1, withBD=1)
end = time.time()
# your code here
# (classify the run as a miss or add its results to the totals)


# NOTE(review): this unlabeled print shows the same four averages, unrounded, that the
# labeled prints below show again - presumably leftover debugging output
print(totalIters/(numTimes-totalMisses), totalLRate/(numTimes-totalMisses), totalM/(numTimes-totalMisses), totalB/(numTimes-totalMisses))
# averages over the runs that were not misses
print('misses: ', totalMisses)
print('time: ', round(totalTime/(numTimes-totalMisses),4))
print('m: ', round(totalM/(numTimes-totalMisses),2), 
      'b: ', round(totalB/(numTimes-totalMisses),2),
      'iters: ', totalIters/(numTimes-totalMisses), 
      'lambda: ', round(totalLRate/(numTimes-totalMisses),4))

### Run Stochastic Gradient Descent on the data



In [None]:
# since this approach is stochastic, set the seed again, so we all get the same answers
np.random.seed(553)

start = time.time()
# your code here
# batchSize = 1 is what makes this stochastic gradient descent
# NOTE(review): startM=.1, startB=0, lRate=0.05 and numIters=2000 differ from the
# stated starting values for this task (slope -1, intercept -4, learning rate 0.01,
# max iterations 10000) - confirm this is intended
iters, lRate, myM, myB = gdMiniBatch(df['newX'], df['newY'], startM=.1, startB=0, numIters=2000, \
                                     epsilon=.01, lRate=0.05, batchSize = 1, withBD=0)

end = time.time()
# your code here
# (classify the run as a miss or add its results to the totals)




# averages over the runs that were not misses
print('misses: ', totalMisses)
print('time: ', round(totalTime/(numTimes-totalMisses),4))
print('m: ', round(totalM/(numTimes-totalMisses),2), 
      'b: ', round(totalB/(numTimes-totalMisses),2),
      'iters: ', round(totalIters/(numTimes-totalMisses),2), 
      'lambda: ', round(totalLRate/(numTimes-totalMisses),4))

### Run Stochastic Gradient Descent with Bold Driver on the data

In [None]:
# since this approach is stochastic, set the seed again, so we all get the same answers
np.random.seed(553)

# your code here
# (set up the running totals and the repeated-run loop around the timed call below)

# The timing now wraps the real call. The former second call, gdMiniBatch() with no
# arguments, raised TypeError and would have overwritten these results.
# NOTE(review): startM=.1, startB=0, lRate=0.05 and numIters=2000 differ from the
# stated starting values for this task - confirm this is intended
start = time.time()
iters, lRate, myM, myB = gdMiniBatch(df['newX'], df['newY'], startM=.1, startB=0, numIters=2000, \
                                     epsilon=.01, lRate=0.05, batchSize = 1, withBD=1)
end = time.time()

# your code here
# (classify the run as a miss or add its results to the totals)


# averages over the runs that were not misses
print('misses: ', totalMisses)
print('time: ', round(totalTime/(numTimes-totalMisses),4))
print('m: ', round(totalM/(numTimes-totalMisses),2), 
      'b: ', round(totalB/(numTimes-totalMisses),2),
      'iters: ', round(totalIters/(numTimes-totalMisses),0), 
      'lambda: ', round(totalLRate/(numTimes-totalMisses),4))