In [357]:
import datetime

import numpy as np
import pandas as pd

import plotly
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
from plotly.graph_objs.scatter import Line
from plotly.graph_objs import Scatter

In [313]:
def warmUpExercise():
    """Build and return the 5x5 identity matrix as a float NumPy array."""
    size = 5
    identity = np.eye(size)
    return identity

In [314]:
# Sanity check: display the 5x5 identity matrix returned by warmUpExercise().
warmUpExercise()

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [315]:
# Load the comma-separated dataset; the first column becomes X and the second Y.
d = np.loadtxt('ex1data1.txt', delimiter=',')
X, Y = d[:, 0], d[:, 1]

data = pd.DataFrame({'X': X, 'Y': Y})

# Scatter plot of Y against X with a histogram drawn along each axis.
fig = px.scatter(
    data,
    x='X',
    y='Y',
    marginal_x='histogram',
    marginal_y='histogram',
    width=800,
    height=600,
)
fig.show()

In [316]:
# Gradient-descent setup: sample count, column of ones, start parameters
# and hyperparameters.
m = len(data)
data['X0'] = np.ones(m)  # constant column, paired with theta[0] in later cells
theta = np.zeros(2)  # both parameters start at zero
iterations, alpha = 1500, 0.01

# Display everything that was just configured.
data, theta, iterations, alpha

(          X         Y   X0
 0    6.1101  17.59200  1.0
 1    5.5277   9.13020  1.0
 2    8.5186  13.66200  1.0
 3    7.0032  11.85400  1.0
 4    5.8598   6.82330  1.0
 ..      ...       ...  ...
 92   5.8707   7.20290  1.0
 93   5.3054   1.98690  1.0
 94   8.2934   0.14454  1.0
 95  13.3940   9.05510  1.0
 96   5.4369   0.61705  1.0
 
 [97 rows x 3 columns], array([0., 0.]), 1500, 0.01)

In [317]:
def computeCost(X, y, theta):
    """Return the halved mean squared error of the linear model ``X.T @ theta``.

    Parameters
    ----------
    X : ndarray, shape (n_params, n_samples)
        Design matrix with one row per parameter, so the model's predictions
        are ``X.T @ theta``.
    y : sequence or ndarray, shape (n_samples,)
        Target values.
    theta : ndarray, shape (n_params,)
        Model parameters.

    Returns
    -------
    float
        ``1 / (2 * n_samples) * sum((X.T @ theta - y) ** 2)``.
    """
    # Take the sample count from the data actually passed in, rather than from
    # a notebook-level global, so the function works on any dataset.
    m = len(y)
    residuals = X.T @ theta - y
    return 1 / (2 * m) * (residuals ** 2).sum()

In [318]:
# Design matrix with one row per parameter: the constant column X0 first, then X.
xx = np.array([data[column] for column in ('X0', 'X')])

# Cost at the initial parameters; expected to be approximately 32.07.
J = computeCost(xx, Y, theta)
J

32.072733877455676

In [319]:
# Second check of computeCost, this time with theta = [-1, 2];
# expected to be approximately 54.24.
J = computeCost(xx, Y, theta=np.array([-1, 2]))
J

54.24245508201238

In [320]:
def gradientDescent(X, y, theta, alpha, iterations):
    """Fit linear-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : ndarray, shape (n_params, n_samples)
        Design matrix with one row per parameter, so the model's predictions
        are ``X.T @ theta``.
    y : ndarray, shape (n_samples,)
        Target values.
    theta : array-like, shape (n_params,)
        Starting parameters. A float64 ndarray is updated IN PLACE and is the
        very object returned (later cells of this notebook read ``theta``
        after training and rely on that). Any other input (list, integer
        array, ...) is first converted to a new float array, so the caller's
        object is left untouched.
    alpha : float
        Learning rate.
    iterations : int
        Number of gradient steps to take.

    Returns
    -------
    theta : ndarray, shape (n_params,)
        Parameters after the final step.
    J : ndarray, shape (iterations,)
        Cost after each step, as computed by ``computeCost``.
    """
    # No copy is made when theta is already a float64 ndarray, which keeps the
    # in-place update visible to the caller; other inputs become float arrays
    # so fractional steps are not truncated.
    theta = np.asarray(theta, dtype=float)
    m = len(y)  # sample count from the arguments, not a notebook global
    J = np.zeros(iterations)

    for i in range(iterations):
        # Residuals use the `y` argument (previously the global `Y` was read).
        residuals = X.T @ theta - y
        # One simultaneous update of every parameter from the same residuals.
        theta -= alpha * (X @ residuals) / m
        J[i] = computeCost(X, y, theta)

    return theta, J

In [321]:
# Run batch gradient descent starting from the current `theta`.
# Note: gradientDescent updates the array passed as `theta` in place, so after
# this cell the notebook-level `theta` holds the fitted parameters too, and
# re-running the cell continues from them instead of starting at zeros.
th, J = gradientDescent(xx, Y, theta, alpha, iterations)
# Expected parameters: approximately -3.6303 and 1.1666
th, J

(array([-3.63029144,  1.16636235]),
 array([6.73719046, 5.93159357, 5.90115471, ..., 4.48343473, 4.48341145,
        4.48338826]))

In [348]:
# Plot the cost stored for every gradient-descent iteration against its index.
px.scatter(data_frame={'J': J}, y='J', title='cost')

In [368]:
# Overlay the fitted regression line on the training data.
# Use the parameters returned by gradientDescent (`th`) instead of relying on
# `theta` having been modified in place as a side effect of that call.
h = th @ xx  # model prediction for every training example

fig = go.Figure()
fig.add_trace(Scatter(x=X, y=h, mode='lines', name='gradient-descent fit'))
fig.add_trace(Scatter(x=X, y=Y, mode='markers', name='training data'))
fig.update_layout(
    title='Gradient-descent fit vs. training data',
    xaxis_title='X',
    yaxis_title='Y',
)
fig.show()

# Cross-check against plotly's built-in ordinary-least-squares trendline.
px.scatter({'X': X, 'Y': Y}, x='X', y='Y', trendline='ols')

In [390]:
# Predict with the fitted parameters returned by gradientDescent (`th`) rather
# than relying on `theta` having been mutated in place. Each input vector is
# [1, x]: the leading 1 pairs with th[0], matching the X0 column of ones.
# NOTE(review): the messages imply x is population in units of 10,000 and the
# result is scaled by 10,000 before printing; confirm against the dataset.
predict1 = np.array([1, 3.5]) @ th
print('For population = 35,000, predict: {}'.format(predict1 * 10000))
predict2 = np.array([1, 7]) @ th
print('For population = 70,000, predict: {}'.format(predict2 * 10000))

For population = 35,000, predict: 4519.767867701776
For population = 70,000, predict: 45342.45012944714
