### Gradient Descent for Linear Regression - using fors

In [3]:
import numpy as np

Model function for linear regression with one variable is f_wb(x)=w*x+b. Cost
function (J) for the model is 1/2*1/nr_samples*sum((f_wb(xi)-y(xi)^2) for each
example i


In [15]:
def compute_cost(x, y, w, b):
    nr_samples = x.shape[0]
    f_wb = np.zeros(nr_samples)
    cost = 0
    for i in range(0,nr_samples):
        f_wb[i] = w*x[i]+b
        cost = cost+(f_wb[i]-y[i])**2
    cost = cost/(2*nr_samples)
    return cost

x = np.array([1,2,10,20])
y = np.array([19,23,85,150])
w = 0
b = 0

compute_cost(x,y,w,b)

3826.875

Gradient descent for linear regression with one variable:
repeat until converges (until cost doesn't suffer sufficient modifications and it's low enough - or limit number of steps)
  w = w - a * dj_dw
  b = b - a * dj_db
where
dj_dw = J derived wrt w, which is 1/nr_samples*sum((f_wb(xi)-yi)*xi) for each example i
dj_db = J derived wrt b, which is 1/nr_samples*sum(f_wb(xi)-yi) for each example i


In [13]:
def compute_one_step(x,y,w,b):
    nr_samples = x.shape[0]
    f_wb = np.zeros(nr_samples)
    dj_dw = 0
    dj_db = 0
    
    for i in range(0, nr_samples):
        f_wb[i] = w*x[i]+b
        
        dj_dw = dj_dw+(f_wb[i]-y[i])*x[i]
        dj_db = dj_db+(f_wb[i]-y[i])
        
    dj_dw = dj_dw/nr_samples
    dj_db = dj_db/nr_samples
    return dj_dw, dj_db

def gradient_descent(x,y,w,b):
    a = 0.01
    cost = compute_cost(x,y,w,b)
    
    while True:
        dj_dw, dj_db = compute_one_step(x,y,w,b)
        w = w - a*dj_dw
        b = b - a*dj_db

        old_cost = cost
        cost = compute_cost(x,y,w,b)

        if cost-old_cost==0:
            break
    return w, b


w, b = gradient_descent(x,y,w,b)
predictions = w*x+b
predictions
    
    

302.6299773437499
49.43950123087897
31.034944924938422
29.484543701058424
29.145969889426247
28.896457821630953
28.65540061340258
28.4169937251955
28.180801816516613
27.946775098799588
27.714891626712504
27.485131621641873
27.257475627189503
27.031904375255106
26.808398774911467
26.586939910107084
26.367509038016657
26.15008758745113
25.934657157285628
25.721199514902263
25.50969659464696
25.300130496300476
25.09248348356342
24.886737982555328
24.682876580327072
24.480882023387387
24.280737216242493
24.082425219949307
23.885929250681833
23.69123267831067
23.498319024995585
23.307171963790886
23.117775317263657
22.930113056124664
22.7441692978715
22.55992830544473
22.377374485895757
22.196492389067235
22.017266706285568
21.839682269064998
21.663724047824097
21.489377150613745
21.316626821856623
21.14545844109857
20.975857521771058
20.80780970996524
20.64130078321707
20.476316649303627
20.312843345050556
20.150867035150107
19.990374010990323
19.83135068949479
19.673783611973185
19.517659

array([ 18.48442365,  25.48657199,  81.50375877, 151.52524224])

In [8]:
import plotly.express as px
import plotly.graph_objects as go

prediction_line = np.zeros(len(x))
for i, elem in enumerate(x):
    prediction_line[i] = w*elem+b

prediction_line

array([ 18.48442365,  25.48657199,  81.50375877, 151.52524224])

In [10]:

fig = px.scatter(x=x, y=y)
fig2 = px.line(x=x, y=prediction_line)
fig3 = go.Figure(data=fig.data+fig2.data)
fig3.show()

# Vectorized version


In [3]:
import numpy as np
def compute_cost(x, y, w, b):
    nr_samples = x.shape[0]
    f_wb = np.dot(w,x)+b
    cost = sum((f_wb - y)**2)/(2*nr_samples)
    return cost

x = np.array([1,2,10,20])
y = np.array([19,23,85,150])
w = 0
b = 0

compute_cost(x,y,w,b)

3826.875

In [52]:
def compute_one_step(x,y,w,b):
    nr_samples = x.shape[0]

    f_wb = np.dot(w,x)+b
    dj_dw = sum((f_wb-y)*x)/nr_samples
    dj_db = sum(f_wb-y)/nr_samples

    return dj_dw, dj_db

def gradient_descent(x,y,w,b):
    a = 0.000001
    cost = compute_cost(x,y,w,b)
    iterations = 0

    while True:
        dj_dw, dj_db = compute_one_step(x,y,w,b)
        w = w - a*dj_dw
        b = b - a*dj_db

        old_cost = cost
        cost = compute_cost(x,y,w,b)
        print(cost)

        if cost-old_cost==0 or iterations > 1000:
            break
        iterations += 1
    return w, b


w, b = gradient_descent(x,y,w,b)
predictions = w*x+b
predictions

296621.0551540365
294677.0404179043
292745.77395904413
290827.1721779974
288921.1520235269
287027.63098902063
285146.5271089207
283277.75895517546
281421.2456337145
279576.90678094665
277744.66256028216
275924.4336586755
274116.1412831935
272319.7071576033
270535.05351898476
268762.1031143643
267000.77919737075
265251.0055249125
263512.70635387866
261785.80643785847
260070.23102388572
258365.90584920175
256672.75713804155
254990.71159843993
253319.6964190585
251659.63926603485
250010.46827985015
248372.11207221966
246744.49972300176
245127.56077712864
243521.22524155557
241925.42358223218
240340.08672109182
238765.14603306144
237200.53334309105
235646.18092320266
234102.0214895585
232567.98819954842
231044.01464889655
229530.0348687866
228025.98332300648
226531.79490511134
225047.4049356053
223572.74915914156
222107.76374174113
220652.38526802923
219206.55073849094
217770.19756674315
216343.2635768263
214925.68700051215
213517.4064746306
212118.36103841313
210728.490130854
209347.73358

array([2.03690317e-01, 1.31833222e+01, 2.61629540e+01, 3.91425859e+01,
       5.21222178e+01, 6.51018496e+01, 7.80814815e+01, 9.10611133e+01,
       1.04040745e+02, 1.17020377e+02, 1.30000009e+02, 1.42979641e+02,
       1.55959273e+02, 1.68938904e+02, 1.81918536e+02, 1.94898168e+02,
       2.07877800e+02, 2.20857432e+02, 2.33837064e+02, 2.46816696e+02,
       2.59796327e+02, 2.72775959e+02, 2.85755591e+02, 2.98735223e+02,
       3.11714855e+02, 3.24694487e+02, 3.37674119e+02, 3.50653750e+02,
       3.63633382e+02, 3.76613014e+02, 3.89592646e+02, 4.02572278e+02,
       4.15551910e+02, 4.28531542e+02, 4.41511174e+02, 4.54490805e+02,
       4.67470437e+02, 4.80450069e+02, 4.93429701e+02, 5.06409333e+02,
       5.19388965e+02, 5.32368597e+02, 5.45348228e+02, 5.58327860e+02,
       5.71307492e+02, 5.84287124e+02, 5.97266756e+02, 6.10246388e+02,
       6.23226020e+02, 6.36205651e+02, 6.49185283e+02, 6.62164915e+02,
       6.75144547e+02, 6.88124179e+02, 7.01103811e+02, 7.14083443e+02,
      

In [53]:
import plotly.express as px
import plotly.graph_objects as go

def plot_pred(x,y,comp_w, comp_b):
    y_pred= comp_w*x+comp_b
    fig = px.scatter(x=x, y=y)
    fig2 = px.line(x=x, y=y_pred)
    fig2.update_traces(line_color='red', line_width=1)
    fig3 = go.Figure(data=fig.data+fig2.data)
    fig3.show()

In [54]:
nr_entries = 100

x = np.arange(nr_entries)
true_w = 13
true_b = 17
y = np.dot(true_w,x)+true_b
rands = np.random.random_sample(len(x))*34
y = y+rands

fig = px.scatter(x=x, y=y)
fig.show()

In [55]:
w, b = 0, 0
comp_w, comp_b = gradient_descent(x,y,w,b)
comp_w, comp_b

298026.7084555494
296073.4930341785
294133.08622545906
292205.4040342711
290290.36301630957
288387.8802744737
286497.8734552768
284620.26074528217
282754.960867562
280901.89307817817
279060.9771626881
277232.1334326718
275415.2827222823
273610.34638481925
271817.24628932436
270035.90481719887
268266.24485884426
266508.189810324
264761.6635700478
263026.59053547704
261302.89559985255
259590.50414894318
257889.34205781575
256199.33568762706
254520.41188243552
252852.4979660347
251195.52173880758
249549.41147460084
247914.09591762035
246289.50427934644
244675.56623546983
243072.21192284717
241479.37193647775
239896.9773264976
238324.95959519636
236763.25069405124
235211.78302078164
233670.4894164231
232139.30316241964
230618.15797773632
229106.98801598948
227605.72786259666
226114.31253194527
224632.67746457912
223160.758524404
221698.49199591146
220245.81458142045
218802.6633983377
217368.97597643547
215944.69025514732
214529.74458088196
213124.07770435407
211727.6287779333
210340.337353

(13.010309318605342, 0.20426816236087664)

In [56]:
plot_pred(x,y,comp_w, comp_b)

# Multiple linear regression


In [71]:
def generate_dataset(n):
    x = []
    y = []
    random_x1 = np.random.rand()
    random_x2 = np.random.rand()
    for i in range(n):
        x1 = i
        x2 = i/2 + np.random.rand()*n
        x.append([1, x1, x2])
        y.append(random_x1 * x1 + random_x2 * x2 + 1)
    return np.array(x), np.array(y)

x, y = generate_dataset(200)

array([[  1.        ,   0.        , 147.16290973],
       [  1.        ,   1.        ,  56.15116596],
       [  1.        ,   2.        , 115.46163896],
       [  1.        ,   3.        , 157.74669015],
       [  1.        ,   4.        ,  40.20029105],
       [  1.        ,   5.        , 152.5119573 ],
       [  1.        ,   6.        , 197.11919353],
       [  1.        ,   7.        , 179.50753445],
       [  1.        ,   8.        , 147.47939064],
       [  1.        ,   9.        , 153.90580875],
       [  1.        ,  10.        ,  57.41424659],
       [  1.        ,  11.        , 132.6119107 ],
       [  1.        ,  12.        ,  26.09567316],
       [  1.        ,  13.        , 198.40963085],
       [  1.        ,  14.        , 123.63297567],
       [  1.        ,  15.        , 118.26929572],
       [  1.        ,  16.        , 121.02871484],
       [  1.        ,  17.        ,  34.36668925],
       [  1.        ,  18.        ,  62.43598211],
       [  1.        ,  19.     

In [79]:
fig = px.scatter_3d(x=x[:,1], y=x[:,2], z=y)
fig.show()