# **Gradient Descent**

In [None]:
import numpy as np

# Toy dataset: y = 2 * x exactly, so the least-squares optimum is weight == 2.
X = np.array([1, 2, 3, 4])
y = np.array([2, 4, 6, 8])

# Hyperparameters
weight = 0.0
learning_rate = 0.01
epochs = 1000
log_every = 100  # report progress periodically instead of printing every epoch

for epoch in range(epochs):
    # Gradient of the mean squared error mean((X * weight - y) ** 2) w.r.t. weight.
    gradient = 2 * np.dot(X, X * weight - y) / len(X)
    weight -= learning_rate * gradient
    # Printing on every epoch produced 1000 lines of output; sample it instead.
    if epoch % log_every == 0:
        print(f"epoch {epoch:4d}: weight = {weight:.6f}")

print(f"Trained weight: {weight}")


0.3
0.5549999999999999
0.7717499999999999
0.9559875
1.112589375
1.24570096875
1.3588458234375
1.455018949921875
1.5367661074335939
1.6062511913185549
1.6653135126207717
1.715516485727656
1.7581890128685076
1.7944606609382314
1.8252915617974967
1.8514978275278722
1.8737731533986914
1.8927071803888877
1.9088011033305545
1.9224809378309713
1.9341087971563256
1.9439924775828767
1.9523936059454452
1.9595345650536284
1.9656043802955843
1.9707637232512467
1.9751491647635597
1.9788767900490258
1.9820452715416719
1.9847384808104211
1.987027708688858
1.9889735523855292
1.9906275195277
1.9920333915985449
1.9932283828587631
1.9942441254299488
1.9951075066154564
1.9958413806231379
1.9964651735296672
1.9969953975002173
1.9974460878751847
1.997829174693907
1.998154798489821
1.9984315787163478
1.9986668419088955
1.9988668156225613
1.999036793279177
1.9991812742873005
1.9993040831442055
1.9994084706725748
1.9994972000716886
1.9995726200609354
1.9996367270517952
1.9996912179940258
1.999737535294922
1.99

# **Momentum**

In [None]:
# Gradient descent with classical momentum.
# Uses X, y (and np) defined in the first code cell of this notebook.

# Hyperparameters
weight = 0.0
velocity = 0.0  # decayed running sum of learning-rate-scaled gradients
learning_rate = 0.01
momentum = 0.9
epochs = 1000
log_every = 100  # report progress periodically instead of printing every epoch

for epoch in range(epochs):
    # Full-batch gradient of the mean squared error w.r.t. weight.
    gradient = 2 * np.dot(X, X * weight - y) / len(X)
    # Keep a fraction `momentum` of the previous step and add the new gradient step.
    velocity = momentum * velocity + learning_rate * gradient
    weight -= velocity
    # Printing on every epoch produced 1000 lines of output; sample it instead.
    if epoch % log_every == 0:
        print(f"epoch {epoch:4d}: weight = {weight:.6f}")

print(f"Trained weight with Momentum: {weight}")


0.3
0.825
1.47375
2.1365625
2.7126093750000004
3.1241601562500003
3.3259318359375003
3.308636572265625
3.096775349121094
2.7415839459228515
2.310674091156006
1.8762541081924438
1.5038380072963713
1.2430878153954503
1.121949470375304
1.1446325393008765
1.2933524204387605
1.533197450397042
1.8190783597999391
2.1035094242925556
2.343970968692027
2.5087907133477474
2.5808098765357337
2.5585056419245613
2.454655984485822
2.292992895118083
2.1035471804194055
1.917513960127685
1.7624569678459838
1.658537129615555
1.616228705765836
1.6357168184362134
1.7078985970741212
1.81667740828712
1.9420767271357509
2.063624605029156
2.1634740043788474
2.2288173631367423
2.2533037815483365
2.2373459908865208
2.1873820806579083
2.114307249353471
2.031393813776457
1.9520626496906757
1.887855204559871
1.8468902232581663
1.832988206597907
1.8455281606139877
1.8799848951363622
1.9289982219360449
1.9837604827653526
2.0354824450969264
2.076709844430804
2.102308027166673
2.1100001875539545
2.1004231037694145
2.07

# NAG (Nesterov Accelerated Gradient)

In [None]:
# Nesterov accelerated gradient, applied one sample at a time.
# Uses X, y (and np) defined in the first code cell of this notebook.

# Hyperparameters
weight = 0.0
velocity = 0.0  # decayed running sum of learning-rate-scaled gradients
learning_rate = 0.01
momentum = 0.9
epochs = 1000
log_every = 100  # report progress periodically instead of printing every update

for epoch in range(epochs):
    for i in range(len(X)):
        # The update below is `weight -= velocity`, so the momentum part of the
        # next step moves the weight by `-momentum * velocity`. The look-ahead
        # point must therefore SUBTRACT the momentum term; adding it evaluated
        # the gradient behind the current weight and made the iterates oscillate
        # with growing amplitude instead of converging.
        look_ahead_weight = weight - momentum * velocity
        # Squared-error gradient for sample i, evaluated at the look-ahead point.
        gradient = 2 * X[i] * (look_ahead_weight * X[i] - y[i])
        velocity = momentum * velocity + learning_rate * gradient
        weight -= velocity
    # Printing after every sample produced 4000 lines of output; sample per epoch.
    if epoch % log_every == 0:
        print(f"epoch {epoch:4d}: weight = {weight:.6f}")

print(f"Trained weight with NAG: {weight}")


0.04
0.23568000000000003
0.76106976
1.78169047168
2.72298647556864
3.580087273302907
4.205912611302176
4.2435010772286095
4.233137267404504
4.044412662863113
3.4759928535247955
2.3283924069029394
1.2683273487660165
0.29647792435564624
-0.4289921907521833
-0.5135731864595863
-0.5409470767897905
-0.3642787320475657
0.2489132218372388
1.53773303206447
2.7301149572117787
3.830700991878021
4.669997182155527
4.8126819579154345
4.887412942904722
4.729058424881931
4.069655410262979
2.6239948976115115
1.284398649045234
0.039559203515193264
-0.9295809442704449
-1.1434535376732406
-1.2769195076635425
-1.1444948698810324
-0.4378508279774209
1.1817545587568443
2.684917212603763
4.091197935134667
5.208252434138247
5.508672400030261
5.714284480718525
5.616996664690035
4.862617604423573
3.050377647411472
1.3657338139261344
-0.22099869733570476
-1.506328858775351
-1.9112758557575384
-2.204791681872036
-2.1537057303055644
-1.35178541828697
0.6734672462429505
2.5591798473565586
4.347358107850461
5.823878

# AdaGrad

In [None]:
# AdaGrad on the full batch.
# Uses X, y (and np) defined in the first code cell of this notebook.

# Hyperparameters
weight = 0.0
learning_rate = 0.01
cache = 0.0  # running sum of squared gradients; it never decreases
epsilon = 1e-8  # keeps the denominator non-zero
epochs = 1000

for epoch in range(epochs):
    # Full-batch gradient of the mean squared error w.r.t. weight.
    residual = X * weight - y
    gradient = 2 * np.dot(X, residual) / len(X)

    # Accumulate squared gradients, then scale the step by their square root.
    # Because `cache` only grows, the effective step size only shrinks.
    cache += gradient ** 2
    scaled_rate = learning_rate / (np.sqrt(cache) + epsilon)
    weight -= scaled_rate * gradient

print(f"Trained weight with AdaGrad: {weight}")


Trained weight with AdaGrad: 0.5814307029318245


# RMSProp

In [None]:
# RMSProp, applied one sample at a time.
# Uses X, y (and np) defined in the first code cell of this notebook.

# Hyperparameters
weight = 0.0
learning_rate = 0.01
cache = 0.0  # exponentially decayed average of squared gradients
decay_rate = 0.9
epsilon = 1e-8  # keeps the denominator non-zero
epochs = 1000

for epoch in range(epochs):
    for x_i, y_i in zip(X, y):
        # Squared-error gradient for a single sample.
        gradient = 2 * x_i * (weight * x_i - y_i)

        # Blend the new squared gradient into the running average.
        cache = decay_rate * cache + (1 - decay_rate) * gradient ** 2

        # Scale the step by the root of the running average.
        scaled_rate = learning_rate / (np.sqrt(cache) + epsilon)
        weight -= scaled_rate * gradient

print(f"Trained weight with RMSProp: {weight}")


Trained weight with RMSProp: 2.0


# **Adam**

In [None]:
# Adam, applied one sample at a time.
# Uses X, y (and np) defined in the first code cell of this notebook.

# Hyperparameters
weight = 0.0
m = 0.0  # decayed average of gradients (first moment)
v = 0.0  # decayed average of squared gradients (second moment)
learning_rate = 0.01
beta1 = 0.9
beta2 = 0.999
epsilon = 1e-8  # keeps the denominator non-zero
epochs = 1000

# Number of parameter updates performed so far. Bias correction must use this
# count, not the epoch index: there are len(X) updates per epoch, so using
# `epoch + 1` under-counted the steps and over-corrected both moments.
step = 0

for epoch in range(epochs):
    for i in range(len(X)):
        step += 1
        # Squared-error gradient for sample i.
        gradient = 2 * X[i] * (weight * X[i] - y[i])
        m = beta1 * m + (1 - beta1) * gradient
        v = beta2 * v + (1 - beta2) * gradient ** 2
        # m and v start at zero, so early values are biased toward zero;
        # dividing by (1 - beta ** step) compensates for that.
        m_hat = m / (1 - beta1 ** step)
        v_hat = v / (1 - beta2 ** step)
        weight -= (learning_rate / (np.sqrt(v_hat) + epsilon)) * m_hat

print(f"Trained weight with Adam: {weight}")


Trained weight with Adam: 1.9999999999999984
