# Problem Statement: **BONUS EXERCISE**

Imports and CUDA

In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Prefer the GPU whenever PyTorch reports a usable CUDA device; otherwise run on the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


Problem 1: **Gradient Descent for Demand Forecasting at AtliQ**

AtliQ wants to optimize the prediction of regional product demands using gradient descent.

Assume the loss function is

$$L(w)=(w-4)^2$$

where **w** is a weight parameter initialized at 0.

**Write code to:**

* Perform 10 iterations of gradient descent using a learning rate of 0.1.

* Print the weight **w** at each step.



In [2]:
# Plain gradient descent on L(w) = (w - 4)^2, starting from w = 0.
w, learning_rate = 0.0, 0.1

for i in range(10):
  # Derivative of the loss at the current weight: dL/dw = 2 * (w - 4).
  grad = 2*(w-4)
  # Step against the gradient, scaled by the learning rate.
  w -= learning_rate * grad
  print(f"Step {i+1}: w = {w:.4f}")

Step 1: w = 0.8000
Step 2: w = 1.4400
Step 3: w = 1.9520
Step 4: w = 2.3616
Step 5: w = 2.6893
Step 6: w = 2.9514
Step 7: w = 3.1611
Step 8: w = 3.3289
Step 9: w = 3.4631
Step 10: w = 3.5705




---



Problem 2: **Momentum for Contour Navigation in AtliQ's Supply Chain**

AtliQ's supply chain optimization problem is represented by a contour map of a quadratic function:

$$f(x,y)=x^2 +3y^2$$

Write code to implement gradient descent with momentum (5 iterations) to minimize this function.

Use:
* Initial point (x, y) = (2, 2)
* Learning rate ($\eta$) = 0.1
* Momentum Coefficient ($\beta$) = 0.9

In [4]:
def gradient(x, y):
  """Return the gradient of f(x, y) = x^2 + 3y^2 at the point (x, y).

  The result is the tuple (df/dx, df/dy) = (2x, 6y).
  """
  df_dx = 2*x
  df_dy = 6*y
  return (df_dx, df_dy)

# Gradient descent with momentum, starting from the point (2, 2).
# The velocity is an exponentially weighted average of the gradients seen so far:
#   v <- momentum * v + (1 - momentum) * grad
# and the position then moves by learning_rate * v.
x, y = 2,2
learning_rate, momentum = 0.1, 0.9
vx = vy = 0.0 # velocity starts at zero

for i in range(5):
  dx, dy = gradient(x, y)
  # Blend the fresh gradient into the running velocity for each coordinate.
  vx, vy = momentum*vx + (1-momentum)*dx, momentum*vy + (1-momentum)*dy
  # Move along the (negative) velocity direction.
  x, y = x - learning_rate*vx, y - learning_rate*vy
  print(f"Step {i+1}: x = {x:.4f}, y = {y:.4f}")

Step 1: x = 1.9600, y = 1.8800
Step 2: x = 1.8848, y = 1.6592
Step 3: x = 1.7794, y = 1.3609
Step 4: x = 1.6490, y = 1.0108
Step 5: x = 1.4986, y = 0.6351




---



Problem 3: **RMS Prop for AtliQ's Dynamic Pricing Optimization**

AtliQ's AI model adjusts product prices dynamically. Implement the RMSProp optimizer for minimizing the function:

$$f(w) = w^2 + 5$$

Use:

* Initial weight ($w$) = 5.0
* Learning rate ($\eta$) = 0.01
* Momentum Coefficient ($\beta$) = 0.9


Run the optimization for 15 iterations and print the weight updates.

In [6]:
def gradient(w):
  """Return df/dw = 2w, the derivative of f(w) = w^2 + 5 at w."""
  slope = 2 * w
  return slope

# RMSProp on f(w) = w^2 + 5, starting from w = 5.0.
# Each step divides the gradient by the root of a running average of squared
# gradients; epsilon is added inside the square root to avoid dividing by zero.
w, learning_rate = 5.0, 0.01
beta, epsilon = 0.9, 1e-8
squared_gradient_average = 0.0 # running average of grad^2, starts at zero

for i in range(15):
  grad = gradient(w)
  # Exponentially weighted average of the squared gradient.
  squared_gradient_average = beta*squared_gradient_average + (1-beta)*grad**2
  rms = (squared_gradient_average + epsilon)**0.5
  w -= (learning_rate * grad) / rms
  print(f"Step {i+1}: w = {w:.4f}")

Step 1: w = 4.9684
Step 2: w = 4.9455
Step 3: w = 4.9264
Step 4: w = 4.9094
Step 5: w = 4.8939
Step 6: w = 4.8794
Step 7: w = 4.8657
Step 8: w = 4.8526
Step 9: w = 4.8400
Step 10: w = 4.8277
Step 11: w = 4.8158
Step 12: w = 4.8041
Step 13: w = 4.7927
Step 14: w = 4.7814
Step 15: w = 4.7704




---



Problem 4: **Adam Optimizer for AtliQ AI Models**

AtliQ is training an AI model to recommend warehouse restocking schedules. Use the Adam optimizer to minimize the function:

$$f(x) = x^4 - 3x^3 + 2$$

Write code to:

* Initialize x = 3.0

Run the optimizations for 19 iterations (starting from 1) with:
* Learning rate ($\eta$) = 0.01
* Momentum Coefficients: $\beta_1$ = 0.9, $\beta_2$ = 0.09


In [9]:
def gradient(x):
  """Return df/dx = 4x^3 - 9x^2, the derivative of f(x) = x^4 - 3x^3 + 2 at x."""
  cubic_term = 4*x**3
  quadratic_term = 9*x**2
  return cubic_term - quadratic_term

# Adam on f(x) = x^4 - 3x^3 + 2, starting from x = 3.0.
x, learning_rate = 3.0, 0.01
beta1, beta2 = 0.9, 0.09
epsilon = 1e-8
first_moment = second_moment = 0.0 # running averages of grad and grad^2

# t starts at 1 so the bias-correction denominators (1 - beta**t) are never zero.
for t in range(1, 20):
  grad = gradient(x)
  # Update the exponentially weighted averages of the gradient and its square.
  first_moment = beta1*first_moment + (1-beta1)*grad
  second_moment = beta2*second_moment + (1-beta2)*grad**2
  # Bias-correct both averages, which start at zero.
  m_hat = first_moment / (1 - beta1**t)
  v_hat = second_moment / (1 - beta2**t)
  # epsilon is added inside the square root to keep the denominator non-zero.
  x -= (learning_rate * m_hat) / ((v_hat+epsilon)**0.5)
  print(f"Step {t}: x = {x:.4f}")

Step 1: x = 2.9900
Step 2: x = 2.9799
Step 3: x = 2.9697
Step 4: x = 2.9595
Step 5: x = 2.9491
Step 6: x = 2.9387
Step 7: x = 2.9281
Step 8: x = 2.9174
Step 9: x = 2.9067
Step 10: x = 2.8958
Step 11: x = 2.8849
Step 12: x = 2.8739
Step 13: x = 2.8627
Step 14: x = 2.8515
Step 15: x = 2.8401
Step 16: x = 2.8287
Step 17: x = 2.8171
Step 18: x = 2.8054
Step 19: x = 2.7937




---

