## **Gradient descent**

Suppose we have a function

$f(x) = x^2 - 4 x + 5$

Find the $x$ that minimizes this function by setting its derivative to zero:

$f'(x) = 2 x - 4 = 0$

therefore $x = 2$

In [None]:
# Gradient descent by hand on f(x) = x^2 - 4x + 5.
alpha = 0.02  # step size (learning rate)
x = 10.0      # starting point


def f(x):
    """Evaluate the objective x^2 - 4x + 5 at ``x``."""
    return x ** 2 - (4 * x) + 5


def compute_grad(x):
    """Return the analytic derivative f'(x) = 2x - 4."""
    return 2 * x - 4


# Repeatedly step against the gradient: 1000 updates with the step size above.
for _step in range(1000):
    x -= alpha * compute_grad(x)

print("x_min = ", x)
print("f(x) = ", f(x))

## **Gradient descent with Pytorch**

In [None]:
import torch

In [None]:
# Let autograd differentiate f(x) = x^2 - 4x + 5 at x = 10.
x = torch.tensor(10, dtype=torch.float, requires_grad=True)
cost = (x * x - 4 * x + 5).sum()
cost.backward()  # fills x.grad with d(cost)/dx
print(x.grad)  # gradient

In [None]:
# gradient descent with torch
alpha = 0.02  # step size; defined here so the cell does not rely on an earlier cell
x = torch.tensor(10, dtype=torch.float, requires_grad=True)  # initialize x


def loss(x):
    """Cost function f(x) = x^2 - 4x + 5, reduced to a scalar with ``torch.sum``."""
    return torch.sum(x * x - 4 * x + 5)


for _ in range(1000):
    cost = loss(x)
    # loss(x) builds a fresh graph on every iteration, so the graph does not
    # have to be retained after backward().
    cost.backward()
    # Update the leaf tensor in place without recording the step in autograd.
    with torch.no_grad():
        x -= alpha * x.grad
    # backward() accumulates into x.grad, so clear it before the next step.
    x.grad.zero_()
print(x.data)

## **Example**

Try finding the gradient (slope) of the following function at $x = \pi$

$f(x) = 3 \cos(x) + 4 \sin(x) + 4 x$

In [None]:
# write your code here




## **Gradient descent for regression**

Here, we will use gradient descent to solve linear regression. There are multiple ways to implement gradient descent for regression:
1. computing the gradient by hand
2. using PyTorch to calculate the gradient

In [None]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

In [None]:
# Read the CSV without a header row, name its columns x and y, and keep the
# values as a NumPy array.
data_url = "https://raw.githubusercontent.com/mattnedrich/GradientDescentExample/master/data.csv"
data = pd.read_csv(data_url, header=None, names=['x', 'y']).values

In [None]:
def compute_grad(X, y, theta):
    """Gradient of the half mean-squared-error cost for the line theta[0] + theta[1] * X.

    Parameters
    ----------
    X, y : array-like
        Inputs and targets, indexed in parallel.
    theta : array-like
        ``[intercept, slope]``.

    Returns
    -------
    numpy.ndarray
        Float array ``[dJ/dtheta0, dJ/dtheta1]``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    if X.size == 0:
        # No samples: nothing contributes to the gradient.
        return np.zeros(2)
    # Residual of every sample at once instead of a Python loop.
    residual = (theta[0] + theta[1] * X) - y
    # Build a float array explicitly: accumulating into an integer array such as
    # np.array([0, 0]) truncates every partial sum to a whole number.
    return np.array([residual.mean(), (residual * X).mean()], dtype=float)

def compute_cost(X, y, theta):
    """Half mean-squared-error of the line theta[0] + theta[1] * X against y.

    Each sample contributes its squared residual scaled by 1 / (2 * len(X));
    the contributions are added up in index order.
    """
    n_samples = len(X)
    return sum(
        (1. / (2 * n_samples)) * ((theta[0] + theta[1] * X[i]) - y[i]) ** 2
        for i in range(n_samples)
    )

# Fit the line with batch gradient descent, recording the cost after every update.
X, y = data[:, 0], data[:, 1]
theta = np.array([0, -1])  # initial theta
n_iter = 3000
J = []  # history of cost

for i in tqdm(range(n_iter)):
    theta_grad = compute_grad(X, y, theta)
    theta = theta - 0.0001 * theta_grad  # step against the gradient
    J.append(compute_cost(X, y, theta))

print(f'final theta = {theta}')
print(f'final cost = {J[-1]}')

In [None]:
# Overlay the fitted line on the raw samples.
x = np.arange(0, 100, 0.2)  # x-axis grid used to draw the line
y_fit = theta[1] * x + theta[0]

plt.plot(x, y_fit)
plt.scatter(data[:, 0], data[:, 1])
plt.show()

## Gradient descent for regression using Pytorch

In [None]:
# Read the CSV without a header row, name its columns x and y, and keep the
# values as a NumPy array.
data_url = "https://raw.githubusercontent.com/mattnedrich/GradientDescentExample/master/data.csv"
data = pd.read_csv(data_url, header=None, names=['x', 'y']).values

In [None]:
# Quick look at the raw samples before fitting.
x_values, y_values = data[:, 0], data[:, 1]
plt.scatter(x_values, y_values)
plt.xlabel("$X$")
plt.ylabel("$Y$")
plt.show()

In [None]:
# create X, y as torch tensors
# Design matrix: a column of ones (intercept) next to the raw x values.
X = data[:, 0]
X = torch.tensor(np.vstack((np.ones(len(X)), X)).T).float()
# Take the targets from `data` directly rather than from a `y` left over from an
# earlier cell, so this cell can be re-run and does not depend on hidden state.
y = torch.tensor(data[:, 1]).float()

# initialize theta as a (2, 1) column vector [intercept, slope]
# Creating it directly in that shape keeps it a leaf tensor, so autograd fills
# theta.grad without needing retain_grad().
theta = torch.tensor([[0.], [-1.]], requires_grad=True)

In [None]:
def cal_loss(y, theta):
    """Scaled squared-error loss of the linear model ``X @ theta`` against ``y``.

    ``X`` is read from the notebook's global scope. The squared errors are
    divided by ``len(y)`` before summing and the sum is divided by
    ``2 * len(y)`` again, so the result equals sum(err^2) / (2 * len(y)^2).
    """
    n_samples = len(y)
    residual = y - torch.matmul(X, theta).view(-1)
    per_sample = residual ** 2 / n_samples
    return per_sample.sum() / (2 * n_samples)

In [None]:
alpha = 0.001  # step size
for _step in range(2000):
    loss = cal_loss(y, theta)
    loss.backward(retain_graph=True)  # populate theta.grad
    update = alpha * theta.grad
    theta.data.sub_(update)  # in-place parameter update through .data
    theta.grad.data.zero_()  # reset the gradient before the next backward()

In [None]:
# Flatten the fitted parameters into a plain NumPy vector for printing/plotting.
theta_ = theta.data.numpy().reshape(-1)
print("Final parameters: ", theta_)
final_cost = cal_loss(y, theta)
print("Cost: ", final_cost)

In [None]:
# Draw the line fitted with PyTorch on top of the raw samples.
x = np.arange(0, 100, 0.2)
y_fit = theta_[0] + theta_[1] * x

plt.plot(x, y_fit)
plt.scatter(data[:, 0], data[:, 1])
# End with plt.show() like the other plotting cells; otherwise the cell's last
# expression (the object returned by plt.scatter) is echoed as text output.
plt.show()

## **Gradient Descent for logistic regression**

In [None]:
import torch
import pandas as pd
from sklearn.metrics import accuracy_score

In [None]:
# example intuition of logistic loss calculation
target = torch.tensor([1, 0, 1])
pred = torch.tensor([0.9, 0.4, 0.2])


def calculate_loss(y, y_pred):
    """Mean distance between each predicted probability and its label."""
    # Label 1 contributes 1 - y_pred; any other label contributes y_pred itself.
    per_sample_error = torch.where(y != 1, y_pred, 1 - y_pred)
    return per_sample_error.mean()


calculate_loss(target, pred)  # loss (0.1 + 0.4 + 0.8) / 3

In [None]:
path = "https://raw.githubusercontent.com/Benlau93/Machine-Learning-by-Andrew-Ng-in-Python/master/LogisticRegression/ex2data1.txt"
# Columns are named x1, x2 (features) and y (label).
# NOTE(review): skiprows=1 discards the file's first line before parsing. If
# ex2data1.txt has no header row, that line is a data sample -- confirm.
column_names = ["x1", "x2", "y"]
df = pd.read_csv(path, names=column_names, skiprows=1)
df.head()

In [None]:
# Scatter the two features, colouring each point by its label (1 -> blue, 0 -> red).
point_colors = df.y.map({1:'blue', 0:'red'})
df.plot(kind="scatter", x="x1", y="x2", c=point_colors)
plt.show()

In [None]:
# Feature matrix with a leading column of ones for the intercept term.
X = torch.tensor(df[['x1', 'x2']].values)
# Give the ones column the same dtype as the features so torch.cat joins
# same-typed tensors instead of relying on implicit type promotion.
X = torch.cat((torch.ones(len(X), 1, dtype=X.dtype), X), dim=-1)
# Labels as a floating-point tensor.
y = torch.tensor(df['y'].values.astype(float))

In [None]:
# exercise: try writing your own sigmoid function
def calculate_loss(y, y_pred):
    """Mean per-sample error between labels ``y`` and predictions ``y_pred``.

    Each sample contributes ``y * (1 - y_pred) + (1 - y) * y_pred``.
    """
    miss_on_positive = y * (1 - y_pred)
    miss_on_negative = (1 - y) * (y_pred)
    return (miss_on_positive + miss_on_negative).mean()

In [None]:
# Baseline: accuracy of the model before any training.
# Create theta directly as a double-precision leaf tensor (no .double() copy,
# so retain_grad() is not needed).
theta = torch.tensor([[0.5, 0.5, 0.5]], dtype=torch.double, requires_grad=True)
y_pred = torch.sigmoid(torch.matmul(X, theta.t()).view(-1))
# accuracy_score compares class labels, so turn the probabilities into 0/1
# predictions with the same 0.5 threshold used for the post-training evaluation.
y_label = (y_pred > 0.5).numpy().astype(int)
print("Accuracy (initialized theta): ", accuracy_score(y, y_label))

In [None]:
# Create theta directly as a double-precision leaf tensor so autograd fills
# theta.grad without retain_grad().
theta = torch.tensor([[0.5, 0.5, 0.5]], dtype=torch.double, requires_grad=True)

alpha = 0.01  # step size
for _ in tqdm(range(1000)):
    y_pred = torch.sigmoid(torch.matmul(X, theta.t()).view(-1))
    loss = calculate_loss(y, y_pred)
    # The forward pass above rebuilds the graph each iteration, so it does not
    # need to be retained after backward().
    loss.backward()  # calculate gradient
    # Update the parameters in place without recording the step in autograd.
    with torch.no_grad():
        theta -= alpha * theta.grad
    # backward() accumulates into theta.grad, so clear it for the next step.
    theta.grad.zero_()

In [None]:
# measure accuracy after training
y_pred = torch.sigmoid((X @ theta.t()).view(-1))
# Threshold the probabilities at 0.5 to obtain 0/1 class predictions.
predicted_labels = (y_pred > 0.5).numpy().astype(int)
accuracy_score(y, predicted_labels)

## **Using sklearn**

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Fit scikit-learn's logistic regression on the same two features for comparison.
features = df[['x1', 'x2']].values
logist = LogisticRegression().fit(features, y.numpy())
y_pred = logist.predict(features)
accuracy_score(y, (y_pred > 0.5).astype(int))