<a href="https://colab.research.google.com/github/abdipourasl/Convex-Optimization-1402/blob/main/Convex2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<div class="alert alert-block alert-success">
<h1>Convex Optimization Project #2<h1>
Amin Abdipour 401133011</h1>
</div>

# Q1

## Define MyGD

In [46]:
import torch
import torch.nn as nn

class MyGD(torch.optim.Optimizer):
    def __init__(self, params, lr=0.001):
        defaults = dict(lr=lr)
        super(MyGD, self).__init__(params, defaults)

    def step(self, closure=None):
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                p.data.add_(-group['lr'], grad)

        return loss

## a) Optimization Problem <h1>
$ \min f(x_1, x_2) = \frac{x_2^2}{x_1^2}$ <h1>
Subject to $ x_2>0 $

### Define the objective function

In [47]:
def objective(x1, x2):
    return x1**2 / x2**2

def grad_objective(x1, x2):
    grad_x1 = 2 * x1 / x2**2
    grad_x2 = -2 * x1**2 / x2**3
    return torch.tensor([grad_x1, grad_x2])

# Initial values
x1 = torch.tensor([1.0], requires_grad=True)
z = torch.tensor([0.0], requires_grad=True)

### Use MyGD

In [48]:
optimizer = MyGD([x1, z], lr=0.01)

### GD Loop

In [49]:
for i in range(1000):
    x2 = torch.exp(z)
    loss = objective(x1, x2)

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    # Print or log the iteration information
    print(f'Iteration {i + 1}/{1000}, x1: {x1.item()}, x2: {x2.item()}, Loss: {loss.item()}')

Iteration 1/1000, x1: 0.9800000190734863, x2: 1.0, Loss: 1.0
Iteration 2/1000, x1: 0.9611685276031494, x2: 1.020201325416565, Loss: 0.9227423071861267
Iteration 3/1000, x1: 0.9433681964874268, x2: 1.0392037630081177, Loss: 0.8554559350013733
Iteration 4/1000, x1: 0.9264852404594421, x2: 1.0571366548538208, Loss: 0.7963430881500244
Iteration 5/1000, x1: 0.9104242324829102, x2: 1.074108362197876, Loss: 0.7440134882926941
Iteration 6/1000, x1: 0.8951044678688049, x2: 1.0902107954025269, Loss: 0.697375476360321
Iteration 7/1000, x1: 0.880456805229187, x2: 1.1055231094360352, Loss: 0.6555590629577637
Iteration 8/1000, x1: 0.8664217591285706, x2: 1.1201132535934448, Loss: 0.6178630590438843
Iteration 9/1000, x1: 0.8529475927352905, x2: 1.1340405941009521, Loss: 0.5837158560752869
Iteration 10/1000, x1: 0.8399890661239624, x2: 1.1473573446273804, Loss: 0.552646279335022
Iteration 11/1000, x1: 0.827506422996521, x2: 1.1601094007492065, Loss: 0.5242632031440735
Iteration 12/1000, x1: 0.81546449

In [50]:
x2 = torch.exp(z)
final_result = (x1.item(), x2.item())
print(f'Optimal solution: x1 = {final_result[0]}, x2 = {final_result[1]}, Minimum value: {objective(*final_result)}')

Optimal solution: x1 = 0.00043399748392403126, x2 = 1.654006004333496, Minimum value: 6.884941585300933e-08


## b) Classify MNIST Dataset with MyGD Optimizer

In [51]:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

### Load MNIST dataset

In [52]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

### Define Network

In [53]:
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [54]:
model = MyNet()
optimizer = MyGD(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

### Train

In [55]:
loss_values = []

for epoch in range(20):
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
        loss_values.append(loss.item())

        # Print the loss
    print(f'Epoch [{epoch + 1}/{20}], Loss: {loss.item()}')

Epoch [1/20], Loss: 2.067448616027832
Epoch [2/20], Loss: 1.6720539331436157


KeyboardInterrupt: ignored

In [None]:
plt.errorbar(range(1, len(loss_values) + 1), loss_values, fmt='-', yerr=None, ecolor='red', capsize=5)
plt.title('Training Loss with Error Bars')
plt.xlabel('Training Step')
plt.ylabel('Loss')
plt.show()

### Test Model

In [None]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {100 * accuracy:.2f}%')


# Q2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize

# Define the objective function
def objective_function(x):
    return np.log(np.exp(x[0]) + np.exp(x[1]))

# Define the gradient of the objective function
def gradient(x):
    exp_term = np.exp(x)
    return np.array([exp_term[0] / (exp_term[0] + exp_term[1]), exp_term[1] / (exp_term[0] + exp_term[1])])

# Backtracking Line Search Algorithm
def backtracking_line_search(x, p, alpha_bar, rho, c):
    alpha = alpha_bar
    while objective_function(x + alpha * p) > objective_function(x) + c * alpha * np.dot(gradient(x), p):
        alpha *= rho
    return alpha

def trust_region(x, radius):
    result = minimize(objective_function, x, method='trust-constr', jac=gradient,
                      options={'disp': False, 'xtol': 1e-8, 'gtol': 1e-8})
    return result.x

# Plot Contour Lines of the Objective Function
def plot_contour():
    x1 = np.linspace(-2, 2, 400)
    x2 = np.linspace(-2, 2, 400)
    X1, X2 = np.meshgrid(x1, x2)
    Z = np.log(np.exp(X1) + np.exp(X2))
    plt.contour(X1, X2, Z, levels=20, cmap='viridis')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title('Contour Plot of Objective Function')

# Main Optimization Loop
def optimize(starting_point):
    x_k = starting_point
    alpha_bar = 1.0
    rho = 0.5
    c = 0.1
    trust_region_radius = 1.0

    path_backtracking = [x_k.copy()]
    path_trust_region = [x_k.copy()]

    while True:
        gradient_k = gradient(x_k)
        p_k = -gradient_k  # Newton direction

        # Backtracking Line Search
        alpha_k = backtracking_line_search(x_k, p_k, alpha_bar, rho, c)
        x_k += alpha_k * p_k
        path_backtracking.append(x_k.copy())

        # Trust Region with Dogleg Method
        x_k = trust_region(x_k, trust_region_radius)
        path_trust_region.append(x_k.copy())

        # Check convergence criterion (you may need to define your own convergence criteria)
        if np.linalg.norm(gradient_k) < 1e-6:
            break

    return path_backtracking, path_trust_region

# Run the optimization
starting_point = np.array([1.2, 1.2])
path_backtracking, path_trust_region = optimize(starting_point)

# Plot the results
plot_contour()
path_backtracking = np.array(path_backtracking)
path_trust_region = np.array(path_trust_region)
plt.plot(path_backtracking[:, 0], path_backtracking[:, 1], marker='o', label='Backtracking Line Search')
plt.plot(path_trust_region[:, 0], path_trust_region[:, 1], marker='s', label='Trust Region (Dogleg Method)')
plt.legend()
plt.show()
