In [None]:
%reload_ext autoreload
%autoreload 2

import time
import itertools
import numpy as np
import matplotlib.pyplot as plt

import tqdm 
import tqdm.notebook

from util import G1, G2
import multiprocessing
import baseline
import smooth
import convexhull
import clustering

## Dataset Generation
Create the specific dataset to optimize over and set the random seed.

In [None]:
# Placeholder for loading a real data set; a generated one is used instead.

N = 1000  # size argument handed to the generator below
seed = 3

# Seed NumPy's global RNG before the data set is built.
np.random.seed(seed + 42231)

# Alternative generator: g = G1(N)
g = G2(N)

l = 1.0  # lambda

## Basic Optimization 
The following code runs all our methods on the same dataset and outputs the best solution and the error plots.
This code is only meant to visualize the convergence of all our methods on the given dataset. For the best possible results, see the next section, which runs our smooth method with many different random seeds.

In [None]:
# The (slow) clustering method is only included when g.x has at most 5000 rows.
methods = [baseline, convexhull, smooth]
if g.x.shape[0] <= 5000:
    methods.insert(2, clustering)

# Display label for each method module.
method_names = {
    baseline: 'Baseline',
    convexhull: 'Convex hull',
    clustering: 'Clustering',
    smooth: 'Smooth (Final method)',
}



# Specify some functions to select beta, roughly tuned to each method
def beta(i, n_iter):
    """Return the beta value for iteration ``i`` out of ``n_iter``.

    Piecewise-constant schedule: 0.4 while ``i < n_iter // 2``, then 1 while
    ``i < n_iter * 2 / 3``, and 5 for all remaining iterations.
    """
    if i < n_iter // 2:
        return 0.4
    return 1 if i < n_iter * 2 / 3 else 5

def beta2(i, n_iter):
    """Return the beta value for iteration ``i`` out of ``n_iter``.

    Piecewise-constant schedule: 3 while ``i < n_iter // 2``, then 5 while
    ``i < n_iter * 2 / 3``, and 15 for all remaining iterations.
    """
    if i < n_iter // 2:
        return 3
    return 5 if i < n_iter * 2 / 3 else 15

# Iteration count passed to every method; one run is performed per method.
n_iter = 5000
total_runs = len(methods)

# Beta handed to each method's optimize(): a schedule function for most
# methods, the constant 10 for clustering.
betas = {
    baseline: beta2,
    convexhull: beta2,
    clustering: 10,
    smooth: beta,
}

def run_optimization(i):
    """Run ``methods[i]`` on the data set ``g`` and return its result.

    NumPy's global RNG is seeded with ``i + seed`` first; the method module's
    ``optimize`` is then called with the beta entry stored for it in ``betas``.

    Returns:
        The first two of the three values returned by ``optimize``; the
        third one is discarded.
    """
    np.random.seed(seed + i)
    method = methods[i]
    run_selected, run_loss_values, _ = method.optimize(
        g, l, beta=betas[method], n_iter=n_iter, verbose=True)
    return run_selected, run_loss_values

# Run every method once (with a progress bar) and split the result pairs.
results = [run_optimization(i) for i in tqdm.notebook.tqdm(range(total_runs))]
all_selected = [run_selected for run_selected, _ in results]
losses = np.array([run_losses for _, run_losses in results])

# Select the best solution: the run with the smallest final loss value
min_idx = np.argmin(losses[:, -1])
selected = all_selected[min_idx]
loss_values = losses[min_idx, :]

# Plot losses and final selection of cities
fig, axes = plt.subplots(1, 2, figsize=(14, 4))
loss_ax, city_ax = axes

# Left panel: one loss curve per run plus a dashed reference line at zero.
for i in range(total_runs):
    run_losses = losses[i]
    loss_ax.plot(np.arange(run_losses.shape[0]), run_losses)

loss_ax.hlines(0.0, 0, n_iter, color='black', linestyle='--')
loss_ax.set_title(f'Loss values (best: {np.min(loss_values):.3f})')
loss_ax.legend([method_names[m] for m in methods])

# Right panel: rows of g.x split by the 0/1 entries of the chosen selection.
m = selected == 1
not_selected = selected == 0
selected_pos = g.x[m, :]
not_selected_pos = g.x[not_selected, :]

city_ax.scatter(not_selected_pos[:, 0], not_selected_pos[:, 1], s=8)
city_ax.scatter(selected_pos[:, 0], selected_pos[:, 1], c='r', s=8)
city_ax.set_title('Selected cities')
city_ax.set_aspect(1)

## Multithreaded Optimization
The following runs our smooth optimization method from many different initializations in parallel worker processes and outputs the best result.

In [None]:
# Settings for the multi-run experiment.
use_multithreading = True  # False runs the same jobs one after another instead
n_threads = 12   # size of the multiprocessing.Pool
total_runs = 12  # number of runs; run i is seeded with i + seed
n_iter = 5000    # forwarded to smooth.optimize as n_iter


def beta(i, n_iter):
    """Return the beta value for iteration ``i`` out of ``n_iter``.

    The thresholds are checked in order: below ``n_iter // 2`` the value is
    0.4, below ``n_iter * 2 / 3`` it is 1, and otherwise it is 5.
    """
    schedule = ((n_iter // 2, 0.4), (n_iter * 2 / 3, 1))
    for upper_bound, value in schedule:
        if i < upper_bound:
            return value
    return 5


def run_optimization(i):
    """Perform one run of the smooth method with its own random generator.

    A dedicated ``np.random.RandomState`` seeded with ``i + seed`` is passed
    to ``smooth.optimize`` as ``rng``, so the run does not rely on NumPy's
    global random state. ``verbose`` is switched on only when
    ``use_multithreading`` is False.

    Returns:
        The first two of the three values returned by ``smooth.optimize``;
        the third one is discarded.
    """
    run_rng = np.random.RandomState(seed + i)
    be_verbose = not use_multithreading
    outcome = smooth.optimize(
        g, l, beta=beta, n_iter=n_iter, verbose=be_verbose, rng=run_rng)
    run_selected, run_loss_values, _ = outcome
    return run_selected, run_loss_values


losses = []
all_selected = []
if not use_multithreading:
    # Sequential mode: run the jobs one after another in this process.
    for i in tqdm.notebook.tqdm(range(total_runs)):
        selected, loss_values = run_optimization(i)
        losses.append(loss_values)
        all_selected.append(selected)
else:
    # Parallel mode: distribute the runs over a pool of worker processes and
    # advance the progress bar each time a result comes back.
    with multiprocessing.Pool(n_threads) as p, \
            tqdm.notebook.tqdm(range(total_runs)) as pbar:
        for i, (selected, loss_values) in enumerate(
                p.imap_unordered(run_optimization, range(total_runs))):
            pbar.update()
            losses.append(loss_values)
            all_selected.append(selected)

# Select the best solution: the run whose final loss value is the smallest.
# Without this step `selected` / `loss_values` would simply hold whichever
# result was received last, which is not necessarily the best run.
losses = np.array(losses)
min_idx = np.argmin(losses[:, -1])
loss_values = losses[min_idx, :]
selected = all_selected[min_idx]


# Plot losses and final selection of cities
fig, axes = plt.subplots(1, 2, figsize=(14, 4))

# Left panel: one loss curve per run plus a dashed reference line at zero.
for i in range(total_runs):
    axes[0].plot(np.arange(losses[i].shape[0]), losses[i])

axes[0].hlines(0.0, 0, n_iter, color='black', linestyle='--')
axes[0].set_title(f'Loss values (best: {np.min(loss_values):.3f})')

# Right panel: rows of g.x split by the 0/1 entries of the best selection.
m = selected == 1
not_selected = selected == 0
selected_pos = g.x[m, :]
not_selected_pos = g.x[not_selected, :]

axes[1].scatter(not_selected_pos[:, 0], not_selected_pos[:, 1], s=8)
axes[1].scatter(selected_pos[:, 0], selected_pos[:, 1], c='r', s=8)
axes[1].set_title('Selected cities')
axes[1].set_aspect(1)