In [None]:
# This reloads python code imported from *.py files automatically
%reload_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import tqdm 
import tqdm.notebook

from optimization import *

import time
# optimal solution
from optimal_solution import *
from clustering import *

In [None]:
import itertools

import scipy.spatial.distance
from scipy.cluster.vq import kmeans,vq,whiten

### Clustering tests

In [None]:
# Optimizer parameters for the clustering tests.
n_iter, beta = 5000, 50
l = 0.5  # lambda in [0,1], or in [0,2] for G2

# Problem instance: N cities generated by G1 (G2(N) is the alternative generator).
N, n_selected = 10, 0
g = G1(N)

# Shortcuts to the instance's x and v arrays.
citiesX, citiesV = g.x, g.v

#### Simple clusters

* Automatic selection of centroids

In [None]:
# --- Cluster the (whitened) city coordinates with k-means and time it -------
start_time = time.time()
data = whiten(g.x)
centroids = kmeans(data, 10)[0]      # kmeans returns (codebook, distortion)
clx = vq(data, centroids)[0]         # vq returns (labels, distances)

# Weight of each centroid = sum of g.v over the cities assigned to it.
centroids_V = []
for k in range(centroids.shape[0]):
    in_cluster_k = (clx == k)
    centroids_V.append(np.sum(g.v, where=in_cluster_k))
d1 = time.time() - start_time

# --- Left: cities coloured by cluster. Right: centroids sized by weight -----
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
cities_ax, centroids_ax = axes[0], axes[1]
cities_ax.scatter(data[:, 0], data[:, 1], s=g.v, c=clx, label='cities')
sequence = np.arange(centroids.shape[0])
centroids_ax.scatter(centroids[:, 0], centroids[:, 1],
                     s=centroids_V, c=sequence, label='centroids')

# Sanity check: both totals should agree, since every city belongs to one cluster.
print(np.sum(g.v))
print(np.sum(centroids_V))
print(d1)

* With predefined centroids: the 10 cities with the largest population (`g.v`) are used as centroids

In [None]:
# --- Use the 10 cities with the largest g.v as centroids and time it --------
start_time = time.time()
data = whiten(g.x)
ind = np.argpartition(g.v, -10)[-10:]   # indices of the 10 largest g.v (unordered)
centroids = data[ind]
clx = vq(data, centroids)[0]            # vq returns (labels, distances)

# Weight of each centroid = sum of g.v over the cities assigned to it.
centroids_V = []
for k in range(centroids.shape[0]):
    in_cluster_k = (clx == k)
    centroids_V.append(np.sum(g.v, where=in_cluster_k))
d1 = time.time() - start_time

# --- Left: cities coloured by cluster. Right: the chosen centroids ----------
# (marker sizes are left at their default here; s=g.v / s=centroids_V is the
#  population-scaled variant)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
cities_ax, centroids_ax = axes[0], axes[1]
cities_ax.scatter(data[:, 0], data[:, 1], c=clx, label='cities')
sequence = np.arange(centroids.shape[0])
centroids_ax.scatter(centroids[:, 0], centroids[:, 1], c=sequence, label='centroids')

# Sanity check: both totals should agree, since every city belongs to one cluster.
print(np.sum(g.v))
print(np.sum(centroids_V))

print(d1)

### Study case 10 cities

In [None]:
# Problem instance: N cities generated by G1 (G2(N) is the alternative generator).
N, n_selected = 10000, 0
g = G1(N)

# Optimizer parameters (n_iter = 5000 was the previous setting).
n_iter, beta = 10000, 20
l = 0.2  # lambda in [0,1], or in [0,2] for G2

# Placeholders so that later cells can reference these values even if the
# brute-force cell has not been run yet.
f_test = brute_force_f = 0

In [None]:
# --- Clustering --------------------------------------------------------------
start_time = time.time()
nclusters = 10
# Indices of the `nclusters` cities with the largest g.v (unordered).
ind = np.argpartition(g.v, -nclusters)[-nclusters:]
data = g.x  # raw coordinates; whiten() is intentionally not applied in this cell
# Centroid positions come from k-means. (A previous assignment
# `centroids = data[ind]` was immediately overwritten and has been removed.)
centroids, _ = kmeans(data, nclusters)
clx, _ = vq(data, centroids)
# NOTE(review): the centroid weights are the g.v values of the top-`nclusters`
# cities, not the per-cluster sums of g.v, and they are not matched to the
# k-means centroids in any particular order -- confirm this is intended.
# NOTE(review): scipy's kmeans can return fewer than `nclusters` centroids;
# the loop below would then fail on an empty cluster.
centroids_V = g.v[ind]
d1 = time.time() - start_time
print('time clustering= '+str(d1))

# --- Inter-cluster distances -------------------------------------------------
# distances[i, j] = largest squared Euclidean distance between a city of
# cluster i and a city of cluster j (symmetric, zero diagonal).
# The 'method2' label in the timing printout is kept from an earlier comparison
# against a condensed-index variant.
start_time = time.time()
pairwise_distances = scipy.spatial.distance.squareform(
    scipy.spatial.distance.pdist(g.x, 'sqeuclidean'))

# Member indices are computed once per cluster instead of once per (i, j) pair,
# and np.ix_ extracts each sub-block directly, without the intermediate
# (|cluster i| x N) copy that chained `[mask][:, mask]` indexing creates.
members = [np.flatnonzero(clx == k) for k in range(nclusters)]
distances = np.zeros((nclusters, nclusters))
for i in range(nclusters):
    for j in range(i + 1, nclusters):
        block = pairwise_distances[np.ix_(members[i], members[j])]
        distances[i, j] = distances[j, i] = np.amax(block)
d1 = time.time() - start_time
print('computing method2 '+str(d1))

# --- Left: cities coloured by cluster. Right: the k-means centroids ----------
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
axes[0].scatter(data[:, 0], data[:, 1], c=clx, label='cities')
sequence = np.arange(centroids.shape[0])
axes[1].scatter(centroids[:, 0], centroids[:, 1], c=sequence, label='centroids')

In [None]:
# --- Reduced problem: one pseudo-city per cluster ---------------------------
# A G1 instance is created only as a container; its x and v are replaced by
# the centroid positions and weights computed in the previous cell.
gbis = G1(N)
gbis.x = np.array(centroids)
gbis.v = np.array(centroids_V)

# Brute-force search over the reduced problem.
result = bruteforce_sol_N(10, N, l, gbis, distances)
print(result)
best_selected_cities = np.array(result[0])
brute_force_f = result[1]
print(brute_force_f)

# Re-evaluate the brute-force selection with both objective implementations.
f_test = objective_function_simple(N, l, gbis, best_selected_cities, None)
print(f_test)
f_test = objective_function_(N, l, gbis, None, best_selected_cities, None, None)[0]
print(f_test)
plotResult(gbis, 0, best_selected_cities, None, np.zeros(10) + brute_force_f, None, None)

# --- Lift the cluster-level selection back to the N original cities ---------
# Every city inherits the selection flag of the cluster it belongs to.
best_selected_cities_N = best_selected_cities[clx]

f_test, max_dist, max_idx, convex_hull = objective_function_(
    N, l, g, None, best_selected_cities_N, None, None)
print(f_test)
# From here on the reference value is the objective on the full problem.
brute_force_f = f_test
plotResult(g, 0, best_selected_cities_N, None, np.zeros(10) + f_test, None, None)

np.save('sol1', best_selected_cities_N)



In [None]:
# best_sol, best_f = opt_solution_circle(N, l, g, verbose=True)
# print(f"Best solution: {best_f}")    
# plotResult(g,0, best_sol, None, np.zeros(n_iter) + best_f, None, None)  

In [None]:
# Value forwarded as the `initial_selection_probability` keyword to the
# optimizer calls in the cells below.
initial_selection_probability=0.5

In [None]:
# Compare the individual mutation strategies and a combined "3 then 2" run,
# all started from the same initial selection, against the brute-force value.
n_iter = 10000
betas = [40]                 # other schedules tried: [5, 10, 20, 50, 100]
n_strategies = 4             # mutation strategies 0..3 are each run on their own
combined = n_strategies      # row index of the extra combined run

np.random.seed()
# Every run starts from the all-zeros selection vector.
selected_cities = np.zeros(N)

f_st = np.zeros((n_strategies + 1, n_iter))    # one loss trajectory per run
selected_st = np.zeros((n_strategies + 1, N))  # one final (convex) selection per run

# Explicit figure/axes: the loss curves always land on this figure, whatever
# the optimizer or plotResult do with pyplot's "current figure".
fig, ax = plt.subplots()

for i in range(n_strategies):
    np.random.seed()
    selected_cities_n, selected_cities_n_convex, loss_values, loss_value_convex = optimize_with_initialize_betas(
        g, l, selected_cities, betas=betas,
        n_iter=n_iter, mutation_strategy=i,
        initial_selection_probability=initial_selection_probability,
        precompute_pairwise_dist=False, verbose=False)

    # Re-evaluate the convex selection and store that value as the last
    # point of the trajectory.
    f = objective_function_simple(N, l, g, selected_cities_n_convex,
                                  pairwise_distances=None)
    loss_value_convex = f
    if isinstance(selected_cities_n, list):
        selected_cities_n = selected_cities_n[-1]
    loss_values[-1] = loss_value_convex

    f_st[i] = loss_values
    ax.plot(f_st[i], label="mutation_strategy="+str(i))
    selected_st[i] = selected_cities_n_convex

# Combined run: strategy 3 first, then strategy 2 started from its convex result.
selected_cities_n, selected_cities_n_convex, loss_values1, loss_value_convex = optimize_with_initialize_betas(
    g, l, selected_cities, betas=betas, n_iter=n_iter, mutation_strategy=3,
    precompute_pairwise_dist=False, verbose=False)
loss_values1[-1] = loss_value_convex
if isinstance(selected_cities_n, list):
    selected_cities_n = selected_cities_n[-1]

selected_cities_n, selected_cities_n_convex, loss_values, loss_value_convex = optimize_with_initialize_betas(
    g, l, selected_cities_n_convex, betas=[betas[-1]], n_iter=n_iter, mutation_strategy=2,
    precompute_pairwise_dist=False, verbose=False)
if isinstance(selected_cities_n, list):
    selected_cities_n = selected_cities_n[-1]
loss_values[-1] = loss_value_convex

# The two stages together have 2*n_iter points; keep every other one so the
# combined trajectory fits the n_iter columns of f_st.
total_loss = np.concatenate((loss_values1, loss_values))[::2]
f_st[combined] = total_loss
selected_st[combined] = selected_cities_n_convex
ax.plot(f_st[combined], label="Combining")

# Reference line: the same brute_force_f value that is printed below.
ax.hlines(brute_force_f, 0, n_iter, linestyles="--", label="brut-force solution")
ax.legend()

per_run_min = np.amin(f_st, 1)
print('Min of all the methods:')
print(per_run_min)
best_min = np.amin(f_st)
best = np.argmin(per_run_min)
print("The best cost is=%f, found for strategy %i"%(best_min,best))
print(f'Real Bruteforce best solution {brute_force_f}')

plotResult(g,0,selected_st[best],None,f_st[best],None,None,verbose=False)

# Floating-point objective values are compared with a tolerance; an exact
# `==` would miss solutions that differ only by rounding.
if np.isclose(f_st[best][-1], brute_force_f):
    print("Best solution found!")

np.save('solbest', selected_st[best])



In [None]:
# print(selected_st[best])
# f, max_dist, max_idx, convex_hull = objective_function_(N, l, g, None, selected_st[best], None, None)
# print(f)
# f, max_dist, max_idx, convex_hull = objective_function_(N, l, g, None, best_selected_cities, None, None)
# print(f)


### Test algorithm

In [None]:
# Problem instance for the multi-level clustering test: N cities generated by G2.
N, n_selected = 1000, 0
g = G2(N)

# Optimizer parameters.
n_iter = 5000
l = 0.9  # lambda in [0,1], or in [0,2] for G2

In [None]:
# Multi-level optimization: solve the problem on progressively finer
# clusterings (N/100, N/10, ... pseudo-cities), seeding each level with the
# previous level's selection, then refine on the full set of N cities.

# Number of decades in N. Built-in int() replaces np.int, which was removed
# in NumPy 1.24 and raises AttributeError there.
step_cluster = int(np.floor(np.log10(N)))
print(step_cluster)
mutation_strategy = 1
betas = [20]
totalLoss = []

# --- Baseline: direct optimization on all N cities ---------------------------
# (its loss values are plotted but not appended to totalLoss)
selected_cities = np.zeros(N)
selected_cities_n, selected_cities_n_convex, loss_values, loss_value_convex = optimize_with_initialize_betas(
    g, l, selected_cities, betas=betas,
    n_iter=n_iter, mutation_strategy=mutation_strategy,
    initial_selection_probability=initial_selection_probability,
    precompute_pairwise_dist=False, verbose=False)
if isinstance(selected_cities_n, list):
    selected_cities_n = selected_cities_n[-1]
plotResult(g,0,selected_cities_n,selected_cities_n_convex,loss_values,loss_value_convex,None)

# --- Coarse-to-fine levels (at most 3) ---------------------------------------
for stepi in range(0, min(3, step_cluster - 1)):
    betas = [betas[0] + 10]
    scale = 10**(step_cluster - 1 - stepi)   # average number of cities per cluster
    nclusters = N // scale
    print('\n step = '+str(stepi)+', N clusters= '+str(nclusters))

    selected_cities = np.zeros(nclusters)
    is_in_selected_cluster = np.zeros(N)
    if stepi > 0:
        # Per-city flag: was the city's cluster selected at the previous level?
        # `clx` and `selected_cities_n` still refer to the previous level here.
        is_in_selected_cluster = np.array([selected_cities_n[c] == 1 for c in clx])

    # Centroids of this level: the `nclusters` cities with the largest g.v.
    data = g.x
    start_time = time.time()
    ind = np.argpartition(g.v, -nclusters)[-nclusters:]
    centroids = data[ind]
    clx, _ = vq(data, centroids)
    # Centroid weight: sum of g.v over the cluster members, divided by `scale`.
    centroids_V = [np.sum(g.v, where=(clx == k)) for k in range(centroids.shape[0])]
    centroids_V = np.divide(centroids_V, scale)
    d1 = time.time() - start_time
    print('time clustering= '+str(d1))

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    axes[0].scatter(data[:, 0], data[:, 1], c=clx, label='cities')
    sequence = np.arange(centroids.shape[0])
    axes[1].scatter(centroids[:, 0], centroids[:, 1], c=sequence, label='centroids')

    # A G1 instance is created only as a container for the centroid data.
    g_clusters = G1(N)
    g_clusters.x = np.array(centroids)
    g_clusters.v = np.array(centroids_V)

    if stepi > 0:
        # Initial selection for this level: a cluster starts selected when
        # exactly half or all of its cities lie in a previously selected cluster.
        # NOTE(review): the "exactly half" criterion is kept as in the original
        # code -- confirm it is intended (rather than e.g. "at least half").
        selected_cities = np.zeros(nclusters, dtype=bool)
        print('selected cities: ')
        for k in range(nclusters):
            in_cluster = (clx == k)
            cluster_size = np.count_nonzero(in_cluster)
            n_good = np.count_nonzero(in_cluster & is_in_selected_cluster)
            selected_cities[k] = (n_good == cluster_size / 2) or (n_good == cluster_size)

    # Objective of the initial selection, lifted back to the N original cities.
    best_selected_cities_N = selected_cities[clx]
    f = objective_function_simple(N, l, g, best_selected_cities_N, pairwise_distances=None)
    print('f of initializinf function = '+str(f))
    np.random.seed()

    print('begin algo')
    selected_cities_n, selected_cities_n_convex, loss_values, loss_value_convex = optimize_cluster(
        N, g_clusters, l, selected_cities, betas=betas,
        n_iter=n_iter, mutation_strategy=mutation_strategy,
        initial_selection_probability=initial_selection_probability, verbose=False)
    print('end algo')
    if isinstance(selected_cities_n, list):
        selected_cities_n = selected_cities_n[-1]
    else:
        print('problem!')

    plotResult(g_clusters,0,selected_cities_n,selected_cities_n_convex,loss_values,loss_value_convex,None)
    totalLoss = np.concatenate((totalLoss, loss_values))
    print('end plot')

print('steps done')

# --- Final refinement on all N cities -----------------------------------------
# Start from the last level's selection, lifted to the individual cities.
is_in_selected_cluster = [selected_cities_n[c] == 1 for c in clx]
selected_cities_n, selected_cities_n_convex, loss_values, loss_value_convex = optimize_with_initialize_betas(
    g, l, np.array(is_in_selected_cluster), betas=[50],
    n_iter=n_iter, mutation_strategy=mutation_strategy,
    initial_selection_probability=initial_selection_probability,
    precompute_pairwise_dist=False, verbose=False)
# Always define selected_cities_n_result: the next cell plots it, and it used
# to be left undefined whenever the optimizer did not return a list.
if isinstance(selected_cities_n, list):
    selected_cities_n_result = np.array(selected_cities_n[-1])
else:
    selected_cities_n_result = np.array(selected_cities_n)
plotResult(g,0,selected_cities_n_convex,selected_cities_n_convex,loss_values,loss_value_convex,None)

totalLoss = np.concatenate((totalLoss, loss_values))

# Same selection again, this time with the loss accumulated over all levels.
plotResult(g,0,selected_cities_n_convex,selected_cities_n_convex,totalLoss,loss_value_convex,None)

    

In [None]:
# Optional check (disabled): reload a previously saved solution and print its objective.
# sol=np.load('sol.npy')
# f = objective_function_simple(N, l, g, sol,
#                           pairwise_distances=None)
# print(f)

# Plot the result of the final refinement from the previous cell.
# Requires that cell to have been run: it defines selected_cities_n_result,
# selected_cities_n_convex, loss_values and loss_value_convex.
plotResult(g,0,selected_cities_n_result,selected_cities_n_convex,loss_values,loss_value_convex,None)

In [None]:
# Run the packaged "simple" cluster optimization on the current instance.
result, result_convex, loss, convex_loss = do_optimization_cluster_simple(
    g,
    l,
    beta_init=20,
    n_iter=10000,
    mutation_strategy=3,
    initial_selection_probability=0.5,
    precompute_pairwise_dist=False,
    verbose=True,
    show=True,
)

In [None]:
# Run the packaged cluster optimization (initial and final beta given
# explicitly) on the current instance.
result, result_convex, loss, convex_loss = do_optimization_cluster(
    g,
    l,
    betas_init=20,
    beta_last=50,
    n_iter=10000,
    mutation_strategy=3,
    initial_selection_probability=0.5,
    precompute_pairwise_dist=False,
    verbose=True,
    show=True,
)