# Simulated annealing

Reference: https://github.com/DrompiX/sim_annealing/blob/master/iris_classification.ipynb

Run this notebook either on a local machine with PyTorch installed, or in Google Colab for an easier setup.

To install PyTorch locally, go to pytorch.org, use the selector there to build the install command for your platform, and then run that command.

Note: at the time of writing, PyTorch supported Python 3.9 or below; check pytorch.org for the currently supported versions.

In [None]:
import numpy as np
from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optimizers

from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

## Data preparation

In [None]:
# Load the Iris dataset and keep the raw feature matrix plus the integer
# class labels, reshaped into a single column (one row per sample).
data = load_iris()
features, labels = data['data'], data['target'].reshape(-1, 1)
# Show the first sample and its label as a quick sanity check.
features[0], labels[0]

(array([5.1, 3.5, 1.4, 0.2]), array([0]))

In [None]:
# Hold out part of the samples for evaluation; the remaining ones are used for
# training. No random_state is passed here, so the split is not seeded.
# NOTE(review): consider passing random_state (and stratify=labels) so the
# accuracies of the different optimizers below are comparable across runs.
x_train, x_test, y_train, y_test = train_test_split(features, labels)

## Network definition

In [None]:

class IrisClassifier(nn.Module):
    """Small fully connected classifier that outputs raw class scores.

    The network is ``Linear -> ReLU -> Linear -> ReLU -> Linear``. No softmax
    is applied in ``forward``, so the output is one unnormalized score per
    class.

    Args:
        in_features: Number of input features per sample.
        classes: Number of output scores (one per class).
        hidden: Width of both hidden layers. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, in_features=4, classes=3, hidden=10):
        super().__init__()
        # Keep the layers under ``self.model`` so that code accessing
        # ``clf.model.parameters()`` keeps working.
        self.model = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, classes),
        )

    def forward(self, x):
        """Return the class scores for ``x`` (last dim must be ``in_features``)."""
        return self.model(x)

## Optimizer implementation

In [None]:
class SimulatedAnnealing:
    """Gradient-free optimizer that trains a model by simulated annealing.

    Each iteration perturbs the flattened weight vector with Gaussian noise,
    evaluates the loss of the perturbed weights, and accepts or rejects the
    candidate with the Metropolis criterion (see ``acceptance_prob``). The
    temperature starts at the initial loss value and is multiplied by
    ``annealing_rate`` every ``annealing_schedule`` iterations.

    Args:
        model: ``nn.Module`` whose parameters are optimized in place.
        annealing_rate: Multiplicative cooling factor; must be in ``(0, 1)``,
            otherwise the temperature would never fall below ``min_temp``.
        annealing_schedule: Number of iterations between two cooling steps;
            must be at least 1.
        step_std: Standard deviation of the Gaussian proposal. Defaults to
            0.2, the value that was previously hard-coded.

    Raises:
        ValueError: If one of the hyper-parameters is outside its valid range.
    """

    def __init__(self, model, annealing_rate=0.999, annealing_schedule=5, step_std=0.2):
        if not 0 < annealing_rate < 1:
            raise ValueError(
                f'annealing_rate must be in (0, 1), got {annealing_rate}')
        if annealing_schedule < 1:
            raise ValueError(
                f'annealing_schedule must be >= 1, got {annealing_schedule}')
        if step_std <= 0:
            raise ValueError(f'step_std must be positive, got {step_std}')
        self.model = model
        self.annealing_rate = annealing_rate
        self.annealing_schedule = annealing_schedule
        self.step_std = step_std

    def get_weights(self):
        """Get weights from the NN and concat them in single vector"""
        return torch.cat([ps.flatten() for ps in self.model.parameters()])

    def load_weights(self, new_params):
        """Load weights to the NN from single weights' vector

        The values are copied into the existing parameter tensors, so the
        model never shares storage with ``new_params``; later in-place edits
        of that vector do not leak into the network.
        """
        offset = 0
        with torch.no_grad():
            for params in self.model.parameters():
                count = params.numel()
                chunk = new_params[offset:offset + count]
                params.copy_(chunk.reshape(params.shape))
                offset += count

    def optimize(self, data, target, criterion, min_temp=1e-2):
        """Anneal the model weights until the temperature drops to ``min_temp``.

        Args:
            data: Input batch passed to the model.
            target: Targets passed to ``criterion`` together with the output.
            criterion: Callable ``criterion(output, target)`` returning a
                scalar loss tensor.
            min_temp: Temperature at which the search stops.

        On return the model holds the last accepted weights (which are not
        necessarily the best weights seen during the search).
        """
        # No gradients are needed anywhere in the search, so skip building
        # autograd graphs for the many forward passes.
        with torch.no_grad():
            cur_params = self.get_weights()
            # Work with plain floats: the temperature must not alias (and
            # later scale in place) the loss tensor it was initialised from.
            cur_loss = criterion(self.model(data), target).item()
            temp, t = cur_loss, 0

            while temp > min_temp:
                new_params = torch.distributions.Normal(
                    cur_params, self.step_std).sample()
                self.load_weights(new_params)
                new_loss = criterion(self.model(data), target).item()

                ap = self.acceptance_prob(cur_loss, new_loss, temp)
                if ap >= np.random.rand():
                    cur_params = new_params
                    cur_loss = new_loss

                if t > 0 and t % self.annealing_schedule == 0:
                    print(f'Temperature = {temp} | Current loss = {cur_loss}')
                    temp *= self.annealing_rate

                t += 1

            # The model currently holds the last *candidate*; restore the
            # last accepted state.
            self.load_weights(cur_params)

    def acceptance_prob(self, cur_loss, new_loss, temp):
        """Return the probability (a float) of accepting the candidate.

        Improvements are always accepted; a worse candidate is accepted with
        probability ``exp((cur_loss - new_loss) / temp)``. Accepts Python
        numbers as well as scalar tensors.
        """
        if new_loss < cur_loss:
            return 1.0
        if temp <= 0:
            # Zero-temperature limit: never accept a non-improving candidate.
            return 0.0
        delta = float(cur_loss) - float(new_loss)
        return float(np.exp(delta / float(temp)))

## Network training and evaluation

In [None]:
# Everything in this notebook runs on the CPU.
device = torch.device('cpu')
# Loss shared by the annealing and the gradient-based experiments.
criterion = nn.CrossEntropyLoss()
# Double-precision classifier trained with simulated annealing.
iris_clf = IrisClassifier().double().to(device)
optimizer = SimulatedAnnealing(iris_clf, annealing_rate=0.999, annealing_schedule=5)

In [None]:
def train_classifier(model, x, y, criterion, optimizer):
    """Train ``model`` on the whole data set with a custom optimizer.

    Args:
        model: Module to train; it is switched to training mode.
        x: NumPy array of input samples.
        y: NumPy array of targets; singleton dimensions are removed before
            the targets are handed to the optimizer.
        criterion: Loss callable forwarded to the optimizer.
        optimizer: Object exposing ``optimize(data, target, criterion)``.
    """
    model.train()
    data = torch.from_numpy(x).to(device)
    target = torch.from_numpy(y).to(device).squeeze()
    if target.dim() == 0:
        # A single sample would otherwise be squeezed down to a scalar and
        # lose its batch dimension; keep it as a batch of one.
        target = target.unsqueeze(0)
    optimizer.optimize(data, target, criterion)

def test_classifier(model, x, y):
    """Print the classification accuracy of ``model`` on ``(x, y)``.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        x: NumPy array of input samples, one row per sample.
        y: NumPy array with one integer class label per sample.

    The predicted class of a sample is the index of its largest output score.
    """
    model.eval()
    data = torch.from_numpy(x).to(device)
    # Flatten so that column-vector labels line up with the predictions.
    target = torch.from_numpy(y).to(device).reshape(-1)
    total = y.shape[0]

    # One batched forward pass; gradients are not needed for evaluation.
    with torch.no_grad():
        predicted = model(data).argmax(dim=1)
    correct = (predicted == target).sum().item()

    print(f'Total test accuracy = {correct / total}')

In [None]:
%%time
# Train the classifier with the simulated-annealing optimizer; the %%time cell
# magic on the first line times the whole cell.
train_classifier(iris_clf, x_train, y_train, criterion, optimizer)

Temperature = 1.21478589558292 | Current loss = 1.1399129763943496
Temperature = 1.2135711096873372 | Current loss = 2.0152123809076987
Temperature = 1.21235753857765 | Current loss = 1.3362418067748867
Temperature = 1.2111451810390723 | Current loss = 1.2170885271869774
Temperature = 1.2099340358580333 | Current loss = 1.130288127678533
Temperature = 1.2087241018221753 | Current loss = 1.0377896977384191
Temperature = 1.2075153777203531 | Current loss = 0.9175717647431384
Temperature = 1.2063078623426327 | Current loss = 0.9175717647431384
Temperature = 1.20510155448029 | Current loss = 1.800705186964105
Temperature = 1.2038964529258098 | Current loss = 1.800705186964105
Temperature = 1.202692556472884 | Current loss = 1.2574533824806393
Temperature = 1.201489863916411 | Current loss = 1.188004740763925
Temperature = 1.2002883740524948 | Current loss = 2.726446649375732
Temperature = 1.1990880856784423 | Current loss = 2.726446649375732
Temperature = 1.1978889975927638 | Current loss 

In [None]:
# Print the accuracy of the annealing-trained classifier on the held-out split.
test_classifier(iris_clf, x_test, y_test)

Total test accuracy = 0.9210526315789473


## Different annealing rates

In [None]:
# Repeat the experiment on a fresh classifier with an annealing rate of 0.75.
iris_clf_test = IrisClassifier().double().to(device)
optimizer_test = SimulatedAnnealing(iris_clf_test, annealing_rate=0.75, annealing_schedule=5)
train_classifier(iris_clf_test, x_train, y_train, criterion, optimizer_test)

Temperature = 1.1841346375766022 | Current loss = 3.5115799310258353
Temperature = 0.8881009781824516 | Current loss = 1.9131985204957171
Temperature = 0.6660757336368387 | Current loss = 1.4927684372995962
Temperature = 0.49955680022762905 | Current loss = 2.28813359974443
Temperature = 0.37466760017072176 | Current loss = 1.3462246993411369
Temperature = 0.28100070012804135 | Current loss = 1.249409872722055
Temperature = 0.210750525096031 | Current loss = 1.249409872722055
Temperature = 0.15806289382202326 | Current loss = 1.249409872722055
Temperature = 0.11854717036651745 | Current loss = 1.249409872722055
Temperature = 0.08891037777488808 | Current loss = 1.0423639910096771
Temperature = 0.06668278333116606 | Current loss = 1.0423639910096771
Temperature = 0.05001208749837455 | Current loss = 0.9880340640398355
Temperature = 0.03750906562378091 | Current loss = 0.9880340640398355
Temperature = 0.028131799217835683 | Current loss = 0.9880340640398355
Temperature = 0.02109884941337

In [None]:
# Print the held-out accuracy of the classifier currently bound to
# iris_clf_test (annealing rate 0.75 when the cells are run in order).
test_classifier(iris_clf_test, x_test, y_test)

Total test accuracy = 0.2894736842105263


In [None]:
# Repeat the experiment on a fresh classifier with an annealing rate of 0.9.
iris_clf_test = IrisClassifier().double().to(device)
optimizer_test = SimulatedAnnealing(iris_clf_test, annealing_rate=0.9, annealing_schedule=5)
train_classifier(iris_clf_test, x_train, y_train, criterion, optimizer_test)

Temperature = 1.1885894035768985 | Current loss = 4.839899504403847
Temperature = 1.0697304632192086 | Current loss = 6.263688347199436
Temperature = 0.9627574168972878 | Current loss = 1.8713465746918383
Temperature = 0.866481675207559 | Current loss = 1.616889299590193
Temperature = 0.7798335076868032 | Current loss = 3.316662471656493
Temperature = 0.7018501569181228 | Current loss = 1.8555447368360738
Temperature = 0.6316651412263106 | Current loss = 1.2724811397750138
Temperature = 0.5684986271036795 | Current loss = 0.7769164125243267
Temperature = 0.5116487643933115 | Current loss = 0.7769164125243267
Temperature = 0.4604838879539804 | Current loss = 1.3695560147455113
Temperature = 0.41443549915858235 | Current loss = 1.3695560147455113
Temperature = 0.3729919492427241 | Current loss = 0.5606307467491598
Temperature = 0.3356927543184517 | Current loss = 0.5606307467491598
Temperature = 0.3021234788866065 | Current loss = 0.5606307467491598
Temperature = 0.2719111309979459 | Cur

In [None]:
# Print the held-out accuracy of the classifier currently bound to
# iris_clf_test (annealing rate 0.9 when the cells are run in order).
test_classifier(iris_clf_test, x_test, y_test)

Total test accuracy = 0.9736842105263158


## Gradient optimization testing

In [None]:
def train_classifier_grad(model, x, y, criterion, optimizer, epochs=1000):
    """Train ``model`` with a gradient-based optimizer, one sample per step.

    Args:
        model: Module to train; it is switched to training mode.
        x: NumPy array of input samples, one row per sample.
        y: NumPy array with one class label per sample, either 1-D or as a
            column vector.
        criterion: Loss callable ``criterion(prediction, label)``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        epochs: Number of passes over the data.

    The summed per-sample loss is printed for the first epoch, every 50th
    epoch after it, and the last epoch.
    """
    model.train()
    data = torch.from_numpy(x).to(device)
    target = torch.from_numpy(y).to(device)
    for epoch in range(epochs):
        total_loss = 0
        for feat, label in zip(data, target):
            # Add a batch dimension of one to the prediction ...
            pred = model(feat).unsqueeze(0)
            # ... and make sure the label has one too: with 1-D ``y`` each
            # label is a 0-dim tensor, which would not match the prediction.
            loss = criterion(pred, label.reshape(-1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        if epoch % 50 == 0 or epoch == epochs - 1:
            print(f'Epoch {epoch+1}/{epochs}, Loss = {total_loss}')

In [None]:
# Fresh double-precision classifier for the gradient-based baseline,
# optimized with Adam at a learning rate of 0.001.
iris_clf2 = IrisClassifier().double().to(device)
optimizer2 = optimizers.Adam(iris_clf2.parameters(), lr=1e-3)

In [None]:
%%time
# Train the baseline classifier with Adam for 125 epochs; the %%time cell
# magic on the first line times the whole cell.
train_classifier_grad(iris_clf2, x_train, y_train, criterion, optimizer2, epochs=125)

Epoch 1/125, Loss = 134.93507551816097
Epoch 51/125, Loss = 9.316814715360431
Epoch 101/125, Loss = 8.159235981004146
Epoch 125/125, Loss = 7.846305337223735
CPU times: user 12.2 s, sys: 40.5 ms, total: 12.2 s
Wall time: 12.4 s


In [None]:
# Print the accuracy of the Adam-trained classifier on the held-out split.
test_classifier(iris_clf2, x_test, y_test)

Total test accuracy = 0.9473684210526315
