In [0]:
import torch
from torch.optim import Optimizer
import torch.nn as nn
import numpy as np
import math
import pandas as pd
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [0]:
# load IRIS dataset
dataset = pd.read_csv('iris.csv')


# transform species to numerics
dataset.loc[dataset.species=='Iris-setosa', 'species'] = 0
dataset.loc[dataset.species=='Iris-versicolor', 'species'] = 1
dataset.loc[dataset.species=='Iris-virginica', 'species'] = 2

train_X, test_X, train_y, test_y = train_test_split(dataset[dataset.columns[0:4]].values,
                                                    dataset.species.values, test_size=0.8)

In [0]:
class Net(nn.Module):
    # define nn
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(4, 8)
        self.fc2 = nn.Linear(8, 8)
        self.fc3 = nn.Linear(8, 3)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, X):
        X = F.relu(self.fc1(X))
        X = self.fc2(X)
        X = self.fc3(X)
        X = self.softmax(X)

        return X
    

In [0]:
class SimulatedAnnealing(Optimizer):
    def __init__(self, params, startTemp = 1
                 , coolRate = 0.001, neighborhoodSize = 5
                 , loss = nn.CrossEntropyLoss()
                 , model = None
                 , features = None
                 , labels = None): #these represent default values, but can be overridden
        self.startTemp = startTemp
        self.coolRate = coolRate
        self.currTemp = startTemp
        self.loss = loss
        self.model = model
        self.features = features
        self.labels = labels
        self.neighborhoodSize = neighborhoodSize
        self.param_groups = []
        self.defaults = dict(startTemp=startTemp, coolRate=coolRate, currTemp=startTemp,
                        loss=loss,model=model, features = features, labels = labels,
                        neighborhoodSize = neighborhoodSize)

        param_groups = list(params)
        if len(param_groups) == 0:
            raise ValueError("optimizer got an empty parameter list")
        if not isinstance(param_groups[0], dict):
            param_groups = [{'params': param_groups}]

        for param_group in param_groups:
            self.add_param_group(param_group)

    def step(self):
        #need to first generate a random new point in the space
        oldOutputs = self.model(self.features)
        oldLoss = self.loss(oldOutputs, self.labels.type(torch.LongTensor))

        # Save init values
        old_state_dict = {}
        for key in self.model.state_dict():
            old_state_dict[key] = self.model.state_dict()[key].clone()

        for name, param in self.model.state_dict().items():
            # generate a matrix of random changes in each param element to be added to each param matrix
            # random = torch.Tensor(np.random.uniform(low = self.neighborhoodSize * -1, high = self.neighborhoodSize
                                                        # , size = param.shape))
            random = torch.Tensor(np.random.uniform(low = self.neighborhoodSize * -1, high = self.neighborhoodSize
                                                        , size = param.shape))
            
            #now add random to the params to transform them
            new_param = param + random
            self.model.state_dict()[name].copy_(new_param)

        
        newOutputs = self.model(self.features)
        newLoss = self.loss(newOutputs, self.labels.type(torch.LongTensor))

        if (newLoss > oldLoss):
            alpha = math.exp(-(newLoss - oldLoss) / self.currTemp)
            #print(newLoss.item(), oldLoss.item(), self.currTemp, alpha, -(newLoss - oldLoss) / self.currTemp)
            if (np.random.uniform(0, 1) < alpha): 
              self.model.load_state_dict(old_state_dict)
        
        self.currTemp -= self.coolRate


In [0]:
# wrap up with Variable in pytorch
train_X = Variable(torch.Tensor(train_X).float())
test_X = Variable(torch.Tensor(test_X).float())
train_y = Variable(torch.Tensor(train_y).long())
test_y = Variable(torch.Tensor(test_y).long())


net = Net()

criterion = nn.CrossEntropyLoss()# cross entropy loss


#optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
optimizer = SimulatedAnnealing(net.parameters(), features=train_X, model=net, labels=train_y)

In [7]:
for epoch in range(10000):
    # optimizer.zero_grad()
    out = net(train_X)
    loss = criterion(out, train_y)
    # loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print ('number of epoch', epoch, 'loss', loss.item())

predict_out = net(test_X)
_, predict_y = torch.max(predict_out, 1)

number of epoch 0 loss 1.0847781896591187
number of epoch 100 loss 1.0181115865707397
number of epoch 200 loss 1.0181115865707397
number of epoch 300 loss 1.0181115865707397
number of epoch 400 loss 1.0181115865707397
number of epoch 500 loss 1.0181115865707397
number of epoch 600 loss 1.0181115865707397
number of epoch 700 loss 1.0181115865707397
number of epoch 800 loss 1.0181115865707397
number of epoch 900 loss 1.0181115865707397
number of epoch 1000 loss 0.6847781538963318
number of epoch 1100 loss 0.6181114912033081
number of epoch 1200 loss 0.5514448285102844
number of epoch 1300 loss 0.5514448285102844
number of epoch 1400 loss 0.5514448285102844
number of epoch 1500 loss 0.5514448285102844
number of epoch 1600 loss 0.5514448285102844
number of epoch 1700 loss 0.5514448285102844
number of epoch 1800 loss 0.5514448285102844
number of epoch 1900 loss 0.5514448285102844
number of epoch 2000 loss 0.5514448285102844
number of epoch 2100 loss 0.5514448285102844
number of epoch 2200 l

In [8]:
print ('prediction accuracy', accuracy_score(test_y.data, predict_y.data))

print ('macro precision', precision_score(test_y.data, predict_y.data, average='macro'))
print ('micro precision', precision_score(test_y.data, predict_y.data, average='micro'))
print ('macro recall', recall_score(test_y.data, predict_y.data, average='macro'))
print ('micro recall', recall_score(test_y.data, predict_y.data, average='micro'))

prediction accuracy 0.8583333333333333
macro precision 0.873469387755102
micro precision 0.8583333333333333
macro recall 0.8681818181818182
micro recall 0.8583333333333333
