# Task 1

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.optimizer import Optimizer
from torch.autograd import Variable
from torchsummary import summary

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

In [0]:
class SimulatedAnnealing(Optimizer):
    def __init__(self, params, loss, model, features, labels, T_init, 
                 T_min, annealing_rate=0.6, period=10):
        defaults = dict(T=T_init, T_min=T_min, 
                        annealing_rate=annealing_rate, period=period, 
                        iteration=0)
        super(SimulatedAnnealing, self).__init__(params, defaults=defaults) 
        self.loss = loss
        self.model = model
        self.features = features
        self.labels = labels
        self.mu = 0
        self.sigma = 1
        self.cooled = False

    def step(self):
        loss_value = self.loss(self.model(self.features), self.labels)
        for group in self.param_groups:
            cloned_params = [p.clone() for p in group['params']]
            if group['iteration'] % group['period'] == 0:
                group['T'] *= group['annealing_rate']
            if group['T'] < group ['T_min']:
                self.cooled = True
            for p in group['params']:
                self.mu = p.data.mean().numpy()
                p.data = self.sample(p.data.shape)
            new_loss_value = self.loss(self.model(self.features), self.labels)
            if new_loss_value >= loss_value:
                alpha = np.exp(-(new_loss_value.detach().numpy() + 
                                 loss_value.detach().numpy()) / group['T'])
                if np.random.uniform(0, 1) > alpha:
                    for p, backup in zip(group['params'], cloned_params):
                        p.data = backup.data
            group['iteration'] += 1

    def sample(self, size):
        new_weights = np.random.normal(self.mu, self.sigma, size=size)
        return torch.Tensor(new_weights)

In [0]:
class FeedForwardNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(FeedForwardNetwork, self).__init__()
        self.linear1 = nn.Linear(input_dim, 32)
        self.linear2 = nn.Linear(32, 32)
        self.linear3 = nn.Linear(32, 3)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        out = F.relu(self.linear1(x))
        out = F.relu(self.linear2(out))
        out = self.linear3(out)
        out = self.softmax(out)
        return out

In [0]:
features, labels = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state=42, shuffle=True)
X_train = Variable(torch.Tensor(X_train).float())
X_test = Variable(torch.Tensor(X_test).float())
y_train = Variable(torch.Tensor(y_train).long())
y_test = Variable(torch.Tensor(y_test).long())

In [47]:
def train(epochs):
    model.train()
    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        optimizer.step()
        if optimizer.cooled:
            print('COOLED! epoch {}: loss {}'.format(epoch, loss.item()))
            break
        if epoch % 1000 == 0:
            print('epoch {}: loss {}'.format(epoch, loss.item()))

model = FeedForwardNetwork(input_dim=X_train.shape[1], output_dim=1)
criterion = nn.CrossEntropyLoss()
init_T = criterion(model(X_train), y_train).item()
optimizer = SimulatedAnnealing(model.parameters(), loss=criterion, model=model, features=X_train, labels=y_train, T_init=init_T, T_min=1e-9, annealing_rate=0.99)
train(20000)

model.eval()
out = model(X_train).detach().numpy()
predict = np.argmax(out, 1)
print('\nprediction accuracy on train set: {}'.format(accuracy_score(y_train.numpy(), predict)))

model.eval()
out = model(X_test).detach().numpy()
predict = np.argmax(out, 1)
print('prediction accuracy on test set: {}'.format(accuracy_score(y_test.numpy(), predict)))

epoch 1000: loss 0.9259991645812988
epoch 2000: loss 0.7417617440223694
epoch 3000: loss 0.7417617440223694
epoch 4000: loss 0.7417617440223694
epoch 5000: loss 0.7417617440223694
epoch 6000: loss 0.6776755452156067
epoch 7000: loss 0.6222280263900757
epoch 8000: loss 0.6222280263900757
epoch 9000: loss 0.6222280263900757
epoch 10000: loss 0.6222280263900757
epoch 11000: loss 0.6222280263900757
epoch 12000: loss 0.6222280263900757
epoch 13000: loss 0.6222280263900757
epoch 14000: loss 0.6222280263900757
epoch 15000: loss 0.6222280263900757
epoch 16000: loss 0.6222280263900757
epoch 17000: loss 0.6222280263900757
epoch 18000: loss 0.6222280263900757
epoch 19000: loss 0.6222280263900757
epoch 20000: loss 0.6222280263900757

prediction accuracy on train set: 0.9375
prediction accuracy on test set: 0.9473684210526315


In [46]:
def train(epochs):
    model.train()
    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        previous = loss.item()
        optimizer.step()
        if epoch % 1000 == 0:
            print('epoch {}: loss {}'.format(epoch, loss.item()))


model = FeedForwardNetwork(input_dim=X_train.shape[1], output_dim=1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
train(10000)

model.eval()
out = model(X_train).detach().numpy()
predict = np.argmax(out, 1)
print('\nprediction accuracy on train set: {}'.format(accuracy_score(y_train.numpy(), predict)))

model.eval()
out = model(X_test).detach().numpy()
predict = np.argmax(out, 1)
print('prediction accuracy on test set: {}'.format(accuracy_score(y_test.numpy(), predict)))

epoch 1000: loss 0.569393515586853
epoch 2000: loss 0.5693193078041077
epoch 3000: loss 0.5693081617355347
epoch 4000: loss 0.5693047642707825
epoch 5000: loss 0.5693033933639526
epoch 6000: loss 0.5693027377128601
epoch 7000: loss 0.5693024396896362
epoch 8000: loss 0.5693022608757019
epoch 9000: loss 0.5693022012710571
epoch 10000: loss 0.5693021416664124

prediction accuracy on train set: 0.9821428571428571
prediction accuracy on test set: 0.9736842105263158
