In [92]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from tqdm import tqdm

In [140]:
class SpecificSoftmaxMSE():

    def __init__(self, neuron, y_true=0, dim=1):
        """
        Args:
        Name     Type    Desc
        neuron:  int     The output neuron to minimize
        y_true   float   The desired activation
        dim      int     The softmax axis. Default is one for tensor with shape (n_batches, n_classes)
        """
        super().__init__()
        self.neuron = neuron
        self.y_true = y_true
        self.logits = nn.Softmax(dim=dim)

    """
    Compute the MSE after computing the softmax of input.
    Forward is implemented in the __call__ method of super
    """
    def forward(self, y_pred):
        """
        Args
        y_pred  torch.tensor The output of the networ. Preferable shape (n_batch, n_classes)
        """
        if len(y_pred.shape) == 1:
            y_pred = y_pred.reshape(1, -1)
        return 0.5*(self.y_true - self.logits(y_pred)[:, self.neuron])**2
    
    def __call__(self, x):
        return self.forward(x)

In [132]:
class InexactZSGM(object):
    """
    Args:
    Name            Type                Description
    model:          (nn.Module)         The model to use to get the output
    loss:           (nn.Module)         The loss to minimize
    device:
    """
    def __init__(self, model, loss, device=torch.device('cuda')):
        self.device = device
        self.loss = loss
        self.model = model.to(self.device)
        self.model.eval()

        
    def depth_run(self, x, v, mk, gamma_k, mu_k, epsilon, 
                  max_steps=100, stop_criterion=1e-3, 
                  verbose=0, additional_out=False, tqdm_disabled=False):
        """
        Args:
        Name            Type                Description
        x:              (torch.tensor)      The variable of our optimization problem. Should be a 3D tensor (img)
        v:              (float)             The gaussian smoothing
        mk:             (list)              Number of normal vector to generate at every step
        gamma_k         (list)              Pseudo learning rate inside ICG at every step
        mu_k            (list)              Stopping criterion inside ICG at every step
        epsilon:        (float)             The upper bound of the infinity norm
        max_steps:      (int)               The maximum number of steps. Default is 100
        stop_criterion  (float)             The minimum loss function. Default is 1e-3
        verbose:        (int)               Display information or not. Default is 0
        additional_out  (bool)              Return also all the x. Default is False
        tqdm_disable    (bool)              Disable the tqdm bar. Default is False
        """
        x = x.to(self.device)
        
        # 1. Init class attributes
        self.create_boundaries(x, epsilon) # Set x_original min and max
        self.dim = x.shape
        self.total_dim = torch.prod(torch.tensor(x.shape))
        self.epsilon = epsilon
        
        # 2. Init list of results
        losses, outs = [], [] # List of losses and outputs
        
        # 3. Main optimization cycle
        for ep in tqdm(range(max_steps), disable=tqdm_disabled):
            if verbose:
                print("---------------")
                print("Step number: {}".format(ep))
            # 3.1 Call the step
            x, gk = self.step(x, v, gamma_k[ep], mu_k[ep], mk[ep], verbose)
            x = x.reshape(self.dim[0], self.dim[1], self.dim[2])
            # 3.2 Compute loss
            out = self.model(x.view(1, self.dim[0], self.dim[1], self.dim[2]))
            loss = self.loss(out)
            # 3.3 Save results
            losses.append(loss.detach().cpu().item())
            outs.append(out.detach().cpu()[0, self.loss.neuron].item())
            # 3.4 Display current info
            if verbose:
                print("Loss:        {}".format(losses[-1]))
                print("Output:      {}".format(outs[-1]))
                
        return losses, outs

    
    def step(self, x, v, gamma, mu, mk, verbose=0):
        # Compute the approximated gradient
        g = self.compute_Gk(x, v, mk, verbose)
        # Call the inexact conditional gradient
        x_new = self.compute_ICG(x, g, gamma, mu).reshape(x.shape[0], x.shape[1], x.shape[2])
        
        if verbose > 1:
            print("INSIDE STEP")
            print("Gradient has shape: {}".format(g.shape))
            print("Gradient is:\n{}".format(g))
            print("x_new has shape: {}".format(x_new.shape))
            print("x_new is:\n{}".format(x_new))

        return x_new.detach(), g.detach()
    
    """
    Compute the Gv(x(k-1), chi(k-1), u(k)) in order to compute an approximation of the gradient of f(x(k-1), chi(k-1))
    """
    def compute_Gk(self, x, v, mk, verbose=0):
        """
        Args:
        Name            Type                Description
        x:              (torch.tensor)      The variable of our optimization problem. Should be a 3D tensor (img)  
        v:              (float)             The gaussian smoothing
        verbose:        (bool)              Display information or not. Default is 0
        """
        # 1. Create x(k-1) + v*u(k-1)
        uk     = torch.empty(mk, self.total_dim).normal_(mean=0, std=1).to(self.device) # Dim (mk, channel*width*height)
        img_u  = uk.reshape(mk, self.dim[0], self.dim[1], self.dim[2])                  # Dim (mk, channel, width, height)
        img_x  = x.expand(mk, self.dim[0], self.dim[1], self.dim[2])                    # Dim (mk, channel, width, height)
        m_x    = (img_x + v*img_u)                                                      # Dim (mk, channel, width, height)

        if verbose > 1:
            print('INPUT')
            print('The Gaussian vector uk has shape:{}'.format(uk.shape))
            print('The input x has shape:\t\t{}'.format(x.shape))
            print('The input x + vu has shape:\t{}'.format(m_x.shape))

        # 2. Get objective functions
        standard_loss = self.loss(self.model(x.view(1, x.shape[0], x.shape[1], x.shape[2])))                                        # Dim (1)
        gaussian_loss = self.loss(self.model(m_x))                                      # Dim (mk)

        # 3. Compute Gv(x(k-1), chi(k-1), u(k))
        fv = ((gaussian_loss - standard_loss.expand(uk.shape[0]))/v).view(-1, 1)        # Dim (mk, 1)
        G = fv * uk                                                                     # Dim (mk, channel*width*height)

        return torch.mean(G, axis=0).detach()


    def compute_ICG(self, x, g, gamma, mu):
        """
        Args:
        Name            Type                Description
        x:              (torch.tensor)      The variable of our optimization problem. Should be a 3D tensor (img)
        g:              (torch.tensor)      The approximated gradient. Should be a 1D tensor
        gamma:          (float)             A list of the the number of normal vector to generate at every step
        mu:             (float)             The momentum to use at every step
        """
        # 1. Init variables
        y_old = x.view(-1) # dim = (n_channel * width * height)
        u = torch.rand(self.total_dim).to(self.device)*(self.max.view(-1) - self.min.view(-1)) + self.min.view(-1)
        t = 1
        k = 0
                
        # 2. Main cycle
        while(k==0):
            # 2.1 Compute gradient
            grad = g + gamma*(y_old - x.view(-1))
            # 2.2 Move to the boundaries in one shot
            y_new = self.x_original.view(-1) - self.epsilon*torch.sign(grad)
            # 2.3 Compute new function value
            h = torch.dot(grad, y_new - y_old)
            # 2.4 Check conditions
            if h >= -mu:
                k = 1
            else:
                y_old = (t-1)/(t+1)*y_old + 2/(t+1)*y_new
                t += 1
        
        return y_old.detach()
    
    
    def create_boundaries(self, x, epsilon):
        """
        Args:
        Name            Type                Description
        x:              (torch.tensor)      The original image. Should be a 3D tensor (img)
        epsilon:        (float)             The maximum value of ininity norm.
        """
        self.x_original = x.clone().to(self.device)           # dim = (n_channel, width, height)
        self.max = (self.x_original+epsilon).to(self.device)  # dim = (n_channel, width, height)
        self.min = (self.x_original-epsilon).to(self.device)  # dim = (n_channel, width, height)
        self.max[self.max > 1] = 1
        self.min[self.min < 0] = 0


In [127]:
class Net(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 3, (2, 2), stride=2, padding=1)
        self.linear = nn.Linear(3, 3)


    def forward(self, x):
        x = nn.ReLU()(self.conv(x))
        return nn.Sigmoid()(self.linear(x.view(x.shape[0], -1)))

In [145]:
net = Net()
original_x = torch.rand(1, 1, 1).float()
print('X is {:.5f}'.format(float(original_x)))
print('Out is {}'.format(net(original_x.view(1, 1, 1, 1))[0].data))

X is 0.22989
Out is tensor([0.5542, 0.4462, 0.3935])


In [146]:
loss_fn = SpecificSoftmaxMSE(neuron=0, y_true=0)
optim = InexactZSGM(model=net, loss=loss_fn)

In [139]:
6*(d+5)*N

tensor(3600)

In [147]:
N = 100
d = torch.prod(torch.tensor(original_x.shape))
print()

params = {'x':              original_x,
          'v':              (1/(2*N*(d+3)**3))**0.5,
          'mk':             [6*(d+5)*N]*100,
          'gamma_k':        [0.3]*100,
          'mu_k':           [1/(4*N)]*100,
          'epsilon':        0.2,
          'max_steps':      N,
          'stop_criterion': 1e-1,
          'verbose':        2
         }

losses, outs = optim.depth_run(**params)

  3%|██▍                                                                               | 3/100 [00:00<00:03, 28.86it/s]


INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0014], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 0
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0014], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 1
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The inpu

 11%|████████▉                                                                        | 11/100 [00:00<00:02, 32.53it/s]


Step number: 5
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 6
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 7
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has sh

 19%|███████████████▍                                                                 | 19/100 [00:00<00:02, 34.12it/s]


---------------
Step number: 11
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 12
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 13
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussi

 23%|██████████████████▋                                                              | 23/100 [00:00<00:02, 34.06it/s]

INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 19
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 20
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0014], device='cuda:0')
x_new has shape: t

 31%|█████████████████████████                                                        | 31/100 [00:00<00:01, 35.82it/s]

Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 26
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 27
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]

 40%|████████████████████████████████▍                                                | 40/100 [00:01<00:01, 38.25it/s]


Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 33
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 34
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([

 44%|███████████████████████████████████▋                                             | 44/100 [00:01<00:01, 37.41it/s]


x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 40
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 41
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step numbe

 53%|██████████████████████████████████████████▉                                      | 53/100 [00:01<00:01, 37.58it/s]


Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 48
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 49
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([

 61%|█████████████████████████████████████████████████▍                               | 61/100 [00:01<00:01, 37.17it/s]


Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 54
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 55
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([

 65%|████████████████████████████████████████████████████▋                            | 65/100 [00:01<00:00, 37.77it/s]


---------------
Step number: 61
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 62
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0014], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 63
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussi

 73%|███████████████████████████████████████████████████████████▏                     | 73/100 [00:02<00:01, 26.56it/s]

INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0014], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 69
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 70
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The inp

 77%|██████████████████████████████████████████████████████████████▎                  | 77/100 [00:02<00:00, 28.23it/s]


x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 75
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 76
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0015], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step numbe

 85%|████████████████████████████████████████████████████████████████████▊            | 85/100 [00:02<00:00, 30.41it/s]


---------------
Step number: 80
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 81
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 82
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussi

 94%|████████████████████████████████████████████████████████████████████████████▏    | 94/100 [00:02<00:00, 34.29it/s]


x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 87
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 88
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step numbe

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 34.18it/s]

---------------
Step number: 94
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 95
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussian vector uk has shape:torch.Size([3600, 1])
The input x has shape:		torch.Size([1, 1, 1])
The input x + vu has shape:	torch.Size([3600, 1, 1, 1])
INSIDE STEP
Gradient has shape: torch.Size([1])
Gradient is:
tensor([-0.0016], device='cuda:0')
x_new has shape: torch.Size([1, 1, 1])
x_new is:
tensor([[[0.2299]]], device='cuda:0')
---------------
Step number: 96
Loss:        0.06615575402975082
Output:      0.5541889071464539
INPUT
The Gaussia




In [None]:


epoch = 50
m = [50]*epoch
a = [0.9]*epoch
v = 1


net = Net()
loss = SpecificSoftmaxMSE(neuron=2, y_true=0, dim=1)
optim = ZeroSGD(model=net, loss=loss)

x = torch.tensor([1])
x, loss_curve, out, xs = optim.run(x.view(1, 1, 1), v, m, a, epsilon=0.5,
                            max_steps=epoch, stop_criterion = 0,
                            max_aux_step = 100, verbose=0, additional_out=True)

min_, max_ = min(xs), max(xs)
losses = []
for i in tqdm(range(int(min_-1)*10, int(max_+1)*10)):
    x = torch.tensor([i/10]).to(torch.device('cuda'))
    out = net(x.view(1, 1, 1, 1))
    losses.append(loss(out))

plt.plot([i/10 for i in range(int(min_-1)*10, int(max_+1)*10)], losses, label='Loss curve')
plt.scatter(xs, loss_curve, label='Parameters')
plt.legend()
plt.xlabel('Input')
plt.ylabel('Loss')
plt.title('Loss function')
plt.grid()
plt.show()
