In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.init as torch_init
import torch.nn.functional as F

In [2]:
# Load the saved mapping of parameter name -> delta tensor, forcing every tensor onto the CPU.
# NOTE(review): torch.load unpickles the file, so only load files from a trusted source;
# consider weights_only=True if the installed torch version supports it — confirm.
selected_delta_tensor = torch.load('files/selected_delta.pt', map_location=torch.device('cpu'))

In [3]:
# Sanity check: list every stored entry together with the shape of its tensor.
for key in selected_delta_tensor:
    value = selected_delta_tensor[key]
    print(key, value.shape)

body.0.weight torch.Size([12, 10, 5, 5])
body.0.bias torch.Size([12])
body.2.weight torch.Size([12, 12, 5, 5])
body.2.bias torch.Size([12])
body.4.weight torch.Size([12, 12, 5, 5])
body.4.bias torch.Size([12])
body.6.weight torch.Size([12, 12, 5, 5])
body.6.bias torch.Size([12])
fc.0.weight torch.Size([1, 192])
fc.0.bias torch.Size([1])


In [4]:
# Load the reference sample and its label onto the CPU. In this notebook they are used for
# sizing the dummy tensors and for the final visual comparison with the reconstruction.
# NOTE(review): torch.load unpickles the files, so only load files from a trusted source.
ground_truth_data = torch.load('files/training_data1.pt', map_location=torch.device('cpu'))
ground_truth_label = torch.load('files/training_labels1.pt', map_location=torch.device('cpu'))
# Print both shapes so the expected input/label layout is visible in the notebook output.
print(ground_truth_data.shape, ground_truth_label.shape)

torch.Size([1, 10, 64]) torch.Size([1])


In [5]:
def weight_init(m):
    """Re-initialise a module in place if its class name marks it as Conv or Linear.

    Modules whose class name contains 'Conv' or 'Linear' get Xavier-uniform weights
    and, when a bias exists, a bias filled with zeros. Every other module is left
    untouched. Written to be usable as a per-module callback (e.g. ``net.apply``).
    """
    layer_kind = type(m).__name__
    if 'Conv' not in layer_kind and 'Linear' not in layer_kind:
        return

    torch_init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    m.bias.data.fill_(0)

class lenet_used_to_generate_gradients_ucf(nn.Module):
    """LeNet-style network: four 5x5 conv + sigmoid stages, then a linear + sigmoid head.

    The input is a 3-D tensor ``(batch, 10, length)``; ``forward`` inserts a singleton
    height axis so the 2-D convolutions run along the last dimension. The head expects
    192 flattened features, which is what a length of 64 produces (12 channels x 1 x 16).
    The output has shape ``(batch,)`` with values in (0, 1).
    """

    def __init__(self):
        super().__init__()

        kernel = 5
        # (in_channels, out_channels, stride) for each convolution, in order.
        conv_specs = [(10, 12, 2), (12, 12, 2), (12, 12, 1), (12, 12, 1)]

        stages = []
        for in_ch, out_ch, stride in conv_specs:
            stages.append(
                nn.Conv2d(in_ch, out_ch, kernel_size=kernel, padding=kernel // 2, stride=stride)
            )
            stages.append(nn.Sigmoid())
        # Positional Sequential keeps the parameter names body.0, body.2, body.4, body.6.
        self.body = nn.Sequential(*stages)

        self.fc = nn.Sequential(
            nn.Linear(192, 1),
            nn.Sigmoid(),
        )

    def forward(self, inputs):
        # (batch, 10, length) -> (batch, 10, 1, length) so Conv2d accepts it.
        features = self.body(inputs.unsqueeze(2))
        batch_size = features.size(0)
        flat = features.view(batch_size, -1)
        # Drop the trailing unit dimension of the head: (batch, 1) -> (batch,).
        return self.fc(flat).squeeze(-1)
        
# Instantiate the network on the CPU with PyTorch's default parameter initialisation.
# No state_dict is loaded and weight_init is not applied in the visible cells, and no
# random seed is set, so the parameters differ on every kernel restart.
# NOTE(review): gradient matching presumably only works if these weights equal the ones
# the deltas in selected_delta.pt were computed with — confirm and load them here if so.
net = lenet_used_to_generate_gradients_ucf().to("cpu")

In [6]:
# Independent copies of the target deltas, in the mapping's iteration order, so the
# optimisation below never touches the tensors held by selected_delta_tensor.
copy_delta = [delta.clone() for delta in selected_delta_tensor.values()]

In [7]:
# Trainable dummy input and label, drawn uniformly from [0, 1) with the same sizes as the
# ground-truth tensors. They are created directly on the target device: chaining
# `.to(device)` after `requires_grad=True` only keeps a leaf tensor when the device is
# already the CPU; on any other device it returns a non-leaf copy that an optimizer
# refuses to update.
dummy_matrix = torch.rand(ground_truth_data.size(), device="cpu", requires_grad=True)
dummy_label = torch.rand(
    ground_truth_label.size(), dtype=torch.float32, device="cpu", requires_grad=True
)

In [8]:
# optimizer = torch.optim.LBFGS([dummy_matrix, dummy_label])
# `lr` has two roles in this cell: it turns the dummy gradients into weight updates
# (W_t+1 - W_t = -lr * grad) that are compared against copy_delta, and it is the initial
# step size of the Adam optimizer that updates the dummy tensors.
lr = 0.01 # this value is used as the default one and has not been changed (line 80)
optimizer = torch.optim.Adam([dummy_matrix, dummy_label], lr=lr)

NUM_ITERS = 20000   # total optimisation steps
LOG_EVERY = 1000    # report / stagnation-check interval, in steps
STALL_TOL = 0.001   # loss change below this between reference and current check counts as stuck

history = []
stuck = False
best_loss = float('inf')


def add_randomness(tensor, noise_factor=0.001):
    """Return a fresh leaf tensor equal to `tensor` plus Gaussian noise.

    The noise is standard-normal scaled by `noise_factor`. The result is detached from
    the autograd graph of `tensor`, cloned, and has `requires_grad` enabled so it can
    be handed to a new optimizer.
    """
    noise = torch.randn_like(tensor) * noise_factor
    new_tensor = (tensor + noise).detach().clone()
    new_tensor.requires_grad = True
    return new_tensor


def gradient_matching_loss(create_graph=True):
    """Squared distance between the dummy weight updates and the target deltas.

    Runs `net` on the current `dummy_matrix`, takes the gradient of the BCE-with-logits
    loss against `dummy_label` with respect to every network parameter, scales it by
    `-lr`, and sums the squared differences to `copy_delta`.

    Args:
        create_graph: keep the graph of the gradient computation so the result can be
            back-propagated to the dummy tensors. Pass False when only the value is
            needed (e.g. for logging).

    Raises:
        RuntimeError: if a parameter receives no gradient or the number of gradients
            differs from the number of target deltas. The pairing with `copy_delta`
            is positional, so either case would otherwise compare the wrong tensors.
    """
    pred = net(dummy_matrix)
    # dummy_label.data = F.sigmoid(dummy_label.data)
    # NOTE(review): net.fc already ends in nn.Sigmoid, so BCEWithLogitsLoss applies a
    # second sigmoid to `pred`, and `dummy_label` is an unconstrained target. Keep this
    # only if it mirrors how the target deltas were produced — confirm.
    dummy_loss = torch.nn.BCEWithLogitsLoss()(pred, dummy_label)

    dummy_dy_dx = torch.autograd.grad(
        dummy_loss, list(net.parameters()), create_graph=create_graph, allow_unused=True
    )
    if len(dummy_dy_dx) != len(copy_delta) or any(grad is None for grad in dummy_dy_dx):
        raise RuntimeError(
            "gradients and target deltas cannot be paired one-to-one: "
            "{} gradients ({} unused) vs {} deltas".format(
                len(dummy_dy_dx),
                sum(grad is None for grad in dummy_dy_dx),
                len(copy_delta),
            )
        )

    grad_diff = 0
    for grad, target_delta in zip(dummy_dy_dx, copy_delta):
        weight_update = -lr * grad  # W_t+1 - W_t = -n * grad
        grad_diff = grad_diff + ((weight_update - target_delta) ** 2).sum()
    return grad_diff


def closure():
    """Optimizer closure: clear gradients, evaluate the matching loss, back-propagate.

    `optimizer` and `dummy_matrix` are looked up when the closure runs, so it keeps
    working after either name is rebound in the loop below.
    """
    optimizer.zero_grad()
    grad_diff = gradient_matching_loss(create_graph=True)
    grad_diff.backward()
    return grad_diff


for iters in range(NUM_ITERS):
    optimizer.step(closure)
    # scheduler.step()

    if iters % LOG_EVERY != 0:
        continue

    # Value-only evaluation after the step: no second-order graph or backward pass needed.
    current_loss = gradient_matching_loss(create_graph=False).item()
    print(iters, "%.4f" % current_loss)
    # print(dummy_matrix[0][0][:5])

    # The loss counts as stagnating when it is within STALL_TOL of the last reference
    # value; the reference is only refreshed on checks that are not stagnating.
    stuck = abs(current_loss - best_loss) < STALL_TOL
    if stuck:
        # Perturb the dummy input to try to leave the plateau, and restart Adam on the
        # new leaf tensor with half the current learning rate.
        print("Loss stuck at {:.4f}, adding randomness to dummy_matrix.".format(current_loss))
        current_lr = optimizer.param_groups[0]['lr']
        torch.save(dummy_matrix, 'dummy_matrix.pt')  # overwritten on every stall

        dummy_matrix = add_randomness(dummy_matrix)

        # Reloading the optimizer with new dummy_matrix (this also resets Adam's state)
        optimizer = torch.optim.Adam([dummy_matrix, dummy_label], lr=current_lr*0.5)

        stuck = False  # reset the flag for the next check
    else:
        best_loss = current_loss

    history.append(dummy_matrix.clone().detach().cpu())

0 1.1005
1000 0.7870
2000 0.5504
3000 0.3763
4000 0.2546
5000 0.1770
6000 0.1347
7000 0.1173
8000 0.1130
9000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
10000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
11000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
12000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
13000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
14000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
15000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
16000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
17000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
18000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.
19000 0.1126
Loss stuck at 0.1126, adding randomness to dummy_matrix.


In [9]:
# First 20 values of channel 0 of the first ground-truth sample, for comparison below.
print(ground_truth_data[0, 0, :20])

tensor([1.1721, 1.0827, 1.2303, 1.3212, 1.0911, 2.0208, 1.0295, 0.7752, 0.9529,
        1.7379, 0.8081, 1.4984, 1.5377, 0.4338, 0.8784, 1.0580, 2.2255, 1.8857,
        1.3636, 1.3382])


In [10]:
# Same slice of the optimised dummy input. In the recorded output the values range from
# about -30 to 39 while the ground-truth slice lies between 0.4 and 2.3, i.e. this run
# did not recover the input.
print(dummy_matrix[0][0][:20])

tensor([-10.9969, -10.6170,   9.6952,  23.8715,   9.7822, -19.1298,  13.6094,
         15.1885,  38.7057,  -8.2605,  -9.9273,  -0.1120,  31.9626, -12.8309,
        -19.4600,  -6.4289,  26.7692,  -7.6756, -30.5661,   4.7903],
       grad_fn=<SliceBackward0>)
