In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.init as torch_init
import torch.nn.functional as F

In [2]:
# Recorded per-parameter deltas (a mapping of parameter name -> tensor),
# forced onto the CPU regardless of the device they were saved from.
# NOTE: torch.load unpickles the file, so only load files from a trusted source.
selected_delta_tensor = torch.load(
    'files/selected_delta.pt',
    map_location='cpu',
)

In [3]:
# Quick inventory: one line per stored delta, showing its name and shape.
for name, delta in selected_delta_tensor.items():
    print(name, delta.shape)

fc1.weight torch.Size([512, 64])
fc1.bias torch.Size([512])
fc_att1.0.weight torch.Size([512, 64])
fc_att1.0.bias torch.Size([512])
fc2.weight torch.Size([32, 512])
fc2.bias torch.Size([32])
fc_att2.0.weight torch.Size([32, 512])
fc_att2.0.bias torch.Size([32])
fc3.weight torch.Size([1, 32])
fc3.bias torch.Size([1])


In [4]:
# Load one stored sample and its label onto the CPU and show their shapes.
# NOTE: torch.load unpickles the files, so only load files from a trusted source.
sample_data, sample_label = (
    torch.load(path, map_location='cpu')
    for path in ('files/training_data1.pt', 'files/training_labels1.pt')
)
print(sample_data.shape, sample_label.shape)

torch.Size([1, 10, 64]) torch.Size([1])


In [5]:
# C2FPL_ucf.forward never uses the fc_att* sub-modules, so their gradients come
# back as None and are skipped during gradient matching. Drop the matching
# deltas so the remaining entries line up one-to-one with the used parameters.
keys_to_delete = ['fc_att1.0.weight', 'fc_att1.0.bias', 'fc_att2.0.weight', 'fc_att2.0.bias' ]
for key in keys_to_delete:
    # pop() with a default keeps this cell idempotent: re-running it after the
    # keys are already gone is a no-op instead of a KeyError.
    selected_delta_tensor.pop(key, None)

In [6]:
def weight_init(m):
    """Xavier-initialise a Conv*/Linear* module and zero its bias.

    Meant to be passed to ``nn.Module.apply``. A module whose class name
    contains neither 'Conv' nor 'Linear' is left untouched.

    Args:
        m: the module to (possibly) re-initialise in place.
    """
    layer_type = m.__class__.__name__
    if 'Conv' not in layer_type and 'Linear' not in layer_type:
        return

    torch_init.xavier_uniform_(m.weight)
    # Layers built with bias=False expose bias as None.
    if m.bias is None:
        return
    m.bias.data.fill_(0)

class C2FPL_ucf(nn.Module): # multiplication then Addition
    """Three-layer MLP scorer: 64 -> 512 -> 32 -> 1, averaged over dim 1.

    The two ``fc_att*`` branches (Linear followed by Softmax over dim 1) are
    registered as sub-modules but are not used by ``forward``. The original
    header note "multiplication then Addition" is kept as-is; the visible
    forward pass contains no such step.
    """

    def __init__(self):
        super().__init__()

        # Registration order is significant: it fixes the order of
        # net.parameters() and the order in which weight_init visits layers.
        self.fc1 = nn.Linear(64, 512)
        self.fc_att1 = nn.Sequential(
            nn.Linear(64, 512),
            nn.Softmax(dim=1),
        )

        self.fc2 = nn.Linear(512, 32)
        self.fc_att2 = nn.Sequential(
            nn.Linear(512, 32),
            nn.Softmax(dim=1),
        )

        self.fc3 = nn.Linear(32, 1)

        self.dropout = nn.Dropout(0.6)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

        # Xavier weights / zero biases for every Linear layer registered above.
        self.apply(weight_init)

    def forward(self, inputs):
        """Score a batch of shape (batch, ncrops, 64).

        Returns:
            Tensor of shape (batch, 1): per-crop sigmoid scores averaged
            over dim 1.
        """
        # The values are unused; the unpack doubles as a rank check and
        # raises ValueError for anything that is not 3-D.
        _batch, _ncrops, _feat = inputs.size()

        hidden = self.dropout(self.relu(self.fc1(inputs)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))

        scores = self.sigmoid(self.fc3(hidden))
        return scores.mean(dim=1)

# Model whose gradients are matched against the recorded deltas.
# NOTE(review): the network keeps its fresh Xavier initialisation; no checkpoint
# is loaded in the visible cells. Gradient matching presumably needs the weights
# at which the recorded deltas were computed; confirm and load them if available.
net = C2FPL_ucf().to("cpu")
# Disable Dropout(0.6). In training mode every forward pass samples a new mask,
# so the gradient-matching objective would change between closure evaluations,
# which breaks L-BFGS (it re-evaluates the closure several times per step).
net.eval()

In [7]:
# Independent copies of the remaining deltas, in the dict's insertion order.
copy_delta = [delta.clone() for delta in selected_delta_tensor.values()]

In [8]:
# Trainable stand-ins for the unknown input (1, 10, 64) and label (1, 1).
# They are created directly on the target device so they are leaf tensors
# that an optimizer can update.
dummy_matrix = torch.rand((1, 10, 64), device="cpu", requires_grad=True)
dummy_label = torch.rand((1, 1), dtype=torch.float32, device="cpu", requires_grad=True)

In [9]:
def cross_entropy_for_onehot(pred, target):
    """Cross-entropy between logits and a one-hot (or soft) target.

    Args:
        pred: logits; log-softmax is taken over the last dimension.
        target: per-class weights, broadcastable against ``pred``.

    Returns:
        Scalar tensor: the product summed over dim 1, then averaged.
    """
    log_probs = F.log_softmax(pred, dim=-1)
    per_sample = (-target * log_probs).sum(dim=1)
    return per_sample.mean()

# Alias under the conventional name.
criterion = cross_entropy_for_onehot

In [10]:
# Step size the closure uses to turn a gradient into a predicted weight
# update (update = -lr * grad). Original note: this is taken as the default
# value and was never changed. TODO: confirm it matches the run that produced
# the recorded deltas.
lr = 0.01

# L-BFGS with default hyper-parameters updates both dummy tensors.
optimizer = torch.optim.LBFGS(params=[dummy_matrix, dummy_label])

history = list()            # periodic snapshots of dummy_matrix
stuck = False               # set while the logged loss has stopped moving
best_loss = float("inf")    # reference value for the stagnation check

def add_randomness(tensor, noise_factor=0.0001):
    """Return a perturbed, trainable copy of ``tensor``.

    Args:
        tensor: tensor to perturb; it is not modified.
        noise_factor: scale applied to standard-normal noise.

    Returns:
        A new leaf tensor equal to ``tensor`` plus the scaled noise, detached
        from ``tensor``'s graph and with ``requires_grad`` set to True.
    """
    jitter = noise_factor * torch.randn_like(tensor)
    perturbed = (tensor + jitter).detach().clone()
    perturbed.requires_grad = True
    return perturbed

# Gradient-matching loop: optimise dummy_matrix / dummy_label so that the weight
# update they would induce on `net` (-lr * grad) matches the recorded deltas.

# Built once instead of on every closure call.
loss_fn = torch.nn.BCEWithLogitsLoss()


def closure():
    """Evaluate the gradient-matching objective and back-propagate it.

    Reads the current module-level ``optimizer``, ``dummy_matrix`` and
    ``dummy_label`` at call time, so it keeps working after the loop below
    rebinds them.

    Returns:
        Scalar tensor: summed squared difference between the predicted weight
        updates and ``copy_delta``.

    Raises:
        ValueError: if the used parameters and ``copy_delta`` disagree in
            count or in any tensor shape (zip / broadcasting would otherwise
            hide the mismatch).
    """
    optimizer.zero_grad()
    pred = net(dummy_matrix)
    # NOTE(review): net.forward already ends in a sigmoid and BCEWithLogitsLoss
    # applies another one internally. Confirm this matches the loss that was
    # used when the recorded deltas were produced.
    dummy_loss = loss_fn(pred, dummy_label)

    # create_graph=True keeps the graph so the objective can be differentiated
    # with respect to the dummy tensors. Parameters that forward() never
    # touches come back as None and are skipped.
    dummy_dy_dx = torch.autograd.grad(dummy_loss, net.parameters(), create_graph=True, allow_unused=True)
    weight_updates = [-lr * grad for grad in dummy_dy_dx if grad is not None] # W_t+1 - W_t = -lr * grad

    if len(weight_updates) != len(copy_delta):
        raise ValueError(
            "gradient/delta count mismatch: %d gradients vs %d recorded deltas"
            % (len(weight_updates), len(copy_delta))
        )

    grad_diff = 0
    for gx, gy in zip(weight_updates, copy_delta):
        if gx.shape != gy.shape:
            raise ValueError(
                "gradient/delta shape mismatch: %s vs %s"
                % (tuple(gx.shape), tuple(gy.shape))
            )
        grad_diff += ((gx - gy) ** 2).sum()

    grad_diff.backward()
    return grad_diff


for iters in range(30000):
    optimizer.step(closure)

    if iters % 1000 == 0:
        current_loss = closure().item()
        print(iters, "%.4f" % current_loss)

        # Stagnation check against the last logged loss that still moved.
        stuck = abs(current_loss - best_loss) < 0.0001

        if stuck:
            # Perturb the input estimate to try to leave the plateau.
            print("Loss stuck at {:.4f}, adding randomness to dummy_matrix.".format(current_loss))
            current_lr = optimizer.param_groups[0]['lr']
            torch.save(dummy_matrix, 'dummy_matrix.pt')

            dummy_matrix = add_randomness(dummy_matrix)

            # The perturbed tensor is a new leaf, so the optimizer must be
            # rebuilt around it. This switches to Adam at half the previous
            # optimizer's learning rate.
            optimizer = torch.optim.Adam([dummy_matrix, dummy_label], lr=current_lr*0.5)

            stuck = False
        else:
            best_loss = current_loss

        history.append(dummy_matrix.clone().detach().cpu())

0 13.1000
1000 13.1131
2000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
3000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
4000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
5000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
6000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
7000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
8000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
9000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
10000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
11000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
12000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
13000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
14000 13.1131
Loss stuck at 13.1131, adding randomness to dummy_matrix.
15000 13.1131
Loss stuck at 13.1131, adding rando