In [1]:
import argparse
import os
import sys
import tabulate
import time
import torch
import torch.nn.functional as F

import curves
import data
import models
import utils

import numpy as np

In [2]:
torch.backends.cudnn.benchmark = True

loaders, num_classes = data.loaders(
    "CIFAR10",
    "data",
    128,
    1,
    "VGG",
    False)

Files already downloaded and verified
Using train (45000) + validation (5000)
Files already downloaded and verified


In [3]:
architecture = getattr(models, "VGG16")
model1 = architecture.base(num_classes=10, **architecture.kwargs)
model2 = architecture.base(num_classes=10, **architecture.kwargs)

model1.load_state_dict(torch.load("curves/curve1/checkpoint-100.pt")['model_state'])
model2.load_state_dict(torch.load("curves/curve2/checkpoint-100.pt")['model_state'])

In [4]:
model1

VGGBase(
  (layer_blocks): ModuleList(
    (0): ModuleList(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (1): ModuleList(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (2): ModuleList(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (3): ModuleList(
      (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (4): ModuleList(
      (0): Conv2d(512, 512, kernel_size=(3, 3), 

In [4]:
model3 = architecture.base(num_classes=10, **architecture.kwargs)
model3.cuda();

In [5]:
criterion = F.cross_entropy
regularizer = None 
optimizer = torch.optim.SGD(
    filter(lambda param: param.requires_grad, model3.parameters()),
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0
)

In [6]:
def train(train_loader, model, optimizer, criterion, regularizer=None, lr_schedule=None):
    loss_sum = 0.0
    correct = 0.0

    num_iters = len(train_loader)
    model.train()
    for iter, (input, target) in enumerate(train_loader):
        if lr_schedule is not None:
            lr = lr_schedule(iter / num_iters)
            adjust_learning_rate(optimizer, lr)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        output = model(input)
        loss = criterion(output, target)
        if regularizer is not None:
            loss += regularizer(model)

        optimizer.zero_grad()
        loss.backward()
        
        loss_sum += loss.item() * input.size(0)
        pred = output.data.argmax(1, keepdim=True)
        correct += pred.eq(target.data.view_as(pred)).sum().item()
        
    grad = np.concatenate([p.grad.data.cpu().numpy().ravel() for p in model.parameters()])
    
    return grad

## Computing gradients 

In [None]:
num_points = 50
l_grad = []
for j in range(0, num_points):
    for p1, p2, p3 in zip(model1.parameters(), model2.parameters(), model3.parameters()):
        alpha = j * 1.0 / (num_points-1)
        p3.data.copy_(alpha * p1.data + (1.0 - alpha) * p2.data)        
    grad = train(loaders['train'], model3, optimizer, criterion, )
    l_grad.append(grad)
        

In [60]:
grad = l_grad

In [68]:
l_grad = grad

In [69]:
for i, v in enumerate(l_grad):
    print(np.linalg.norm(v))
    l_grad[i] = v/np.linalg.norm(v)

0.99999994
1.0
1.0
1.0
1.0000001


In [70]:
s = 0
for v in l_grad:
    s += v
print(np.linalg.norm(s)/num_points)
s = s/np.linalg.norm(s)

0.49538140296936034


In [71]:
for v in l_grad:
    print((v*s).sum())

0.4571768
0.5298122
0.5464061
0.517551
0.42598894


In [73]:
for i in range(len(l_grad)):
    l_grad[i] = l_grad[i]-(l_grad[i]*s).sum()*s
    print(np.linalg.norm(l_grad[i]))

0.8893923
0.8481226
0.8375253
0.85565865
0.9047352


In [74]:
s = 0
for v in l_grad:
    s += v
print(np.linalg.norm(s)/num_points)
s = s/np.linalg.norm(s)

3.1492970720137234e-08


In [75]:
for v in l_grad:
    print((v*s).sum())

-0.0019658336
-0.0009832255
0.0011526728
0.0003624293
0.0014340992
