In [183]:
# Change the working directory before anything else runs; later cells use the
# relative path "data/external", presumably resolved against this directory.
# NOTE(review): this is an absolute, machine-specific path — adjust before
# running the notebook on another machine.
%cd '/home/aris/projects/grab_exp'

# NOTE(review): per the IPython autoreload docs, mode 1 only reloads modules
# registered with %aimport, and no %aimport appears in the visible cells —
# confirm whether `%autoreload 2` (reload everything) was intended.
%load_ext autoreload
%autoreload 1

from IPython.display import display

/home/aris/projects/grab_exp
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [184]:
import os
import sys
from functools import partial, reduce
from pathlib import Path
from dataclasses import dataclass, field

import evaluate
import numpy as np
import pandas as pd
import wandb
from tqdm import tqdm
from absl import logging
from pprint import pprint

import torch
import torch.nn as nn
from torchvision import datasets, transforms

In [185]:
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Emit INFO-level absl messages so the logging.info(...) calls in later cells
# show up in the cell output.
logging.set_verbosity(logging.INFO)

In [186]:
# Per-channel mean / std passed to Normalize after the image has been turned
# into a tensor.
CIFAR10_MEAN = [0.49139968, 0.48215841, 0.44653091]
CIFAR10_STD = [0.24703223, 0.24348513, 0.26158784]

to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD)
transform = transforms.Compose([to_tensor, normalize])

# Both splits share the same storage root, download flag and preprocessing;
# only the `train` flag differs.
cifar10_kwargs = dict(root="data/external", download=True, transform=transform)
train_dataset = datasets.CIFAR10(train=True, **cifar10_kwargs)
test_dataset = datasets.CIFAR10(train=False, **cifar10_kwargs)

# Passed to LeNet(in_dim, num_classes) as its input-channel / output sizes.
in_dim, num_classes = 3, 10

loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

Files already downloaded and verified
Files already downloaded and verified


In [187]:
class LeNet(nn.Module):
    """LeNet-style convolutional classifier.

    Three 5x5 convolutions (the first two each followed by 2x2 max-pooling)
    feed a two-layer fully connected head. With the default stride and no
    padding, a 32x32 input is reduced to a 120x1x1 feature map, which is
    what the flattening step in ``forward`` relies on.

    Args:
        in_dim: Number of input image channels.
        out_dim: Number of output logits (classes).
    """

    def __init__(self, in_dim=3, out_dim=10):
        super().__init__()

        # Feature extractor: conv -> ReLU -> pool, twice, then a final conv
        # that produces 120 channels.
        self.conv = nn.Sequential(
            nn.Conv2d(in_dim, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 120, 5),
            nn.ReLU(),
        )
        # Classifier head on the flattened 120-dimensional features.
        self.fc = nn.Sequential(nn.Linear(120, 84), nn.ReLU(), nn.Linear(84, out_dim))

    def forward(self, x):
        """Return class logits for ``x``.

        Args:
            x: Image tensor of shape ``(N, C, H, W)``, or a single unbatched
                image of shape ``(C, H, W)``.

        Returns:
            Logits of shape ``(N, out_dim)`` for batched input (including
            ``N == 1``), or ``(out_dim,)`` for an unbatched image.
        """
        # Remember whether the caller passed a single image so that only that
        # case loses its leading dimension. A bare ``.squeeze()`` would also
        # collapse a genuine batch of size one, producing logits whose shape
        # no longer lines up with a ``(1,)`` target tensor.
        unbatched = x.dim() == 3
        x = self.conv(x)
        x = x.view(-1, 120)
        x = self.fc(x)
        return x.squeeze(0) if unbatched else x

In [188]:
# Build the network and move its parameters to the selected device.
model = LeNet(in_dim, num_classes)
model = model.to(device)

# d: total number of scalar entries across all parameter tensors.
d = sum(weights.numel() for weights in model.parameters())
logging.info(f"Number of training examples: n = {len(train_dataset):,}")
logging.info(f"Number of parameters: d = {d:,}")

INFO:absl:Number of training examples: n = 50,000
INFO:absl:Number of parameters: d = 62,006


In [189]:
# Mini-batch size; `b` is a short alias used when sizing `grab_beta`.
b = 16
batch_size = b

In [190]:
# One trainable scalar per position in the mini-batch, initialised to zero.
grab_beta = torch.zeros(b, device=device).requires_grad_(True)

In [178]:
def forward_hood(model, input):
    """Forward pre-hook that perturbs a layer's input with scaled noise.

    Standard-normal noise shaped like the input is multiplied, along the
    leading dimension, by the module-level ``grab_beta`` vector and added to
    the input. The body mirrors the definition used further down in this
    notebook; the original cell had no body and therefore did not parse.

    Args:
        model: The module the hook is attached to (unused).
        input: Tuple of positional inputs to the module; exactly one is
            expected.

    Returns:
        The perturbed input tensor, which replaces the module's input.
    """
    assert len(input) == 1
    w = input[0]
    return w + torch.einsum("b,b...->b...", grab_beta, torch.randn_like(w))

In [179]:
total = 0

# Attach the pre-hook to every leaf module (one without children) that owns
# at least one parameter, and tally the parameters covered along the way.
for layer in model.modules():
    has_children = next(layer.children(), None) is not None
    if has_children:
        continue
    n_params = sum(p.numel() for p in layer.parameters())
    if n_params == 0:
        continue
    total += n_params
    layer.register_forward_pre_hook(forward_hood)

print(total)

62006


In [182]:
from accelerate.utils import set_seed

# Seed the RNGs before the model is constructed so its initial weights (and
# the noise drawn later in the hooks) are repeatable.
set_seed(42)

# Fresh model and fresh per-sample scale vector for this experiment.
model = LeNet(in_dim, num_classes)
model = model.to(device)
grab_beta = torch.zeros(b, device=device).requires_grad_(True)

# d: total number of scalar entries across all parameter tensors.
d = sum(weights.numel() for weights in model.parameters())
logging.info(f"Number of training examples: n = {len(train_dataset):,}")
logging.info(f"Number of parameters: d = {d:,}")


def forward_hood(model, input):
    """Forward pre-hook: add per-sample scaled Gaussian noise to the input.

    Noise with the same shape as the layer input is drawn from a standard
    normal and scaled along the leading dimension by the module-level
    ``grab_beta`` vector. The sum replaces the layer's input.

    Args:
        model: The module the hook fires on (unused).
        input: Tuple of positional inputs; must contain exactly one tensor.

    Returns:
        The input tensor plus the scaled noise.
    """
    assert len(input) == 1
    (activations,) = input
    noise = torch.randn_like(activations)
    scaled_noise = torch.einsum("b,b...->b...", grab_beta, noise)
    return activations + scaled_noise


total = 0

# Hook every parameter-owning leaf module of the freshly built model and count
# how many parameters those modules hold in total.
for layer in model.modules():
    is_leaf = next(layer.children(), None) is None
    if not is_leaf:
        continue
    n_params = sum(p.numel() for p in layer.parameters())
    if n_params > 0:
        total += n_params
        layer.register_forward_pre_hook(forward_hood)

print(total)

dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    num_workers=2,
)

# Only the first mini-batch is used: the loop exits after one iteration.
for x, y in dataloader:
    x = x.to(device)
    y = y.to(device)

    logits = model(x)
    loss = loss_fn(logits, y)
    print(loss)

    # Back-propagate to obtain d(loss)/d(grab_beta), then show the gradient
    # and its element-wise sign.
    loss.backward()
    beta_grad = grab_beta.grad
    print(beta_grad)
    print(beta_grad.sign())
    break

# grab_beta itself is never updated here; only its .grad is populated.
print(grab_beta)

INFO:absl:Number of training examples: n = 50,000
INFO:absl:Number of parameters: d = 62,006


62006
tensor(2.2811, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor([-0.0628,  0.0756,  0.0187,  0.0025,  0.0413, -0.0972, -0.0579, -0.1009,
        -0.0128, -0.0067,  0.0191,  0.0411, -0.0118, -0.0023,  0.0237, -0.0542],
       device='cuda:0')
tensor([-1.,  1.,  1.,  1.,  1., -1., -1., -1., -1., -1.,  1.,  1., -1., -1.,
         1., -1.], device='cuda:0')
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       device='cuda:0', requires_grad=True)


In [98]:
from grablib import GraBSampler