In [1]:
import numpy as np
import time
#import pandas as pd
import matplotlib.pyplot as plt
import os

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

from torchsummary import summary

import gradnet as gn

In [2]:
def torch_cifar_loader_basic(bs=100, num_workers=16, root='data/'):
    """Build the CIFAR-10 data loaders used in this notebook.

    Args:
        bs: batch size of the train and test loaders.
        num_workers: number of worker processes for every DataLoader
            (previously hard-coded to 16).
        root: directory the dataset is downloaded to / read from
            (previously hard-coded to 'data/').

    Returns:
        (train_loader, test_loader, grad_test_loader):
          * train_loader      - shuffled, random crop + horizontal flip applied.
          * test_loader       - unshuffled, batch size ``bs``.
          * grad_test_loader  - the same test set, unshuffled, one sample per batch.
    """
    # Mean / std tuples handed to Normalize for both splits.
    mean = (0.4914, 0.4822, 0.4465)
    std = (0.2023, 0.1994, 0.2010)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    train_dataset = torchvision.datasets.CIFAR10(root=root,
                                                 train=True,
                                                 transform=transform_train,
                                                 download=True)

    test_dataset = torchvision.datasets.CIFAR10(root=root,
                                                train=False,
                                                transform=transform_test,
                                                download=True)

    # Options shared by all three loaders.
    loader_kwargs = dict(pin_memory=True, num_workers=num_workers)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=bs,
                                               shuffle=True,
                                               **loader_kwargs)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=bs,
                                              shuffle=False,
                                              **loader_kwargs)

    grad_test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                                   batch_size=1,
                                                   shuffle=False,
                                                   **loader_kwargs)

    return train_loader, test_loader, grad_test_loader

def torch_mnist_loader_basic(bs=100, num_workers=16, root='data/'):
    """Build the MNIST data loaders used in this notebook.

    Only ``ToTensor`` is applied to the images: no augmentation and no
    normalisation, for either split.

    Args:
        bs: batch size of the train and test loaders.
        num_workers: number of worker processes for every DataLoader
            (previously hard-coded to 16).
        root: directory the dataset is downloaded to / read from
            (previously hard-coded to 'data/').

    Returns:
        (train_loader, test_loader, grad_test_loader):
          * train_loader      - shuffled, batch size ``bs``.
          * test_loader       - unshuffled, batch size ``bs``.
          * grad_test_loader  - the same test set, unshuffled, one sample per batch.
    """
    transform_train = transforms.Compose([
        transforms.ToTensor(),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    train_dataset = torchvision.datasets.MNIST(root=root,
                                               train=True,
                                               transform=transform_train,
                                               download=True)

    test_dataset = torchvision.datasets.MNIST(root=root,
                                              train=False,
                                              transform=transform_test,
                                              download=True)

    # Options shared by all three loaders.
    loader_kwargs = dict(pin_memory=True, num_workers=num_workers)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=bs,
                                               shuffle=True,
                                               **loader_kwargs)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=bs,
                                              shuffle=False,
                                              **loader_kwargs)

    grad_test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                                   batch_size=1,
                                                   shuffle=False,
                                                   **loader_kwargs)

    return train_loader, test_loader, grad_test_loader

In [3]:
def base_train_step(model,device,optimizer,criterion,train_loader):
    """Perform one optimisation pass over every batch of ``train_loader``.

    Args:
        model: callable returning a sequence whose element 0 is passed to
            ``criterion`` together with the labels.
        device: device each batch is moved to before the forward pass.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(outputs, labels)``.
        train_loader: iterable of ``(images, labels)`` batches.

    Returns:
        None.
    """
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        # Only the first element of the model's output tuple enters the loss.
        batch_loss = criterion(model(batch_x)[0], batch_y)
        # Clear stale gradients right before back-propagating this batch.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
    return None

def base_model_eval(model,device,test_loader,dbg):
    """Count correct top-1 predictions of ``model`` over ``test_loader``.

    The model is switched to eval mode for the duration of the call (so that
    layers such as dropout do not perturb the predictions) and its previous
    train/eval mode is restored afterwards. No autograd graph is recorded.

    Args:
        model: ``nn.Module`` returning a sequence whose element 0 holds the
            per-class scores, one row per sample.
        device: device each batch is moved to before the forward pass.
        test_loader: iterable of ``(images, labels)`` batches.
        dbg: unused; kept so existing callers keep working.

    Returns:
        (correct, total): number of correctly classified samples and the
        number of samples seen.
    """
    total = 0
    correct = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)[0]
                # Index of the largest score along the class dimension.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Hand the model back in the mode the caller left it in.
        model.train(was_training)
    return correct,total

def stat_activations(model,device,test_loader,dbg):
    """Collect the activation outputs of ``model`` for every sample of a loader.

    For each batch, every output of the model after the first one
    (``out[1:]``) is concatenated along dim 1; the batches are then stacked
    along dim 0 and converted to a boolean NumPy array (non-zero -> True).
    For a model returning five outputs this matches the former hard-coded
    ``out[1]..out[4]`` handling.

    Fixes over the previous version: the ``model`` argument is used instead of
    the notebook-global ``model_base``; the stray assignment to ``out[1].type``
    (which overwrote the tensor's ``type`` method) is gone; ``np.bool`` (removed
    from recent NumPy releases) is replaced by the builtin ``bool``; no autograd
    graph is kept alive; batches are concatenated once instead of repeatedly.

    Args:
        model: callable returning a sequence ``(logits, act_1, ..., act_k)``
            where each ``act_i`` has the batch on dim 0.
        device: device each batch is moved to before the forward pass.
        test_loader: iterable of ``(images, labels)`` batches; labels are ignored.
        dbg: unused; kept so existing callers keep working.

    Returns:
        Boolean ``numpy.ndarray`` with one row per sample.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    per_batch = []
    with torch.no_grad():
        for im, _ in test_loader:
            out = model(im.to(device))
            # Join all activation outputs of this batch side by side, then
            # move them off the device so they do not pile up there.
            per_batch.append(torch.cat(tuple(out[1:]), dim=1).cpu())

    if not per_batch:
        raise ValueError("stat_activations: test_loader yielded no batches")

    return torch.cat(per_batch, dim=0).numpy().astype(bool)

def tangent_sens(x,model):
    """Norm of the mixed input/parameter second derivatives of the model output.

    The scalar ``sum(model(x)[0][0])`` is differentiated with respect to ``x``;
    every element of that input gradient is then differentiated with respect
    to all model parameters. All resulting (non-``None``) gradients are
    flattened, concatenated and reduced with ``torch.norm``.

    Side effects: sets ``x.requires_grad = True`` and prints the elapsed time.

    Args:
        x: input tensor; only sample 0 of the model's first output is used.
        model: ``nn.Module`` returning a sequence whose element 0 is indexed
            with ``[0]`` and summed.

    Returns:
        0-dim tensor holding the norm. When no parameter receives a gradient
        (every entry is ``None``) a zero tensor is returned instead of failing
        on an unbound variable.
    """
    x.requires_grad = True
    out = torch.sum(model(x)[0][0])
    t0=time.time()
    grads = torch.autograd.grad(out,x,retain_graph=True,create_graph=True)[0]

    # The parameter list does not change inside the loop; materialise it once.
    params = tuple(model.parameters())

    pieces = []
    for k in grads.flatten():
        act_grad = torch.autograd.grad(k,params,retain_graph=True,create_graph=True,allow_unused=True)
        # allow_unused=True yields None for parameters k does not depend on.
        pieces.extend(g.flatten() for g in act_grad if g is not None)

    if pieces:
        # Single concatenation instead of growing a tensor inside the loop.
        ts = torch.norm(torch.cat(pieces))
    else:
        ts = grads.new_zeros(())

    print(time.time()-t0,"sec")
    return ts

In [4]:
# Run on the GPU when CUDA is usable, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Debug log opened in append mode; later cells pass this handle to the
# evaluation / activation helpers.
dbg = open("tansens.dbg","a")
# cuDNN version, CUDA version reported by torch, and CUDA availability.
print(torch.backends.cudnn.version(), torch.version.cuda, torch.cuda.is_available())
print("device:", device)

num_epochs = 100
learning_rate = 0.1
# CIFAR-10 loaders; train and test loaders use batches of 64.
train_loader, test_loader, grad_test_loader = torch_cifar_loader_basic(64)
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

8005 11.1 True
device: cuda
Files already downloaded and verified
Files already downloaded and verified


In [5]:
class CNN(nn.Module):
    """Two-convolution classifier for single-channel images.

    Each stage is conv (5x5 kernel) -> 2x2 max-pool -> ReLU; the second stage
    has ``Dropout2d`` between the convolution and the pooling. The final linear
    layer expects ``4*4*64`` features.

    ``forward`` returns ``(logits, s1, s2)`` where ``s1`` / ``s2`` are the signs
    of the first / second stage activations, flattened over all dimensions
    (batch included).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(4*4*64, num_classes)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        # Stage 1: convolution, pooling, ReLU.
        stage1 = self.conv1(x)
        stage1 = self.max_pool(stage1)
        stage1 = self.relu(stage1)

        # Stage 2: convolution, channel dropout, pooling, ReLU.
        stage2 = self.conv2(stage1)
        stage2 = self.conv2_drop(stage2)
        stage2 = self.max_pool(stage2)
        stage2 = self.relu(stage2)

        # One feature row per sample for the classifier.
        features = stage2.view(stage2.size(0), -1)
        logits = self.fc1(features)
        return logits, torch.sign(stage1.flatten()), torch.sign(stage2.flatten())

class CNNc3(nn.Module):
    """Two-convolution classifier for three-channel images.

    Each stage is conv (5x5 kernel) -> 2x2 max-pool -> ReLU; the second stage
    has ``Dropout2d`` between the convolution and the pooling. The final linear
    layer expects ``5*5*64`` features.

    ``forward`` returns ``(logits, x1, x2)`` where ``x1`` / ``x2`` are the raw
    (not sign-reduced) activations of the first / second stage.
    """

    def __init__(self, num_classes=10):
        super(CNNc3, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=5)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(5*5*64, num_classes)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    def forward(self, x):
        x1 = self.relu(self.max_pool(self.conv1(x)))
        x2 = self.relu(self.max_pool(self.conv2_drop(self.conv2(x1))))
        # Flatten the second-stage features (previously the *input* ``x`` was
        # flattened here, which does not match fc1's expected feature count).
        x3 = x2.view(x2.size(0), -1)
        x4 = self.fc1(x3)
        return x4,x1,x2
    
class MLP1x(nn.Module):
    """Fully connected network with a single ReLU hidden layer.

    Args:
        dim: number of input features after flattening each sample.
        hidd: width of the hidden layer.
        num_classes: number of output scores.

    ``forward`` returns ``(logits, hidden)`` where ``hidden`` is the hidden
    layer's post-ReLU activation.
    """

    def __init__(self, dim, hidd, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(dim, hidd)
        self.fc2 = nn.Linear(hidd, num_classes)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # Flatten every sample to a feature vector before the first layer.
        flat = x.view(x.size(0), -1)
        hidden = self.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return logits, hidden
    
class MLP2x(nn.Module):
    """Fully connected network with two ReLU hidden layers of equal width.

    Args:
        dim: number of input features after flattening each sample.
        hidd: width of both hidden layers.
        num_classes: number of output scores.

    ``forward`` returns ``(logits, s1, s2)`` where ``s1`` / ``s2`` are the signs
    of the first / second hidden layer's post-ReLU activations.
    """

    def __init__(self, dim, hidd, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(dim, hidd)
        self.fc2 = nn.Linear(hidd, hidd)
        self.fc3 = nn.Linear(hidd, num_classes)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # Flatten every sample to a feature vector before the first layer.
        flat = x.view(x.size(0), -1)
        hidden_a = self.relu(self.fc1(flat))
        hidden_b = self.relu(self.fc2(hidden_a))
        logits = self.fc3(hidden_b)
        return logits, torch.sign(hidden_a), torch.sign(hidden_b)
    
class MLP4x(nn.Module):
    """Fully connected network with four ReLU hidden layers of equal width.

    Args:
        dim: number of input features after flattening each sample.
        hidd: width of every hidden layer.
        num_classes: number of output scores.

    ``forward`` returns ``(logits, s1, s2, s3, s4)`` where ``s_i`` is the sign
    of the i-th hidden layer's post-ReLU activation.
    """

    def __init__(self, dim, hidd, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(dim, hidd)
        self.fc2 = nn.Linear(hidd, hidd)
        self.fc3 = nn.Linear(hidd, hidd)
        self.fc4 = nn.Linear(hidd, hidd)
        self.fc5 = nn.Linear(hidd, num_classes)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # Flatten every sample to a feature vector before the first layer.
        hidden = x.view(x.size(0), -1)
        patterns = []
        # Each hidden layer is linear -> ReLU; record the sign of its output.
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = self.relu(layer(hidden))
            patterns.append(torch.sign(hidden))
        logits = self.fc5(hidden)
        return (logits, *patterns)

In [6]:
directory = "states"

# Create the checkpoint directory when it is missing. Unlike the former
# os.stat / bare-except / os.mkdir sequence, this does not swallow unrelated
# errors.
os.makedirs(directory, exist_ok=True)

learning_rate = 0.05
model_base = MLP4x(dim=3*32*32,hidd=100).to(device)
criterion_base = nn.CrossEntropyLoss()
# NOTE(review): the output file names below say "no_wd", yet the optimizer is
# built with weight_decay=5e-4 - confirm which one is intended.
optimizer_base = torch.optim.SGD(model_base.parameters(), lr=learning_rate, weight_decay=5e-4)

def update_lr(optimizer_base, lr):
    """Set the learning rate of every parameter group of the optimizer to ``lr``."""
    for param_group in optimizer_base.param_groups:
        param_group['lr'] = lr

summary(model_base,input_size=(1,3,32,32))

epochs = 200
curr_lr = learning_rate

# Common prefix of every checkpoint / activation file written by this run.
run_prefix = os.path.join(directory, "mlp4x100_cifar_sgd_no_wd_")

# The context manager closes the per-epoch log even if training is interrupted.
with open("ts_cifar_no_wd.dbg","w") as tansens:
    for epoch in range(epochs):
        t0 = time.time()
        base_train_step(model_base,device,optimizer_base,criterion_base,train_loader)
        t1 = time.time()
        correct_te, total_te = base_model_eval(model_base,device,test_loader,dbg)
        correct_tr, total_tr = base_model_eval(model_base,device,train_loader,dbg)
        torch.save(model_base.state_dict(), run_prefix+str(epoch)+".ckpt")

        t2 = time.time()
        # NOTE(review): train_loader is built with shuffle=True, so the row
        # order of l_tr differs from epoch to epoch - confirm this is acceptable.
        l_tr = stat_activations(model_base,device,train_loader,dbg)
        l_te = stat_activations(model_base,device,test_loader,dbg)
        t3 = time.time()
        np.save(run_prefix+"te_"+str(epoch)+".npy",l_te)
        np.save(run_prefix+"tr_"+str(epoch)+".npy",l_tr)
        # Columns: epoch, train accuracy, test accuracy, total seconds,
        # training seconds, evaluation + checkpoint seconds, activation seconds.
        print(epoch,correct_tr/total_tr,correct_te/total_te,"in",t3-t0,"sec",t1-t0,t2-t1,t3-t2,file=tansens)
        tansens.flush()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 100]         307,300
              ReLU-2                  [-1, 100]               0
            Linear-3                  [-1, 100]          10,100
              ReLU-4                  [-1, 100]               0
            Linear-5                  [-1, 100]          10,100
              ReLU-6                  [-1, 100]               0
            Linear-7                  [-1, 100]          10,100
              ReLU-8                  [-1, 100]               0
            Linear-9                   [-1, 10]           1,010
Total params: 338,610
Trainable params: 338,610
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.01
Params size (MB): 1.29
Estimated Total Size (MB): 1.31
-------------------------------------------

FileNotFoundError: [Errno 2] No such file or directory: 'states/mlp4x100_cifar_sgd_no_wd_0.ckpt'