In [1]:
# Gautam Jain, Jannis Horn 

%matplotlib notebook
import time
from typing import Tuple
from collections import OrderedDict
import numpy as np
import matplotlib as plt
import torch  
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as topt
import wandb

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

torch.manual_seed( 666 )

<torch._C.Generator at 0x7f73138ccf90>

In [2]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms

trainset = dsets.CIFAR10('./data', train=True, download=True, transform=transforms.ToTensor())
testset = dsets.CIFAR10('./data', train=False, download=True, transform=transforms.ToTensor())

print( "Input Shape: {}".format( trainset[0][0].shape ) )
print( "Training Set: {}, Test Set: {}".format( len(trainset), len(testset) ) )

#print( trainset[0][0] )

dim_in = tuple(trainset[0][0].shape)
dim_out = 10
dt_dims = (dim_in, dim_out)

Files already downloaded and verified
Files already downloaded and verified
Input Shape: torch.Size([3, 32, 32])
Training Set: 50000, Test Set: 10000


In [3]:
def logTraining( epoch, train_loss, time ):
    wandb.log( {"train_loss": train_loss, "train_time": time}, step=epoch )
    
def logTest( epoch, test_loss, acc, conf_mat, time ):
    wandb.log( {"test_loss": test_loss,
                "accuracy": acc, 
                "conf_mat": [wandb.Image(conf_mat, caption="Confussion Matrix")],
                "test_time": time},
               step=epoch)

In [4]:
#hook taken from https://discuss.pytorch.org/t/how-can-l-load-my-best-model-as-a-feature-extractor-evaluator/17254/6

class ConvNet( nn.Module ):
    def __init__( self, c_layers, l_layers, map_size ):
        super( ConvNet, self ).__init__()
        self.conv_layers = c_layers
        self.lin_layers = l_layers
        self.lin_inp_dim = l_layers[0][0].in_features
        self.map_size = map_size
        self.gradients = {}
        self.activations = {}
        
    
    def save( self, f ):
        torch.save(self.state_dict(), "{}.th".format(f))
        
    def load( self, f ):
        self.load_state_dict(torch.load( "{}.th".format(f) ))
        
    def hook( self, mode, seq, name ):
        def getActivationHook( model, input, output ):
            self.activations[name] = output.detach().to( "cpu" )
        def getGradientHook( model, grad_input, grad_output ):
            self.gradients[name] = grad_output.detach().to( "cpu" )
        if hook in [1,3]:
            module.register_forward_hook( getActivationHook )( seq[0], name, self.activations )
        if hook in [2,3]:
            module.register_backward_hook( getGradientHook )( seq[0], name, self.gradients )
                
    
    def forward( self, x ):
        for layer in self.conv_layers:
            x = layer(x)
        x = x.view( -1, self.lin_inp_dim )
        for layer in self.lin_layers:
            x = layer(x)
        return x
    
    
    def getL1Norm( self ):
        n = torch.tensor( 0.0 ).to( device )
        for l in self.conv_layers:
            n += l[0].weight.abs().sum()
        for l in self.lin_layers[:-1]:
            n += l[0].weight.abs().sum()
        n += self.lin_layers[-1].weight.abs().sum()
        return n
    
    def getL2Norm( self ):
        n = torch.tensor( 0.0 ).to( device )
        for l in self.conv_layers:
            n += l[0].weight.square().sum()
        for l in self.lin_layers[:-1]:
            n += l[0].weight.square().sum()
        n += self.lin_layers[-1].weight.square().sum()
        return n
    
    
    def getGradientNorm( self, norm=2 ):
        out = {}
        for key, val in self.gradients:
            out[key] = val.norm( norm )
            
    def getActivations( self ):
        return self.activations
    
    
    def wandbConfig( self ):
        wandb.config.conv_layers = len(self.conv_layers)
        wandb.config.lin_layers = len(self.lin_layers)
        for it, seq in enumerate(self.conv_layers):
            self.convToParamSet( it, seq )
        for it, seq in enumerate(self.lin_layers):
            self.linToParamSet( it, seq )
            
    def convToParamSet( self, it, seq ):
        def wandbConfig( name, it, key, val ):
            wandb.config.update( {"{}{}_{}".format(name, it, key): val} )
            
        for name, mod in seq.named_modules():
            if "conv" in name:
                wandbConfig( name, it, "ch_out", mod.out_channels )
                wandbConfig( name, it, "ksize", mod.kernel_size )
                wandbConfig( name, it, "stride", mod.stride )
                wandbConfig( name, it, "pad", mod.padding )
            elif "pool" in name:
                if isinstance( mod, nn.MaxPool2d ): pt = "max"
                elif isinstance( mod, nn.AvgPool2d ): pt = "avg"
                wandbConfig( name, it, "pool_type", pt )
                wandbConfig( name, it, "pool_ks", mod.kernel_size )
            elif "drop" in name:
                wandb.config.update( {"{}{}".format(name,it): mod.p} )
            elif "act" in name:
                wandb.config.update( {"{}{}".format(name,it): mod} )
                
    def linToParamSet( self, it, seq ):
        for name, mod in seq.named_modules():
            if "lin" in name:
                wandb.config.update( {"{}{}".format(name,it): mod.out_features} )
            if "drop" in name:
                wandb.config.update( {"{}{}".format(name,it): mod.p} )

In [5]:
class Config():
    def __init__(self, dt_dims ):
        super().__init__()
        self.dts_dims = dt_dims
        self.num_c = 0
        self.num_l = 0
        self.ps_c = []
        self.ps_l = []

    @classmethod 
    def fromList( cls, dt_dims, ilist ):
        out = cls( dt_dims )
        out.parseList( ilist )
        return out

    @classmethod
    def fromDict( cls, dt_dims, idict ):
        out = cls( dt_dims )
        out.parseDict( idict )
        return out

    def parseDict( self, idict ):
        def ifInDict( key, default ):
            if key in idict:
                return idict[key]
            else:
                return default

        act_f = ifInDict( "act", "relu" )
        dp_f = ifInDict( "dp", 0.0 )
        if "num_c" in idict:
            out_c = ifInDict( "out_c", None )
            ksize_c = ifInDict( "ksize", None )
            act_c = ifInDict( "act_c", act_f )
            stride_c = ifInDict( "stride", 1 )
            pad_c = ifInDict( "pad", False )
            pool_c = ifInDict( "pool", 0 )
            dp_c = ifInDict( "dp_c", dp_f )
            for it in range(idict["num_c"]):
                out = ifInDict( "out_c_{}".format(it), out_c )
                ksize = ifInDict( "ksize_{}".format(it), ksize_c )
                act = ifInDict( "act_c_{}".format(it), act_c )
                stride = ifInDict( "stride_{}".format(it), stride_c )
                pad = ifInDict( "pad_{}".format(it), pad_c )
                pool = ifInDict( "pool_{}".format(it), pool_c )
                dp = ifInDict( "dp_c_{}".format(it), dp_c )
                self.addConvLayer( out, ksize, act, stride, pad, pool, dp )
        if "num_l" in idict:
            out_l = ifInDict( "out_l", None )
            act_l = ifInDict( "act_l", act_f )
            dp_l = ifInDict( "dp_l", dp_f )
            for it in range( idict["num_l"] ):
                out = ifInDict( "out_l_{}", out_l )
                act = ifInDict( "act_l_{}", act_l )
                dp_l = ifInDict( "dp_l_{}", dp_l )
                self.addLinearLayer( out, act, dp_l )


    def parseList( self, ilist ):
        def ifIt( obj, it, default ):
            if len(obj) > it: return obj[it]
            else: return default

        for tp in ilist:
            #Convolutional Parameters: ["c",ch_out,ksize,act_str,stride*,pad*,pool*,dropout*]
            if tp[0] == "c":
                self.addConvLayer( tp[1], tp[2], tp[3], ifIt(tp,4,1), ifIt(tp,5,0),
                                   ifIt(tp,6,("max",1)), ifIt(tp,7,0.0) )
            #Linear Parameters: ["l",size_out,act_str,dropout*]
            elif tp[0] == "l":
                self.addLinearLayer( tp[1], tp[2], ifIt(tp,3,0.0) )



    def addConvLayer( self, ch_out: int, k_s: int, act: str, stride: int, pad: bool, 
                      pool: Tuple[str, int], dropout: float ):
        self.num_c += 1
        self.ps_c.append( {"out":ch_out, "ksize":k_s, "act":act, "str":stride, 
                           "pad":pad, "pool":pool, "dp":dropout} )

    def addLinearLayer( self, s_out: int, act: str, dropout: int ):
        self.num_l += 1
        self.ps_l.append( {"out":s_out, "act":act, "dp":dropout} )


    def __getitem__( self, it ):
        if it < self.num_c:
            return self.ps_c[it]
        else:
            return self.ps_l[it-num_c] 


class Constructor:
    def __call__( self, net_type, config ):
        conv_layers = nn.ModuleList()
        lin_layers = nn.ModuleList()
        map_size = []
        dim_in = config.dts_dims[0]
        map_size.append( dim_in )
        for ps in config.ps_c:
            l, dim_in = self.buildConvLayer( dim_in, ps )
            conv_layers.append( l )
            map_size.append( dim_in )
        dim_in = np.array(dim_in, dtype=np.int).prod().item()
        for ps in config.ps_l:
            l, dim_in = self.buildLinLayer( dim_in, ps )
            lin_layers.append( l )
            map_size.append( dim_in )
        lin_layers.append( nn.Linear( dim_in, config.dts_dims[1] ) )
        map_size.append( config.dts_dims[1] )
        return net_type( conv_layers, lin_layers, map_size )

    def buildConvLayer( self, d_in, params ):
        out = OrderedDict()
        if params["pad"]: pad = int((params["ksize"]-1) /2)
        else: pad = 0
        out["conv"] = nn.Conv2d( d_in[0], params["out"], params["ksize"], params["str"], padding=pad )
        if params["dp"] > 0.0:
            out["drop"] = nn.Dropout( params["dp"] )
        pl = params["pool"]
        if pl[1] > 1:
            pool = self.strToPool( pl[0] )
            out["pool"] = pool( pl[1] )
        out["act"] = self.strToAct( params["act"] )()
        toNewSize = lambda x : (x-params["ksize"]+1 +pad*2) /params["str"] / pl[1]
        d_out = (params["out"], toNewSize(d_in[1]), toNewSize(d_in[2]))
        return nn.Sequential( out ), d_out

    def buildLinLayer( self, s_in, params ):
        out = OrderedDict()
        out["lin"] = nn.Linear( s_in, params["out"] )
        if params["dp"] > 0.0:
            out["drop"] = nn.Dropout( params["dp"] )
        out["act"] = self.strToAct( params["act"] )()
        return nn.Sequential( out ), params["out"]

    def strToAct( self, act_str ):
        if act_str in ["relu", "ReLU"]:
            return nn.ReLU
        elif act_str in ["sig", "sigmoid", "Sigmoid"]:
            return nn.Sigmoid
        elif act_str in ["tanh", "Tanh"]:
            return nn.Tanh
        else:
            raise RuntimeError( "Unknown act_str" )

    def strToPool( self, p_str ):
        if p_str == "max": 
            return nn.MaxPool2d
        elif p_str == "avg": 
            return nn.AvgPool2d
        else:
            raise RuntimeError( "Unknown p_str" )
            
llist = [["c",16,5,"relu",1,True,("max",2)]]*3 +[["l",256,"relu",0.2]]
ldict = {"num_c": 3, "num_l":2, "act":"relu", "dp":0.2, "ksize":3, "pool":("max",2), 
         "out_c":16, "out_c_1":32, "out_l":256, "pad":True}
cfg = Config.fromList( dt_dims, llist )
cstr = Constructor()
test_net = cstr( ConvNet, cfg )
test_net_2 = cstr( ConvNet, Config.fromDict(dt_dims,ldict) )
print(test_net, test_net.map_size)
print(test_net_2, test_net_2.map_size)

ConvNet(
  (conv_layers): ModuleList(
    (0): Sequential(
      (conv): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (act): ReLU()
    )
    (1): Sequential(
      (conv): Conv2d(16, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (act): ReLU()
    )
    (2): Sequential(
      (conv): Conv2d(16, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (act): ReLU()
    )
  )
  (lin_layers): ModuleList(
    (0): Sequential(
      (lin): Linear(in_features=256, out_features=256, bias=True)
      (drop): Dropout(p=0.2, inplace=False)
      (act): ReLU()
    )
    (1): Linear(in_features=256, out_features=10, bias=True)
  )
) [(3, 32, 32), (16, 16.0, 16.0), (16, 8.0, 8.0), (16, 4.0, 4

In [10]:
class Runner:
    def __init__( self, project_name, entity="cudavisionlab" ):
        self.pr_name = project_name
        self.entity = entity
    
    def wandbConfig( self ):
        self.model.wandbConfig()
        wandb.config.optimizer = self.optimizer
        wandb.config.regularizer = "lambdas={}".format( self.reg_w )
        wandb.config.dataset = "Cifar10"
    
    
    def initRun( self, model, batch_size, dev, opt, lr, momentum, reg ):
        self.model = model
        self.dev = dev
        self.model.to( dev )
        if opt in [topt.Adagrad, topt.Adadelta, topt.Adam]:
            self.optimizer = opt( model.parameters(), lr=lr )
        else:
            self.optimizer = opt( model.parameters(), lr=lr, momentum=momentum )
        self.loss_func = nn.CrossEntropyLoss().to( dev )
        self.reg_w = torch.Tensor([reg[0], reg[1]]).to( dev )
        self.tr_size = len( trainset )
        self.te_size = len( testset )
        self.loader_training = torch.utils.data.DataLoader( dataset=trainset, 
                                                            batch_size=batch_size, 
                                                            shuffle=True,
                                                            num_workers=2 )
        self.loader_eval = torch.utils.data.DataLoader( dataset=testset, 
                                                        batch_size=batch_size, 
                                                        shuffle=False,
                                                        num_workers=2 )
            
    
    def testModel( self ):
        self.model.eval()
        with torch.no_grad():
            st_pt = time.time()
            conv_mat = np.zeros([10,10])
            corr = 0
            bt_loss = 0.0
            for (x, label) in self.loader_eval:
                #x = x.view( -1, dim_in ).to( self.dev )
                x = x.to( self.dev )
                label = label.to( self.dev )
                out = self.model(x)
                loss = self.loss_func( out, label )
                bt_loss += loss.cpu().item()
                
                _, pred = torch.max( out.data, 1 )
                ls = label.cpu().numpy()
                pr = pred.cpu().numpy()
                for it in range( label.size(0) ):
                    conv_mat[ls[it], pr[it]] += 1
                corr += (pred.cpu() == label.cpu()).sum().float()
                
            ts_time = time.time() -st_pt
        return bt_loss /len( self.loader_eval ), corr/self.te_size, 10*conv_mat/self.te_size, ts_time

        
    def trainModel( self ):
        self.model.train()
        st_pt = time.time()
        bt_loss = 0.0
        for it, (x, label) in enumerate( self.loader_training ):
            self.optimizer.zero_grad()
            #x = x.view(-1,dim_in).to( self.dev )
            x = x.to( self.dev )
            label = label.to( self.dev )

            out = self.model(x)
            loss = self.loss_func(out, label)
            norm_1 = self.model.getL1Norm()
            norm_2 = self.model.getL2Norm()
            loss += self.reg_w[0] *norm_1 + self.reg_w[1] *norm_2
            loss.backward()
            bt_loss += loss.cpu().item()

            self.optimizer.step()

        tr_time = time.time() -st_pt
        return bt_loss /len( self.loader_training ), tr_time
    
    
    def __call__( self, model, config, num_epoch, key="", load=None ):
        run = wandb.init( project=self.pr_name, entity=self.entity, name=key, reinit=True )
        if load is not None:
            model.load( load )
        with run:
            self.initRun( model, config["bs"], config["dev"], 
                          config["opt"], config["lr"], config["mom"],
                          config["reg"] )
            self.wandbConfig()
            for epoch in range(num_epoch):
                tr_l, tr_time = self.trainModel()
                logTraining( epoch, tr_l, tr_time )
                ts_l, acc, cmat, ts_time = self.testModel()
                logTest( epoch, ts_l, acc, cmat, ts_time )
        del model
        

In [None]:
runner = Runner( "cudavision4" )
cfg = { "bs": 10000, "dev": device, "opt": topt.Adam, 
        "lr": 10e-5, "mom": 0.5, "reg": (10e-7, 2*10e-6) }

ldict = {"num_c": 3, "num_l":2, "act":"relu", "dp":0.2, "ksize":3, "pool":("max",2), 
         "out_c":16, "out_c_1":32, "out_c_2":64, "out_l":256, "pad":True}

cstr = Constructor()
net = cstr( ConvNet, Config.fromDict( dt_dims, ldict ) )

runner( net, cfg, 2000, "conv_test", load="conv_test" )

[34m[1mwandb[0m: Currently logged in as: [33mthehorn93[0m (use `wandb login --relogin` to force relogin)


In [10]:
net.save( "conv_test" )