In [3]:
from tensorboardX import SummaryWriter
# Three ways of constructing a writer, differing only in how the log location is given.
# 1) Explicit log directory.
writer = SummaryWriter('runs/exp-1')
# 2) No arguments: the log directory is left to tensorboardX's default.
writer2 = SummaryWriter()
# 3) Only a `comment` string (presumably used as a suffix of the auto-generated
#    run directory name; confirm against the tensorboardX documentation).
writer3 = SummaryWriter(comment='3x learning rate')
# NOTE(review): none of these writers is closed in the visible cells, and the
# name `writer` is later rebound to MyWriter(log_dir) by the training cell.


In [4]:
from torchsummary import summary

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.autograd import Variable
from tensorboardX import SummaryWriter

# Example input passed to add_graph below: one 1x3 zero tensor wrapped in a 1-tuple.
dummy_input = (torch.zeros(1, 3),)


class LinearInLinear(nn.Module):
    """Smallest possible module: a single 3 -> 5 linear layer.

    Used only as a toy model for exporting a graph to tensorboardX.
    """

    def __init__(self):
        super().__init__()
        # The attribute name `l` is kept as-is: it shows up in the traced
        # scope name (``LinearInLinear/Linear[l]``).
        self.l = nn.Linear(in_features=3, out_features=5)

    def forward(self, x):
        """Apply the wrapped linear layer to ``x`` and return the result."""
        projected = self.l(x)
        return projected

# Export the toy model's graph to a fresh run tagged 'LinearInLinear'.
# verbose=True makes add_graph print the traced graph as well.
with SummaryWriter(comment='LinearInLinear') as w:
    w.add_graph(
        LinearInLinear(),
        dummy_input,
        verbose=True,
    )


graph(%input : Float(1, 3),
      %1 : Float(5, 3),
      %2 : Float(5)):
  %3 : Float(3!, 5!) = onnx::Transpose[perm=[1, 0]](%1), scope: LinearInLinear/Linear[l]
  %4 : Float(1, 5) = onnx::Gemm[alpha=1, beta=1](%input, %3, %2), scope: LinearInLinear/Linear[l]
  return (%4)



In [4]:
import os
import time
import torch
import logging
import argparse
import easydict

import sys
from model.model import RandWire
from utils.train import train
from utils.hparams import HParam
from utils.writer import MyWriter
from utils.graph_reader import read_graph
from dataset.dataloader import create_dataloader, MNIST_dataloader, CIFAR10_dataloader

import os
import math
import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import adabound
import itertools
import traceback
from torchsummary import summary

from utils.hparams import load_hparam_str
from utils.evaluation import validate
from model.model import RandWire



if __name__ == '__main__':
    # Notebook stand-in for the command-line interface. The original argparse
    # options were:
    #   -c/--config           yaml file for configuration (required)
    #   -p/--checkpoint_path  path of checkpoint pt file (optional)
    #   -m/--model            name of the model, used for logging/saving checkpoints
    # They are supplied here as a fixed EasyDict so the cell runs without sys.argv.
    args = easydict.EasyDict({
        "config": './config/config.yaml',
        "checkpoint_path": None,
        "model": 'test',
    })

    hp = HParam(args.config)
    # Keep the raw YAML text: it is stored in every checkpoint and compared
    # against the stored copy when resuming.
    with open(args.config, 'r') as f:
        hp_str = f.read()

    # Per-run output directories for checkpoints and logs.
    pt_path = os.path.join('.', hp.log.chkpt_dir)
    out_dir = os.path.join(pt_path, args.model)
    os.makedirs(out_dir, exist_ok=True)

    log_dir = os.path.join('.', hp.log.log_dir, args.model)
    os.makedirs(log_dir, exist_ok=True)

    # None means "start a new run"; anything else is a checkpoint to resume from.
    chkpt_path = args.checkpoint_path

    # Log both to a timestamped file inside log_dir and to the console.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(os.path.join(log_dir,
                '%s-%d.log' % (args.model, time.time()))),
            logging.StreamHandler()
        ]
    )
    logger = logging.getLogger()

    # Fail early on incomplete configuration.
    if hp.data.train == '' or hp.data.val == '':
        logger.error("hp.data.train, hp.data.val cannot be empty")
        raise Exception("Please specify directories of train data.")

    if hp.model.graph0 == '' or hp.model.graph1 == '' or hp.model.graph2 == '':
        logger.error("hp.model.graph0, graph1, graph2 cannot be empty")
        raise Exception("Please specify random DAG architecture.")

    graphs = [
        read_graph(hp.model.graph0),
        read_graph(hp.model.graph1),
        read_graph(hp.model.graph2),
    ]

    writer = MyWriter(log_dir)

    # Pick the dataloader factory that matches the configured dataset type.
    dataset = hp.data.type
    switcher = {
        'MNIST': MNIST_dataloader,
        'CIFAR10': CIFAR10_dataloader,
        'ImageNet': create_dataloader,
    }
    assert dataset in switcher, 'Dataset type currently not supported'
    dl_func = switcher[dataset]
    trainset = dl_func(hp, args, True)
    valset = dl_func(hp, args, False)

    # The code below is the body of utils.train.train inlined into the notebook:
    # train(out_dir, chkpt_path, trainset, valset, writer, logger, hp, hp_str, graphs)

    ################# Train
    model = RandWire(hp, graphs).cuda()
    if hp.train.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=hp.train.adam)
    elif hp.train.optimizer == 'adabound':
        optimizer = adabound.AdaBound(model.parameters(),
                                      lr=hp.train.adabound.initial,
                                      final_lr=hp.train.adabound.final)
    elif hp.train.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=hp.train.sgd.lr,
                                    momentum=hp.train.sgd.momentum,
                                    weight_decay=hp.train.sgd.weight_decay)
    else:
        raise Exception("Optimizer not supported: %s" % hp.train.optimizer)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, hp.train.epoch)

    # -1 so that a fresh run starts at epoch 0 (the loop begins at init_epoch + 1).
    init_epoch = -1
    step = 0

    if chkpt_path is not None:
        logger.info("Resuming from checkpoint: %s" % chkpt_path)
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(chkpt_path)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        step = checkpoint['step']
        init_epoch = checkpoint['epoch']

        if hp_str != checkpoint['hp_str']:
            logger.warning("New hparams are different from checkpoint.")
            logger.warning("Will use new hparams.")
        # hp = load_hparam_str(hp_str)
    else:
        logger.info("Starting new training run")
        logger.info("Writing graph to tensorboardX...")
        # Total number of scalar parameters. The original accumulated this in
        # a local called `nn`, which rebound the module-level `torch.nn`
        # alias to an int and broke any later `nn.*` usage in the notebook.
        parameters = sum(p.numel() for p in model.parameters())
        logger.info("Number of model parameters: %d" % parameters)
        writer.write_graph(model, torch.randn(7, hp.model.input_maps, 224, 224).cuda())
        logger.info("Finished.")

    try:
        # Runs until interrupted or until an exception is raised.
        for epoch in itertools.count(init_epoch+1):
            # Re-enter training mode at the start of every epoch. validate()
            # is defined elsewhere, so do not rely on it restoring the mode.
            model.train()
            loader = tqdm.tqdm(trainset, desc='Train data loader')
            for data, target in loader:
                data, target = data.cuda(), target.cuda()
                optimizer.zero_grad()
                output = model(data)
                loss = F.nll_loss(output, target)
                loss.backward()
                optimizer.step()

                # Convert to a Python float for the checks and logging below.
                loss = loss.item()
                if loss > 1e8 or math.isnan(loss):
                    logger.error("Loss exploded to %.02f at step %d!" % (loss, step))
                    raise Exception("Loss exploded")

                writer.log_training(loss, step)
                loader.set_description('Loss %.02f at step %d' % (loss, step))
                step += 1

            # One checkpoint per epoch, holding everything needed to resume.
            save_path = os.path.join(out_dir, 'chkpt_%03d.pt' % epoch)
            torch.save({
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'step': step,
                'epoch': epoch,
                'hp_str': hp_str,
            }, save_path)
            logger.info("Saved checkpoint to: %s" % save_path)

            validate(model, valset, writer, epoch)
            lr_scheduler.step()

    except Exception as e:
        # Deliberate best-effort: report the failure and let the cell finish.
        logger.error("Exiting due to exception: %s" % e)
        traceback.print_exc()


2019-07-08 14:03:35,217 - INFO - Starting new training run
2019-07-08 14:03:35,218 - INFO - Writing graph to tensorboardX...
2019-07-08 14:03:37,362 - INFO - Finished.

Train data loader:   0%|                                                                      | 0/8572 [00:00<?, ?it/s]
Loss 2.22 at step 3:   0%|                                                            | 4/8572 [00:01<52:30,  2.72it/s]

KeyboardInterrupt: 

In [7]:
import torch
# Display the model's repr.
# NOTE(review): `model` is only created by the training cell, so this cell
# fails on a fresh kernel unless that cell has been run first.
model

RandWire(
  (conv1): Conv2d(1, 39, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bn1): BatchNorm2d(39, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(39, 78, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bn2): BatchNorm2d(78, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dagly3): DAGLayer(
    (nodes): ModuleList(
      (0): NodeOp(
        (conv): SeparableConv2d(
          (conv1): Conv2d(78, 78, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=78, bias=False)
          (pointwise): Conv2d(78, 78, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (bn): BatchNorm2d(78, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): NodeOp(
        (conv): SeparableConv2d(
          (conv1): Conv2d(78, 78, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=78, bias=False)
          (pointwise): Conv2d(78, 78, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )


In [13]:
# Print torchsummary's per-layer table for a single-channel 244x244 input.
# NOTE(review): the graph export in the training cell uses
# (hp.model.input_maps, 224, 224); confirm that 244 is intended here and is
# not a typo for 224.
summary(model,(1,244,244))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 39, 122, 122]             390
       BatchNorm2d-2         [-1, 39, 122, 122]              78
            Conv2d-3           [-1, 78, 61, 61]          27,456
       BatchNorm2d-4           [-1, 78, 61, 61]             156
            Conv2d-5           [-1, 78, 31, 31]             702
            Conv2d-6           [-1, 78, 31, 31]           6,084
   SeparableConv2d-7           [-1, 78, 31, 31]               0
       BatchNorm2d-8           [-1, 78, 31, 31]             156
            NodeOp-9           [-1, 78, 31, 31]               0
           Conv2d-10           [-1, 78, 31, 31]             702
           Conv2d-11           [-1, 78, 31, 31]           6,084
  SeparableConv2d-12           [-1, 78, 31, 31]               0
      BatchNorm2d-13           [-1, 78, 31, 31]             156
           NodeOp-14           [-1, 78,

 SeparableConv2d-253           [-1, 78, 16, 16]               0
     BatchNorm2d-254           [-1, 78, 16, 16]             156
          NodeOp-255           [-1, 78, 16, 16]               0
          Conv2d-256           [-1, 78, 16, 16]             702
          Conv2d-257           [-1, 78, 16, 16]           6,084
 SeparableConv2d-258           [-1, 78, 16, 16]               0
     BatchNorm2d-259           [-1, 78, 16, 16]             156
          NodeOp-260           [-1, 78, 16, 16]               0
          Conv2d-261           [-1, 78, 16, 16]             702
          Conv2d-262          [-1, 156, 16, 16]          12,168
 SeparableConv2d-263          [-1, 156, 16, 16]               0
     BatchNorm2d-264          [-1, 156, 16, 16]             312
          NodeOp-265          [-1, 156, 16, 16]               0
          Conv2d-266           [-1, 78, 16, 16]             702
          Conv2d-267           [-1, 78, 16, 16]           6,084
 SeparableConv2d-268           [-1, 78, 

In [10]:
# Inspect the `model` section of the hyper-parameters loaded by the training cell.
hp.model

{'channel': 78,
 'classes': 10,
 'input_maps': 1,
 'graph0': 'er-01.txt',
 'graph1': 'er-02.txt',
 'graph2': 'er-03.txt'}

In [5]:
# A second, unrelated option set exposed with attribute access.
# NOTE(review): this rebinds `args`, which the training cell also defines
# (with the keys config / checkpoint_path / model). Re-running that cell's
# downstream code after this one would see these keys instead.
args = easydict.EasyDict(dict(
    batchsize=100,
    epoch=20,
    gpu=0,
    out="result",
    resume=False,
    unit=1000,
))




In [6]:
# Attribute-style lookup of the "batchsize" entry of the EasyDict defined above.
args.batchsize

100