In [1]:
import pandas as pd
import json
from bokeh.plotting import figure, output_notebook, show
import numpy as np
from bokeh.charts.utils import cycle_colors
import sweeper

# Configure bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [2]:
from IPython.display import display

def _plotRuns(frames, legends, colors, title, x_of, y_of, legend_slice=slice(-55, -20), **axis_labels):
    """Draw one bokeh figure containing one line per run, then show it.

    frames       -- list of per-run pandas DataFrames
    legends      -- per-run legend strings, parallel to ``frames``
    colors       -- palette; reused cyclically when there are more runs than colours
    title        -- figure title
    x_of, y_of   -- callables mapping a frame to the x / y values of its line
    legend_slice -- slice of each legend string that is actually displayed
    axis_labels  -- forwarded unchanged to figure() (x_axis_label / y_axis_label)
    """
    p = figure(title=title, **axis_labels)
    for i, frame in enumerate(frames):
        p.line(x_of(frame), y_of(frame),
               # modulo: never index past the end of the palette
               color=colors[i % len(colors)],
               legend=legends[i][legend_slice])
    show(p)


def plotLogs(log_names):
    """Plot per-epoch training statistics of several runs, one bokeh figure per metric.

    log_names -- iterable of run directory names; every run is read from
                 '../logs/<name>/log.txt' via sweeper.loadLog.

    Each log has to provide the columns 'epoch', 'lr', 'loss', 'train_acc',
    'test_acc', 'train_time', 'test_time' and 'n_parameters'. Errors are
    computed as ``100 - accuracy``, i.e. accuracies are assumed to be percentages.

    Ten figures are shown, each with one line per run. Nothing is returned.
    """
    # parse log files, extracting json entry with stats per epoch and creating pandas DataFrame
    frames = [pd.DataFrame(sweeper.loadLog('../logs/'+log+'/log.txt')) for log in log_names]
    colors = ['#5599bb','#ffaaff','#aaff00','#aa00ff','#00ffff','#5500ff','#ff0055','#000000','#ff0000','#00ff00']

    # this searches constant parameters across different runs to generate legends
    legends = sweeper.generateLegends(frames)

    def testErr(frame):
        return 100-frame['test_acc']

    def trainTestRatio(frame):
        # Plotted negated (as in the original figures); the 100.0001 keeps the
        # denominator non-zero when test_acc is exactly 100.
        return -(100-np.array(frame['train_acc'])) / ( 100.0001-np.array(frame['test_acc']) )

    # (title, x values, y values, figure/legend options) -- shown in this order.
    # Note that some x axes are deliberately plotted as -lr.
    plots = [
        ('learning rate to test error',
         lambda f: -f['lr'], testErr,
         dict(x_axis_label='lr', y_axis_label='test_err')),
        ('test error',
         lambda f: f['epoch'], testErr,
         dict(x_axis_label='epoch', legend_slice=slice(-65, -20))),
        ('log2 loss',
         lambda f: -f['lr'], lambda f: np.log2(f['loss']),
         dict(x_axis_label='lr', y_axis_label='loss 2^x')),
        ('train_time',
         lambda f: f['test_time'], lambda f: f['train_time'],
         dict(x_axis_label='test_time', y_axis_label='train_time')),
        ('train / test error vs test',
         lambda f: 100-np.array(f['test_acc']), trainTestRatio,
         dict(x_axis_label='test error', y_axis_label='train to test ratio')),
        ('train / test',
         lambda f: f['lr'], trainTestRatio,
         dict(x_axis_label='lr', y_axis_label='train to test ratio')),
        ('train error',
         lambda f: f['lr'], lambda f: 100-f['train_acc'],
         dict(x_axis_label='lr')),
        ('learning rate',
         lambda f: f['epoch'], lambda f: f['lr'],
         dict(x_axis_label='epoch', y_axis_label='lr')),
        ('n_parameters (mln) to test_err',
         lambda f: f['n_parameters']/1000000, testErr,
         dict(x_axis_label='n_parameters', y_axis_label='test_err')),
        # title matches the log10 actually applied (axis label already says 10^x)
        ('log10(n_parameters) to test_err',
         lambda f: np.log10(f['n_parameters']), testErr,
         dict(x_axis_label='n_parameters 10^x', y_axis_label='test_err')),
    ]

    for title, x_of, y_of, options in plots:
        _plotRuns(frames, legends, colors, title, x_of, y_of, **options)


In [27]:
# Plot the currently selected runs. Every string is a run directory name that
# plotLogs reads as '../logs/<name>/log.txt'. Commented-out entries are earlier
# experiments kept together with their notes; (un)comment lines to change the selection.
# NOTE: keep exactly one comma between neighbouring active entries -- Python silently
# concatenates adjacent string literals, which would yield a non-existent run name.
plotLogs([
   # 'nin_1875527964'
    #,'vgg_24208029'
    #,'resnet-pre-act_17934627'
        
   # ,  'wide-resnet_1121914561'
    
    #'wide-resnet_3216531871'#my try
    #,'wide-resnet_223312785' #my try 2
    #'nin_1412731440',#actual nin
    #'nin_1571722525' #my v1 with ReLU
    #,'nin_289271600' #prelu, more grouped, erratic
    #,'nin_515024378' #elu, less grouped, one of the last layers uncommented, still pretty random.. Maybe, lessen LR?
    #,'nin_738527557' #elu, lr=0.04, actually, possibly same le=0.04
    #,'my_21806556' #0.01
    #,'my_864020577' #0.03
    #,'my_2600711329'
    #    'nin-my-v1_2765811950' #cifar100
        #,'nin-my-v1_2919629431'
        #,'nin-my-v1_23652885'
    #    ,'nin-my-v3_256419241'
        #,'nin-my-v5_2364614713'
        #,'nin-wide-narrow-faster_99539'
        #,'nin-wide-narrow-faster-prelu_2479214368'
        #,'nin-my-v7_2601230772'
        #,'nin-my-grouped-in-6_602315048'
        #,'nin-my-grouped-in-12-wider-end_1020922938'
        #,'my-max-min-out-less-grouped_1233812178'
        #,'my-nin-2_193724647' #almost 100 epochs, not so good
        #'nin-my-grouped-in-12-wider_134273880' #max 89
        #,'nin-my-grouped-in-3-except-1x1_21068430' #try 1, 0.12 start
      #  ,'nin-my-grouped-in-3-except-1x1_153776242'
            #try 2, 0.1 start, selected best 1-st epoch beginning, lrstep 10
        #,'nin-my-try-at-pure_140742781' # LR step 1
        #'my-wide-resnet-group-4_561820415'
        #,'my-wide-resnet-group-16_269972853'
        
        #,'my-wide-resnet-32_3159028787' 
        #best cifar 10
        
        #,'my-wide-resnet-speedy_36635370'
        #,'my-wide-resnet-speedy_3141232552'
        #,'my-wide-resnet-fully-even-more-hardcore_2934427607'
        #,'my-wide-resnet-fully-even-more-hardcore_234423536'
        #,'my-wide-resnet-fully-even-more-hardcore_77223188' #0.25 LR, lol
        #'my-wide-resnet-fully-even-more-hardcore-mod2_1724829314' #~77.5% cifar100 8-wide, 28-deep
        #, 'wide-resnet_1576110926' #same parameters as above, original wide-resnet
        #,  'wide-resnet_23899967'   #same

        #'wide-res-restructured_858525423' #8|28 restructured --- wider grouped instead of serial
        
        #'wide-res-restructured-pooled_1841829859' # 2|40
        #,'wide-res-restructured-pooled_1939828801' # 2|40
        #,'wide-res-restructured-pooled-load_228242059'
        
        #,'wide-resnet_78636791'                    # 2|40
        
        #'wide-resnet_13438489'             # with weight decay | 2|40
        #,'wide-resnet-2-40-load_187609537'  # continued
        
        #,'wide-res-restructured-pooled-load_2266820541' # continued from low-LR start
                                                        # seems to be getting stuck under 50%
        #'wide-res-restructured-pooled_505413458'  # 25.2 error  2|40   weight decay 0.0005
        
        #,'wide-resnet_2837615435' # 32-wide, 10-deep original
        #,'my-hardcore-mod5_230255367' # 8-wide, 28-deep fully grouped, 4x wider block middle. 64-batch
        #,'my-hardcore-mod5_67778080'   # 8-wide, 28-deep fully grouped, 4x wider block middle. 64-batch
                                       # lr 0.3, much faster decrease
        #,'g2-FIXED_264943616' # FIXED same cifar100 8-wide, 28-deep , 8-per-g with 64  batch and LR 0.03
        #,'g2-FIXED_1100216456' # FIXED same cifar100 8-wide, 28-deep , 8-per-g with 128 batch and LR 0.1
        
        #,'original-theory_1667325005' # 40-deep 1-wide.
                                      # [8 3x3 convs per feature -> 1x1 combine]
                                      # Unmodified net's optimistic result from paper: 31% error
        
        #,'original-theory-1g-no1x1_711323990' # no shitty 1x1s, only 1-per-g
                                              # Accidentally forgot to put RELUs back, so only 1 relu/block
        
        #'original-theory-1g-no1x1-faster_479432054' # Logically, pretty far from the original theory, but
                                                     # much faster cudnn-de-facto
                                                     # due to smaller #G and same width as original.
                                                     # A pretty "light" modification of the original.
                                                     # Should have, ~2x theoretical speedup vs original.

        #,'wide-resnet_2789929900' # original 1-wide 40-deep, seems like it's not actually 31%..

      #'supershallow-1-original_1303015497' #            1-layer 12-wide,  huge pool
        
        #,'supershallow-2-original_151022133'  # huge pool, 1-layer 256-wide, huge pool

        #,'supershallow-1-g_217315085'            #    grouped 1-layer 36-wide,    huge pool
     #,'supershallow-1-g-lin_321198353'         #    grouped 1-layer 36-wide,    huge pool , additional linear
    #,'supershallow-1-nog-lin_2147619568'      #  ungrouped 1-layer 36-wide,    huge pool , additional linear
        #,'supershallow-1-g-lin-2xWider_106358140'#    grouped 1-layer 36*2-wide,  huge pool , additional linear
        # 'supershallow-72g-36x1x1-lin_1295417462'
        # 'supershallow-72g-36x1x1-lin_848619725' # momentum=0.1, no nesterov
        #,'supershallow-72g-36x1x1-lin_931525683' # momentum=0  , no nesterov
        
        #,'supershallow-36nog-36x1x1-lin_1625922883'
        #,'original-theory-1g-no1x1-faster_2662522032'        # momentum=0  , no nesterov
        #,'original-theory-1g-no1x1-faster_1785221836'        # momentum=0.2, no nesterov
        
        #,'original-theory-1g-no1x1-faster-second_2615717994' # momentum=0.2, nesterov, lr=0.01
        #,'original-theory-1g-no1x1-faster-second_1967530762' # momentum=0.2, nesterov, lr=0.15, steps
        #,'original-theory-1g-no1x1-faster-second_2811127475' # momentum=0.2, nesterov, lr=0.15, longer steps
        #,'original-theory-1g-no1x1-faster_301286226'
        #,'original-theory-1g-no1x1-faster-second-bnless_79729396'
        #,'original-theory-prelu_29319120'
        #,'wide-resnet-1x3_312228145'
        #,'wide-resnet-1x3_676421281'
        
        #,'original-theory-1g-no1x1-faster-second_1077318781' #batch 1024,   nesterov, 0.2 momentum
        #,'original-theory-1g-no1x1-faster-second_3185614107' #batch 512 ,no nesterov, 0.6 momentum
                                                             # 4 / 28  
        #,'original-theory-1g-no1x1-faster-second_269817566'  #batch 512 ,no nesterov, 0.95 momentum
        #,'original-theory-1g-no1x1-faster-second_33983860'
        #,'original-theory-1g-no1x1-faster-second-pow_2769621779'
        #,'nin-original_154497595'
        #,'original-theory-1g-no1x1-faster-second_1679927654'
        #,'original-theory-1g-no1x1-faster-second_290136978'
        #,'original-theory-1g-no1x1-faster-second_1977331676' #non-white data, 256 batch, no nesterov, ~0.8 mom.
        #,'original-theory-1g-no1x1-faster-second_2468631929' #non-white data, 128 batch, nesterov, 0.9 mom. 12-28
        
        #,'original-theory-1g-no1x1-faster-second_2101774' # 1-40 same as above
        #,'original-theory-1g-no1x1-faster-wider_3085211797' # 2x wider grouped part
        #,'original-theory_146445625' # 1-40 | 128 | nester 0.9, looks fine
        
        #,'original-theory-plus-relu_2179628207' # 1-40 | 64 | nester 0.9 | 1->16 Gs, relatively good, ~33% error
        #,'original-theory-relu-non-quadratic_133348844' # widen_factor=1 depth=22
        #,'original-theory-relu-non-quadratic_104835296' # widen_factor=6 depth=22 | 32-batch

        #'wide-res-4-layer-2x2_1347012402' # widen_factor=12 depth=22 | 32-batch
        #,'wide-res-4-loaded_151947303' # continuation

        #,'nin-my-grouped-in-3-except-1x1_1678316999' # now without first-layer-group | 256 batch

        #,'nin-my-grouped-in-3-except-1x1-3x3_227025842'
        #,'nin-my-grouped-in-3-except-1x1-3x3_198056681' # 32 batch, also removed the pre-last 1x1
        #,'nin-my-grouped-2x2_3071619002' # 32 batch
        
        'nin-my-grouped-2x2-volumetric_305861681' # ~72%
        ,
        #,'nin-my-grouped-2x2-volumetric-2_111817341' # less grouped, ~74% (~26%err)
        #,'nin-my-grouped-2x2-extended_2503416157'
        
        #,'nin-my-grouped-2x2_44601682' # ~70%, tiny
        #,'nin-my-grouped-2x2_811714610' # white,      weight decay, 13 epochs
        #,'nin-my-grouped-2x2_875523394' # "combined", weight decay, 
        #,'nin-my-grouped-2x2_860820171' # combined, wd = 0.0002 ~71%
        
        #,'wide-res-restructured-pooled-load_1196428374' # don't mind the name, above continued for longer LR decay
        
        #,'nin-my-grouped-2x2-volumetric-deep_2411223178'
        
        #,'nin-my-grouped-2x2-volumetric-deep-more-g-wider-end_176441667'
        #,'nin-my-grouped-2x2-volumetric-deep-fattop_87171845'    # worse than wider-end, most likely
                                                                 #, though loss seems smaller

        #,'nin-my-grouped-2x2-volumetric-2-colors_1262728099' # volumetric-2 + 256x 1x1 after colors
        #,'nin-my-grouped-2x2-volumetric-2-colors-2_660014296' # 75%, a bit better than volumetric-2. Could be init
        #,'nin-my-grouped-2x2-volumetric-2-colors-3_952116257'# +weight decay | changed some params in first part
        
        
        'wide-res-restructured-pooled-actually_2153616159'  # 18.3%, 80m params
                                                             # wd=0.0005, width 12, depth 40 | 2 Gs
                                                             # BEST!
        
        
        ,'wide-res-restructured-pooled-actually-dot_2222810871' # 29%
                                                                # 0.55m 25sec 1|40 wd0.0003
                                                                # AWESOME, needs testing with larger # params
                                                                # training error hasn't reached 0. Near 2%
        
        #,'wide-res-restructured-pooled-actually-dot_2563920990' # 25% 8.7m  103sec  4|40  worse than original
        #,'load_2903631409'                                      #-continuing with slower LR curve
        
        
        
        #,'wide-res-restructured-pooled-actually-dot-g_248252290'
        #,'wide-res-restructured-pooled-actually-dot-g_2296015296' # lr.08
        #,'wide-res-restructured-pooled-actually-dot-g_99352853' # batch 32
        #,'wide-res-restructured-pooled-actually-dot-g_99410090' 
        #,'wide-res-restructured-pooled-actually-dot-g_13633485' # 1m params 112|2
        
        #,'wide-res-restructured-pooled-actually-dot-g_2608023767' # actually 4x serial (like 28) | 6G x 2W 
        
        #,'wide-res-restructured-pooled-actually-dot-3g_194693081' # 3Gs
        
        #,'wide-res-restructured-pooled-actually-dot-3g-norm-2_181415609' #4Gs, norm linear end | 1|28
        #,'wide-res-restructured-pooled-actually-dot-3g-norm-2_1547231421' #same no plain linear | 1|28
        #,'wide-res-restructured-dot-8g-33_2480418496'
        #,'load_1310416718' # raised wd = 0.0008
        
        # 64-batch
        
        #,'hoard_3134028086'   # 27%   1.5m  25 sec
        
        #,'hoard-2_2013123263' #      21.3m  100sec   nice train/test
        #,'hoard-3_2319621184'#         7m  90 sec   88 convs   most 2x2   narrow root    pool from 4x4
        #,'hoard-4-concat_97446298' # awful
        #,'hoard-4-concat_1871431746'#0.6m  21 sec  started out better than the test above
        #,'hoard-5-concat_1237916829' #3.3m  67 sec sucked
        #,'hoard-6_303047508'  #        1m  40 sec  doesn't seem good
        #,'hoard-2-2_202719005' #     1.7m  30 sec
        #,'hoard-2-3_1984528733'#       2m  36 sec                 great start, not so great later on
        #,'hoard-2-3-with-general-convs_1647424637'# 2.7m  40 sec  great start, not so great later on
        #,'hoard-2-4-with-general-convs_3054819040'# 6.8m  55 sec  nice train/test   elu
        #,'hoard-2-5-reducing_73026470'            # 6.8m  65 sec
        #,'hoard-2-7-regressive_84143421'          # 2.6m  60 sec
        #,'hoard-2-8_2725121730'                   # 1.5m  45 sec  nice
        #,'hoard-2-9_1144717097'                   #  19m 145 sec  20.5%
        #,'hoard-2-x_2442625743'                   #  24m 220 sec  19.5%   --- better.
                                                  #  Though, here training was "supervised"
        #,'load_276695612'   # continuing the last one since e52 (which was awesome) with different hyperparams
                            # to be continued
        ,'load_59251794'    #-here it is. 19.5%
        #,'load_705010866'   # let it play around ~without wd for some more. Nothing good seems to come out
        
        #,'hoard-2-x1-fraction-concat_35199367'  # wd.0007
        #,'hoard-2-x2-upscale_887712769' # 24.5% 11.6m 380sec batch21 wd.0004 lr.6-faster-fall seq=1
        #,'load_2782812357'              # continued with batch32, after noticing the mistake of using b21
                                        # continued from LR.04 with even faster LR fall/epoch
                                        # still 355 sec, so not a lot of profit from raising batch size 1.5x
                                        # fantastic train/test
        #,'load_1351222291'              # 24% -- last one continued. Still, bad accuracy vs #params
        #,'load_2906011235'              # 8batch
                                        #-loaded for relearning from LR.03 with larger momentum, wd.0004, slower fall

        #,'hoard-2-x3_1261924300'        # quickie-quackie LR.05. SpatialAdaptiveMaxPooling and Fractional
                                        # 25.5% 31.5m  470 sec  32batch   probably too sharp of a fall
        ,'hoard-2-x3_1863512264'
        
        #,'wide-resnet_146518293' # 29.5% usual 1|40 on non-white
        #,'wide-resnet_1424914631'#  30.2%
        
        #,'wide-resnet_3193017188'# white
        #,'wide-resnet_19368532'  # white #2
        #,'wide-resnet_311427880' # 28|12 b128  cifar-10-w
        #,'wide-resnet_1083320907'# 28|12 b16
        
        #,'wide-resnet_729617957' # 28|12 b128   52.6m  280 sec  cifar-100 standard   18.9%
         ,'load_7942550'
        #,'wide-resnet_524426751' # 28|12 b16
         #,'load_247009327'
        
        ,'hoard-2-x_1053922579'  # 2d|3seq         43m  375 sec  cifar-100 standard
    
])