In [1]:
%pylab inline
import matplotlib.pyplot as plt
import seaborn

import pandas as pd
import numpy as np
import models.nestd
import os
import json
from theanomodels.utils.misc import loadHDF5

Populating the interactive namespace from numpy and matplotlib


Using gpu device 0: Tesla K20Xm (CNMeM is disabled, cuDNN not available)


In [2]:
def extract_configs(rootdir,filename='config.json'):
    """Collect every JSON file called ``filename`` below ``rootdir``.

    Parameters
    ----------
    rootdir : str
        Directory tree to search (walked recursively with ``os.walk``).
    filename : str
        Exact file name to look for in each directory.

    Returns
    -------
    pandas.DataFrame
        Built from a ``{directory path: parsed JSON}`` dict, i.e. one column
        per directory that contained ``filename``.
    """
    def _read_json(path):
        # Parse a single JSON file and hand back the decoded object.
        with open(path, 'r') as handle:
            return json.load(handle)

    parsed_by_dir = {}
    for folder, _subdirs, names in os.walk(rootdir):
        if filename not in names:
            continue
        parsed_by_dir[folder] = _read_json(os.path.join(folder, filename))
    return pd.DataFrame(parsed_by_dir)

## Find files named 'config.json' and load them into a dataframe

In [3]:
# Gather every config.json below the LogGamma experiment directory
# (one DataFrame column per run directory, one row per config key).
configs = extract_configs(rootdir='output/001_LogGamma/')
# Preview the first five config keys.
configs.head(5)

Unnamed: 0,output/001_LogGamma/cw128_seed1
KL_loggamma_coef,1
LogitNormalMP,3
alpha_inference_layers,2
annealBP,1
annealBound,1


## Get differences between runs

In [4]:
# Collect the config keys whose value is not identical across all runs.
keys_that_are_different = []
# Keys that must never count as a "difference" between runs (a timestamp
# would otherwise be reported whenever two runs record different values).
keys_to_omit = ['timestamp']
for k in configs.index:
    # Skip explicitly omitted keys and keys already recorded (duplicate index labels).
    if k in keys_to_omit or k in keys_that_are_different:
        continue
    # configs.T has one row per run; duplicated(k)==False flags the first run
    # carrying each distinct value of k, so the sum is the number of distinct values.
    if (configs.T.duplicated(k,keep='first')==False).sum()>1:
        keys_that_are_different.append(k)

In [5]:
configs.loc[keys_that_are_different]

Unnamed: 0,output/001_LogGamma/cw128_seed1


## Generate a name-mapping

In [6]:
def generate_name(key,value):
    """Turn one (config key, value) pair into a fragment of a run's display name.

    Parameters
    ----------
    key : str
        Config key.
    value : object
        Config value for that key.

    Returns
    -------
    ``''`` when ``value`` is None or boolean False, ``key`` when ``value`` is
    boolean True, ``value`` itself when ``key`` is ``'model'``, and
    ``'key=value'`` for everything else.

    Only genuine booleans are treated as on/off flags.  A plain ``==``
    comparison would also match the integers 0 and 1 (``0 == False`` and
    ``1 == True`` in Python), which would render ``seed=0`` as ``''`` and
    ``seed=1`` as ``'seed'``.
    """
    if value is None:
        return ''
    # np.bool_ is accepted as well because values read back from a pandas
    # container are not guaranteed to be built-in bool objects.
    if isinstance(value, (bool, np.bool_)):
        return key if value else ''
    if key=='model':
        return value
    return '%s=%s' % (key,value)

In [7]:
keys_to_always_include = ['model']

# Keys that make up a run's display name, computed once and in a stable order:
# the always-included keys first, then the keys that differ between runs,
# with repeats dropped (a set() here would order the fragments by hash).
name_keys = []
for k in keys_to_always_include+keys_that_are_different:
    if k not in name_keys:
        name_keys.append(k)

# Maps each run (a column of configs) to its display name.
name_map = {}
for c in configs.columns:
    # .items() is available on Series under both Python 2 and Python 3 pandas.
    name_str_list = [generate_name(key, value)
                     for key, value in configs.loc[name_keys,c].items()]
    # generate_name returns '' for entries that should not show up; drop those.
    name = ' '.join(part for part in name_str_list if len(part))
    name_map[c] = name

In [8]:
name_map

{'output/001_LogGamma/cw128_seed1': u'LogGamma'}

## Load results

In [31]:
import models.nestdarrays

In [32]:
reload(models.nestdarrays)

<module 'models.nestdarrays' from 'models/nestdarrays.py'>

In [38]:
# Load each run's output.h5 and key it by the run's display name from name_map.
run_outputs = {}
for run_dir, run_name in name_map.items():
    run_outputs[run_name] = loadHDF5(os.path.join(run_dir,'output.h5'))
results = models.nestd.NestD(run_outputs)

In [39]:
results.apply(np.shape)

NestD{
 LogGamma: NestD{
   train: NestD{
     loss: (151,)
     p(x,y): NestD{
       bound: (151,)
       nllX: (151,)
       nllY: (151,)
       logbeta: (151,)
       eps: (151,)
       KL: (151,)
       U: (151,)
       KL_Z: (151,)
       alpha: (151,)
       objfunc: (151,)
       Z: (151,)
       NLL: (151,)
       KL_loggamma: (151,)}
     boundU: (151,)
     bound: (151,)
     pnorm: (151,)
     gnorm: (151,)
     epoch: (151,)
     objective: (151,)
     accuracy: (151,)
     boundL: (151,)
     p(x): NestD{
       bound: (151,)
       nllX: (151,)
       logbeta: (151,)
       eps: (151,)
       KL: (151,)
       U: (151,)
       KL_Z: (151,)
       alpha: (151,)
       objfunc: (151,)
       Z: (151,)
       NLL: (151,)
       KL_loggamma: (151,)}
     duration (seconds): (151,)
     classifier: (151,)}
   valid: NestD{
     loss: (16,)
     p(x,y): NestD{
       bound: (16,)
       nllX: (16,)
       nllY: (16,)
       logbeta: (16,)
       eps: (16,)
       KL: (16,)
   

In [40]:
results[:,:,['pnorm']].apply(np.shape)

NestD{
 LogGamma: NestD{
   train: NestD{
     pnorm: (151,)}
   valid: NestD{ }}}

In [41]:
results[:,:,['pnorm']].apply(np.shape).to_dict()

{u'LogGamma': {u'train': {u'pnorm': (151,)}, u'valid': {}}}

In [42]:
dict(results[:,:,['pnorm']].apply(np.shape))

{u'LogGamma': NestD{
 train: NestD{
   pnorm: (151,)}
 valid: NestD{ }}}

In [43]:
results[:,:,['pnorm']].apply(np.shape).prune()

NestD{
 LogGamma: NestD{
   train: NestD{
     pnorm: (151,)}}}

In [47]:
results[:,'train',['accuracy','loss']].apply(np.shape)

NestD{
 LogGamma: NestD{
   loss: (151,)
   accuracy: (151,)}}

In [45]:
'{:0.2f}'.format(3.234234)

'3.23'

In [None]:
# Per-metric plotting options.  The inner keys match keyword parameters of
# plot_result, so an entry can be splatted into it with **plot_settings[metric].
plot_settings = dict(
    accuracy=dict(
        best_val_func=max,
        best_val_format='{:0.2f}',
        legend_loc=dict(loc='lower right'),
        ylim=(0,1),
    ),
    loss=dict(
        best_val_func=min,
        best_val_format='{:0.0f}',
        legend_loc=dict(loc='upper right'),
        # no 'ylim' entry for loss (left disabled): ylim=(0,1)
    ),
)

def plot_result(x,y=None,label=None,title=None,best_val_func=None,best_val_format=None,legend_loc=None,
                xlim=None,ylim=None,xlabel=None,ylabel=None,**plt_kwargs):
    """Plot one curve on the current matplotlib axes and decorate it.

    Parameters
    ----------
    x, y : sequences
        Curve coordinates.  When only ``x`` is given it is used as the
        y-values and plotted against ``0 .. len(x)-1``.
    label : str, optional
        Legend label for the curve.
    title, xlabel, ylabel : str, optional
        Applied to the current axes when given.
    best_val_func : callable, optional
        Reduces ``y`` to a single "best" value (e.g. ``min`` or ``max``); when
        given together with ``label`` the formatted value is prepended to the label.
    best_val_format : str, optional
        ``str.format`` template for the best value; ``'{}'`` is used when omitted.
    legend_loc : dict, optional
        Keyword arguments forwarded to ``plt.legend``; no legend is drawn when None.
    xlim, ylim : optional
        Axis limits passed to ``plt.xlim`` / ``plt.ylim`` when given.
    **plt_kwargs
        Forwarded to ``plt.plot``.
    """
    if y is None:
        y = x
        x = np.arange(len(y))
    if label is not None and best_val_func is not None:
        # Fall back to plain formatting so a missing template does not raise
        # AttributeError on None.format.
        template = '{}' if best_val_format is None else best_val_format
        best_val = template.format(best_val_func(y))
        label = '%s %s' % (best_val,label)
    plt.plot(x,y,label=label,**plt_kwargs)
    if legend_loc is not None:
        plt.legend(**legend_loc)
    if title is not None:
        plt.title(title)
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)
    # The axis limits are part of the signature (and plot_settings supplies
    # 'ylim'), so they have to be applied rather than silently dropped.
    if xlim is not None:
        plt.xlim(xlim)
    if ylim is not None:
        plt.ylim(ylim)
        
def plot_result_set(x,y=None,**kwargs):
    """Unfinished helper: normalises (x, y) and then runs a subplot loop.

    FIXME: as written this function cannot run on its own.  After the x/y
    swap below, ``x``, ``y`` and ``kwargs`` are never used again, and the loop
    reads ``keys``, ``funcs``, ``yscales``, ``ylims``, ``locs``,
    ``label_prefix``, ``results`` and ``title``, none of which are parameters
    or locals of this function.  The loop body is the same as the one in
    ``plotExperiment``.
    """
    # Single positional argument: treat it as the y-data and leave x unset.
    if y is None:
        y = x
        x = None
    


    
    
        
        
        
        
    # NOTE(review): every name zipped here comes from outside this function;
    # it raises NameError unless matching globals exist -- confirm the intent.
    for i,(key,f,yscale,ylim,loc) in enumerate(zip(keys,funcs,yscales,ylims,locs)):
        #print i,key
        plt.subplot(1,3,i+1)
        if label_prefix == None:
            label = 'best %s = %0.2f' % (key,f(results[key]))
        else:
            label='%s: best %s = %0.2f' % (label_prefix,key,f(results[key]))
        plt.plot(results['epochs'],results[key],label=label)
        plt.legend(loc=loc)
        plt.yscale(yscale)
        plt.title('%s %s' % (title,key))
        if ylim is not None:
            plt.ylim(ylim)

In [None]:
def plotExperiment(results,
                   title = '',
                   keys = ['cost','bound','accuracy'],
                   funcs = [min,min,max],
                   yscales = ['linear','linear','linear'],
                   ylims = [(100,600),(100,600),(0,1)],
                   locs = ['upper right','upper right','lower right'],
                   label_prefix=None
                   ):
    """Plot several metrics of one experiment side by side on figure 1.

    Parameters
    ----------
    results : mapping
        Must provide ``results['epochs']`` (x-axis) and ``results[key]`` for
        every entry of ``keys``.
    title : str
        Prefix for each panel title (``'<title> <key>'``).
    keys, funcs, yscales, ylims, locs : sequences
        Parallel per-panel settings: metric name, reducer giving the "best"
        value shown in the legend, y-axis scale, y-limits (None to skip) and
        legend location.  They are zipped, so the shortest one decides how
        many panels are drawn.  The list defaults are only read, never mutated.
    label_prefix : str, optional
        Prepended to each legend label.
    """
    panels = list(zip(keys,funcs,yscales,ylims,locs))
    # Keep the original 1x3 grid for up to three metrics, and widen it when
    # more are requested instead of failing on the fourth subplot index.
    ncols = max(3,len(panels))
    # Figure number 1 is always reused, so repeated calls draw onto the same figure.
    plt.figure(1,figsize=(18,4))
    for i,(key,f,yscale,ylim,loc) in enumerate(panels):
        plt.subplot(1,ncols,i+1)
        best = f(results[key])
        if label_prefix is None:
            label = 'best %s = %0.2f' % (key,best)
        else:
            label = '%s: best %s = %0.2f' % (label_prefix,key,best)
        plt.plot(results['epochs'],results[key],label=label)
        plt.legend(loc=loc)
        plt.yscale(yscale)
        plt.title('%s %s' % (title,key))
        if ylim is not None:
            plt.ylim(ylim)

In [57]:
pd.DataFrame(results.walk())

Unnamed: 0,0,1
0,"(LogGamma, train, loss)","[8.74946e+07, 91788.5, 87505.0, 92981.9, 80126..."
1,"(LogGamma, train, p(x,y), nllX)","[781.506, 193.536, 191.448, 183.55, 169.172, 1..."
2,"(LogGamma, train, p(x,y), logbeta)","[-0.235697, -1.00667, -1.15575, -0.997878, -1...."
3,"(LogGamma, train, p(x,y), nllY)","[3.78404, 2.33661, 2.11648, 2.35015, 2.00719, ..."
4,"(LogGamma, train, p(x,y), bound)","[5.27287e+07, 24293.4, 23185.8, 25109.6, 20831..."
5,"(LogGamma, train, p(x,y), KL)","[5.26501e+07, 4706.15, 3829.42, 6519.66, 3713...."
6,"(LogGamma, train, p(x,y), eps)","[0.00123112, -0.000347916, -0.00162066, 0.0009..."
7,"(LogGamma, train, p(x,y), objfunc)","[-2.27731e+08, -435167.0, -451982.0, -417681.0..."
8,"(LogGamma, train, p(x,y), U)","[-32.989, -8.18494, -8.71878, -9.32058, -7.709..."
9,"(LogGamma, train, p(x,y), KL_Z)","[526016.0, 3.22634, 3.60051, 5.54824, 8.78736,..."


In [56]:
pd.DataFrame({'a':[np.random.randn(4),2,3],'b':range(3,6)})

Unnamed: 0,a,b
0,"[0.0315762198388, -0.0647093483186, -0.5671997...",3
1,2,4
2,3,5


In [55]:
import numpy as np

In [21]:
class thing(object):
    """Scratch probe that prints what Python passes to ``__getitem__`` and ``*args``."""
    def __getitem__(self,x):
        """Print the subscript object and its type, separated by a space; returns None."""
        # A single pre-formatted string prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('{} {}'.format(x, type(x)))
    def func(self,*args):
        """Print the tuple of positional arguments received; returns None."""
        print(args)

In [40]:
thing()[:,:]

(slice(None, None, None), slice(None, None, None)) <type 'tuple'>


In [42]:
thing()[(1,2,3),(3242)]

((1, 2, 3), 3242) <type 'tuple'>


In [24]:
type(np.random.randn(3))

numpy.ndarray

In [26]:
thing().func([:,:])

SyntaxError: invalid syntax (<ipython-input-26-ead0e3e3cd48>, line 1)

In [44]:
a = (1)

In [45]:
type(a)

int

In [29]:
(1,2,3)[1:]

(2, 3)

In [30]:
s = slice(None,None,None)

In [33]:
s.start

In [35]:
(1,2,3)

(1, 2, 3)

In [7]:
def stuff(x=123,*args,**kwargs):
    """Print ``x``, its type and the extra positional arguments on one line.

    ``kwargs`` is accepted but not used.  Returns None.
    """
    # One pre-formatted string keeps the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print('{} {} {}'.format(x, type(x), args))

In [8]:
stuff((1,),2,3,4,5)

(1,) <type 'tuple'> (2, 3, 4, 5)


In [None]:
# Design sketch (cell was never executed) of a nested layout for tWeights:
# a 'weights' group and a 'hyperparameters' group.
# NOTE(review): `nparray`, `sdddd` and `...` are placeholders -- the two names
# are not defined in any cell visible here, so this is not runnable as written.
tWeights = {
    'weights': {
        'w1':nparray,
        'w2':sdddd
    },
    'hyperparameters':{
        ...
    }
    
}
# Design sketch of a nested layout for tOutputs: one group per phase
# ('train' / 'eval'), each holding a 'p(x)' and a 'p(x,y)' sub-group.
# The {...} bodies are placeholders for contents that are not spelled out here.
tOutputs = {
    'train':{
        'p(x)':{...},
        'p(x,y)':{...}
    },  # this comma was missing, which made the whole literal a syntax error
    'eval':{
        'p(x)':{...},
        'p(x,y)':{...}
    }
}

In [None]:
# NOTE(review): scratch lines that use `self` at cell level (no enclosing
# class or method is visible); each attribute is replaced by its own
# 'hyperparameters' entry.
self.tWeights = self.tWeights['hyperparameters']
# NOTE(review): the tOutputs sketch in the previous cell only shows 'train' and
# 'eval' keys, no 'hyperparameters' -- confirm which key was intended here.
self.tOutputs = self.tOutputs['hyperparameters']

In [23]:
class A(object):
    """Base class whose ``__recreate__`` builds a fresh object of the caller's own class."""

    def __recreate__(self):
        """Return a new, argument-less instance of ``self.__class__``."""
        concrete_class = self.__class__
        return concrete_class()


class B(A):
    """Subclass that forwards ``__recreate__`` to the parent implementation."""

    def __recreate__(self):
        """Delegate to ``A.__recreate__`` via ``super``; the result is still a ``B``."""
        parent_view = super(B,self)
        return parent_view.__recreate__()

In [25]:
B().__recreate__()

<__main__.B at 0x7f18bbe9cf90>

In [26]:
b = B()

In [28]:
b.__new__()

TypeError: object.__new__(): not enough arguments