# Modify the 12-target classifier for low-rank classification

This modifies the 12-target (i12), segnet classifier to use low rank filters. 
Before using this you should:
- Have pycaffe (from https://github.com/alexgkendall/caffe-segnet) setup and on the path
- Have a copy of solver.prototxt
- Have a copy of the 12-target segnet training classifier (e.g. `../scripts/i12/training.prototxt`)
- Have a copy of the 12-target segnet inference classifier (e.g. `../scripts/i12/deploy/deploy.prototxt`)

Table of contents:
- [Utility Functions](#Utility-Functions)
- [Add Low Rank Filter (Function)](#Add-Low-Rank-Filters)
- [Generate the Training Net](#Generate-the-net-to-use-for-training)
- [Generate the Inference Net](#Generate-the-net-to-use-for-inference-/-testing)
- [Test the Net](#Final-Test)
- [Modify the Solver Prototxt](#Set-the-solver-parameters-to-use-the-new-net)

# Utility Functions

In [None]:
# IPython magic: loads numpy and matplotlib into the interactive namespace
# and selects the interactive 'notebook' plotting backend.
%pylab notebook

In [None]:
import os
import google.protobuf.text_format
import caffe
# The nets are built and edited on the CPU; the GPU is only selected later,
# in the final test cell.
caffe.set_mode_cpu()

In [None]:
# For debugging (cell can be removed from final notebook)
# `set_trace` is an IPython Tracer instance; calling `set_trace()` inside any
# of the functions below drops into the IPython debugger at that point.
from IPython.core.debugger import Tracer
set_trace = Tracer()

In [None]:
def read_net_proto(path):
    """Load a caffe network definition from a text-format prototxt file

    :param path: Location of the caffe network description (.prototxt file)

    :return: The parsed network definition
    :rtype: caffe.proto.caffe_pb2.NetParameter
    """
    with open(path) as proto_file:
        proto_text = proto_file.read()

    # Parse() fills the (initially empty) message in place.
    parsed_net = caffe.proto.caffe_pb2.NetParameter()
    google.protobuf.text_format.Parse(proto_text, parsed_net)
    return parsed_net
    

In [None]:
def summarize_net(train_net):
    """Print a (not sooo short) summary of the net layers

    One row is printed per layer: its index, name and type, the kernel
    dimensions for "Convolution" layers, and the marker "DECODE" for layers
    whose name contains "_D".  A final row reports the number of layers.

    Every row is assembled first and emitted with a single-argument
    ``print(...)``, which produces the same text under Python 2 (with or
    without ``print_function``) and Python 3.

    :param train_net: The net to summarize
    :type train_net: caffe.proto.caffe_pb2.NetParameter
    """
    layers = list(train_net.layer)
    for i, layer in enumerate(layers):
        columns = [
            "{:04}".format(i),
            "\t{:15}\t{:15}".format(layer.name, layer.type),
        ]

        if layer.type == "Convolution":
            conv = layer.convolution_param
            if conv.kernel_size > 0:
                # Square kernel: the same size is shown on both sides of the 'x'
                columns.append("\t{0:>2}x{0:<2}".format(conv.kernel_size))
            else:
                # No square size set: show the per-axis sizes (width x height)
                columns.append("\t{:>2}x{:<2}".format(conv.kernel_w, conv.kernel_h))
        else:
            # Blank placeholder so the last column stays aligned
            columns.append("\t{:5}".format(''))

        columns.append("DECODE" if "_D" in layer.name else "      ")

        # Columns are separated by a single space
        print(" ".join(columns))

    print("Total {} layers".format(len(layers)))

# Add Low Rank Filters
In order to force the net to use low rank filters, we do two different convolutions: first a 1x9 horizontal convolution, followed by a 9x1 vertical convolution. The result of these two layers is a 9x9 rank 1 convolution. The idea is that we believe this convolution will favor grid-shaped results. 

**NOTE:** In order to get the net to behave properly I added a batch normalization layer in between the horizontal and vertical convolutions. The batch normalization does not substantially change the nature of the filters -- they are still rank 1 filters. 

In [None]:
def _make_1d_conv(layer, suffix, length, horizontal):
    """Return a copy of the convolution `layer` that uses a 1D kernel

    :param layer: The square convolution layer to copy
    :param suffix: Text appended to the original layer name
    :param length: Number of taps of the 1D kernel
    :param horizontal: True for a (1 x length) kernel, False for (length x 1)
    """
    conv_layer = caffe.proto.caffe_pb2.LayerParameter()
    conv_layer.CopyFrom(layer)
    conv = conv_layer.convolution_param

    # The square settings are dropped; the per-axis fields below replace them.
    conv.ClearField('kernel_size')
    conv.ClearField('pad')

    # '//' keeps the padding an integer under both Python 2 and Python 3.
    half = (length - 1) // 2
    if horizontal:
        conv.kernel_w, conv.pad_w = length, half
        conv.kernel_h, conv.pad_h = 1, 0
    else:
        conv.kernel_w, conv.pad_w = 1, 0
        conv.kernel_h, conv.pad_h = length, half

    conv_layer.name = layer.name + suffix
    return conv_layer


def _make_batch_norm(conv_layer):
    """Return an in-place "BN" layer operating on the first top of `conv_layer`"""
    bn = caffe.proto.caffe_pb2.LayerParameter()
    bn.name = conv_layer.name + "_bn"
    bn.type = "BN"
    # Same blob as bottom and top: the normalization is done in place.
    bn.bottom.append(conv_layer.top[0])
    bn.top.append(conv_layer.top[0])
    bn.param.add(lr_mult=1.0, decay_mult=1.0)
    bn.param.add(lr_mult=1.0, decay_mult=0.0)
    bn.bn_param.scale_filler.type = 'constant'
    bn.bn_param.scale_filler.value = 1.0
    bn.bn_param.shift_filler.type = 'constant'
    bn.bn_param.shift_filler.value = 1.0
    return bn


def modify_layers_for_anisotropy(orig_net):
    """Create a new net that is a copy of `orig_net`, with some layers modified

    Every "Convolution" layer whose name ends with '_D' is replaced by three
    layers: a horizontal 1D convolution (name + "_H"), a batch normalization
    layer (name + "_H_bn") and a vertical 1D convolution (name + "_V").  The
    1D kernels have `kernel_size ** 2` taps, where `kernel_size` is the size
    of the replaced square kernel.  The vertical layer keeps the top of the
    replaced layer, so the layers that follow it stay connected.  All other
    layers are copied unchanged, and `orig_net` itself is not modified.

    :param orig_net: The net to copy
    :type orig_net: caffe.proto.caffe_pb2.NetParameter

    :return: A copy of orig_net with additional layers.
    :rtype: caffe.proto.caffe_pb2.NetParameter

    :raises ValueError: If a '_D' convolution has no square `kernel_size` set
    """
    net = caffe.proto.caffe_pb2.NetParameter()
    net.CopyFrom(orig_net)
    # Keep the net-level settings but rebuild the list of layers from scratch.
    net.ClearField('layer')

    for layer in orig_net.layer:
        if not (layer.type == "Convolution" and layer.name.endswith("_D")):
            net.layer.add().CopyFrom(layer)
            continue

        kernel_size = layer.convolution_param.kernel_size
        if kernel_size < 1:
            # Without this check the failure would be an obscure range error
            # raised while assigning a negative padding.
            raise ValueError(
                "Layer {!r} has no square kernel_size set; "
                "cannot replace it with 1D filters".format(str(layer.name)))
        length = kernel_size ** 2

        # Replace the decoding convolution with a 1D horizontal filter that
        # writes to a blob of its own.
        hlayer = _make_1d_conv(layer, "_H", length, horizontal=True)
        hlayer.top[0] = hlayer.name

        # Batch normalization between the two 1D filters
        hbn = _make_batch_norm(hlayer)

        # Add a 1D vertical filter that reads the (normalized) horizontal output
        vlayer = _make_1d_conv(layer, "_V", length, horizontal=False)
        vlayer.bottom[0] = hlayer.top[0]

        net.layer.extend([hlayer, hbn, vlayer])

    return net

# Generate the net to use for training

Before executing these cells, copy the net used to train the 'i12' classifier into this folder and name it 'training-net.prototxt'

In [None]:
# Load the original 'i12' training net definition from this folder
train_net = read_net_proto('training-net.prototxt')

In [None]:
# Build the low rank version of the training net (train_net itself is left untouched)
modified_train_net = modify_layers_for_anisotropy(train_net)
# Uncomment to print a per-layer summary of the modified net:
# summarize_net(modified_train_net)

In [None]:
def save_net_proto(path, net):
    """Write a net to a prototxt file in protobuf text format

    :param path: Where to write the file (an existing file is overwritten)
    :param net: The net to save
    :type net: caffe.proto.caffe_pb2.NetParameter
    """
    # The text is generated before the file is opened for writing.
    serialized = google.protobuf.text_format.MessageToString(net)
    with open(path, 'w') as out_file:
        out_file.write(serialized)

In [None]:
# This is the file that the solver is pointed at further down in the notebook
save_net_proto('modified-training-net.prototxt', modified_train_net)

# Generate the net to use for inference / testing

Before executing these cells, copy the net used to do inference with the 'i12' classifier into this folder and name it 'original-inference-net.prototxt'

In [None]:
# Load the original 'i12' inference net definition from this folder
infer_net_path = 'original-inference-net.prototxt'
infer_net = read_net_proto(infer_net_path)

In [None]:
new_infer_net = modify_layers_for_anisotropy(infer_net)

# Make sure every BN layer (including the newly added ones) is set to do
# INFERENCE (in-place).  The layers are reached through the public repeated
# field rather than its private `_values` attribute.
for layer in new_infer_net.layer:
    if layer.type == 'BN':
        layer.bn_param.bn_mode = layer.bn_param.INFERENCE

# The i12 net did not do softmax or classification (argmax) as part of the net.
# Here we add a softmax and an argmax layer on top of each layer of the
# original net from index 91 onwards (modifies new_infer_net in-place).
# NOTE(review): 91 is a hard-coded position in the original inference net,
# presumably where the per-target 'conv-*' output layers start -- confirm
# whenever the original net changes.
for layer in infer_net.layer[91:]:
    softmax = caffe.proto.caffe_pb2.LayerParameter()
    softmax.softmax_param.engine = softmax.softmax_param.CUDNN
    softmax.type = u'Softmax'
    softmax.name = layer.name.replace('conv-', 'prob-')
    softmax.top.append(softmax.name)
    softmax.bottom.append(layer.top[0])

    label = caffe.proto.caffe_pb2.LayerParameter()
    label.name = layer.name.replace('conv-', 'label-')
    label.top.append(label.name)
    label.bottom.append(softmax.top[0])
    label.type = u'ArgMax'
    label.argmax_param.axis = 1

    # extend() appends copies of both messages to the net
    new_infer_net.layer.extend([softmax, label])

In [None]:
# Save the inference net as it is at this point (low rank filters, softmax and argmax layers)
save_net_proto('modified-inference-net.prototxt', new_infer_net)

# Save a non-bayesian version

In [None]:
# List each distinct layer type used by the inference net once
print ','.join(set([layer.type for layer in new_infer_net.layer]))

In [None]:
# The list holds the layer messages of new_infer_net; the next cell edits them
dropouts = [layer for layer in new_infer_net.layer if layer.type == 'Dropout']
print "Found", len(dropouts), "dropout layers"

In [None]:
for dropout in dropouts:
    if dropout.dropout_param.HasField('sample_weights_test'):
        print "Dropout", dropout.name, "_was_ set to work during testing...",
        dropout.dropout_param.ClearField('sample_weights_test')
        print "not anymore though!"
    else:
        print "Dropout", dropout.name, "is NOT set to work during testing..."

In [None]:
# Save the inference net again, now without 'sample_weights_test' on its dropout layers
save_net_proto('non-bayesian-inference-net.prototxt', new_infer_net)

# Final Test 
**Make Sure Things Load Right**
- The net should load without crashing everything
- The tops should ONLY be the 'label-' layers, or the 'loss' layers, if everything is connected properly

In [None]:
import caffe
caffe.set_device(0)
caffe.set_mode_gpu()

# Reload so that edits made to facade_layers since the first import are picked up.
# NOTE(review): presumably this module provides layers the net refers to -- confirm.
import facade_layers
reload(facade_layers)

# `new_proto_path` was not defined anywhere in this notebook, so this cell
# failed with a NameError.  The net is loaded in the TRAIN phase and the
# outputs are expected to be the 'loss' layers, so the modified training net
# saved earlier is used here.
# NOTE(review): confirm that this is the intended file.
new_proto_path = 'modified-training-net.prototxt'
net = caffe.Net(new_proto_path, 'deploy/test_weights.caffemodel', caffe.TRAIN)

results = net.forward()

If I have properly connected things, these should be just the 'loss' layers

In [None]:
# Names of the outputs returned by the forward pass
results.keys()

In [None]:
# Drop the reference to the test net (presumably to free its memory before training starts)
del net

# Set the solver parameters to use the new net

In [None]:
# Read the current solver settings.  The parser is referenced by its full
# module path, as in read_net_proto (a bare `Parse` is not defined in this
# notebook), and the file is closed as soon as it has been read.
solver = caffe.proto.caffe_pb2.SolverParameter()
with open('solver.prototxt') as f:
    google.protobuf.text_format.Parse(f.read(), solver)

In [None]:
# Point the solver at the modified training net, using an absolute path
solver.net = os.path.abspath('modified-training-net.prototxt')

In [None]:
# Show the solver settings for a visual check before they are written back
print solver

In [None]:
# Overwrite solver.prototxt with the updated settings.  The text is generated
# before the file is opened for writing, so a failure while serializing cannot
# leave a truncated solver.prototxt behind.
solver_text = google.protobuf.text_format.MessageToString(solver)
with open('solver.prototxt', 'w') as f:
    f.write(solver_text)

At this point we can **almost** start training. I once lost 1000 iterations because I forgot to make the folder that will hold the snapshots, so check that it exists first. 

In [None]:
# True when the directory part of the solver's snapshot prefix already exists
os.path.isdir(os.path.dirname(solver.snapshot_prefix))

In [None]:
import errno

# A snapshot prefix without a directory part means the current directory.
snapshot_dir = os.path.dirname(solver.snapshot_prefix) or os.curdir
try:
    os.makedirs(snapshot_dir)
    print("Since caffe won't do it for us (ugh!) I made the folder needed to hold our snapshots")
except OSError as e:
    # Only "the directory is already there" is fine; anything else (e.g.
    # missing permissions, or a regular file in the way) would make training
    # fail at the first snapshot, so it is reported right away.
    if e.errno != errno.EEXIST or not os.path.isdir(snapshot_dir):
        raise
    print("Ok, it's alright, we must have already made that directory.")

Now I should be able to run 'start-training.sh'