# CN_Network

## Imports

In [1]:
import tensorflow as tf
from numpy import prod, savez, load
from os import path, makedirs
from pprint import pprint

In [2]:
class Flat_CNN(object):
    """A neural network of multi-path layers.

    Filters for each path have shape_height = 1 (hence, Flat).

    Constructing an instance stores the hyper-parameters in ``self.params``,
    mirrors them onto attributes, and writes them to disk via
    ``_save_params``. The TensorFlow graph itself is only built when
    ``create_graph`` is called.
    """

    def __init__(self,
                 name='Flat_CNN',
                 wide_filter_widths=None,
                 width_reduction_factors=None,
                 dtype=tf.float32,
                 num_freq_channels=1024,
                 learning_rate=0.0001,
                 cost_name='MSE',
                 threshold=0.00625,
                 g_shift=0.01):
        """Store the hyper-parameters and save them under ``name``.

        Args:
            name: Network name; also the file name used by ``_save_params``.
            wide_filter_widths: Width of the wide convolution in each
                quad-path layer (one entry per layer). ``None`` means no
                layers (an empty list).
            width_reduction_factors: Stride along the width axis for each
                quad-path layer. ``None`` means an empty list.
            dtype: TensorFlow dtype used for placeholders and variables.
            num_freq_channels: Width of each input sample.
            learning_rate: Learning rate handed to the Adam optimizer.
            cost_name: ``'MSE'`` selects mean squared error; any other value
                selects the mean inverse shifted gaussian cost.
            threshold: Sigma of the gaussian cost and the tolerance of the
                percent-within-threshold metric.
            g_shift: Shift value of the mean inverse shifted gaussian cost.
        """
        # Fresh lists per instance: a literal [] default would be shared
        # between every instance constructed without these arguments.
        if wide_filter_widths is None:
            wide_filter_widths = []
        if width_reduction_factors is None:
            width_reduction_factors = []

        self.params = {"width_reduction_factors": width_reduction_factors,
                       "wide_filter_widths": wide_filter_widths,
                       "num_freq_channels": num_freq_channels,
                       "learning_rate": learning_rate,
                       "cost_name": cost_name,
                       "threshold": threshold,
                       "g_shift": g_shift,
                       "dtype": dtype,
                       "name": name}
        self._set_params()
        self._save_params()

    def _set_params(self):
        """Copy every entry of ``self.params`` onto the matching attribute."""
        for key in ('name',
                    'wide_filter_widths',
                    'width_reduction_factors',
                    'dtype',
                    'num_freq_channels',
                    'learning_rate',
                    'cost_name',
                    'threshold',
                    'g_shift'):
            setattr(self, key, self.params[key])

    def _save_params(self, param_direc='network_params/'):
        """Write ``self.params`` to ``<param_direc>/<name>.npz``.

        Not safe - will overwrite existing file.
        """
        if not path.exists(param_direc):
            makedirs(param_direc)

        # path.join keeps this correct whether or not param_direc ends
        # with a separator.
        savez(path.join(param_direc, self.name), self.params)

    def load_params(self, path):
        """Replace this network's parameters with those saved at ``path``.

        Args:
            path: File path of a saved parameter file *without* the
                ``.npz`` extension (e.g. ``'network_params/Kansas-A'``).
                Note that this argument shadows the ``os.path`` import
                inside this method.

        The network keeps its own ``name``; every other parameter is taken
        from the file.
        """
        # The params dict is stored as a pickled 0-d object array, which
        # NumPy >= 1.16.3 refuses to load unless allow_pickle=True.
        # SECURITY: unpickling executes arbitrary code - only load parameter
        # files that come from a trusted source.
        with load(path + '.npz', allow_pickle=True) as archive:
            loaded_params = archive['arr_0'].item()

        self.params = loaded_params
        self.params['name'] = self.name
        self._set_params()

    def print_params(self):
        """Pretty-print the current parameter dictionary."""
        pprint(self.params)

    def _quad_path_layer(self, input, wide_conv_width, strides, layer_name, num_1x1_conv_filters=4):
        """Build one four-path layer and return its concatenated output.

        The four paths are: average-pool + 1x1 convolution, max-pool + 1x1
        convolution, and a narrow and a wide convolution that both read a
        shared 1x1 convolution of the input. The path outputs are
        concatenated along axis 3.

        Reads ``self.conv_keep_prob``, which is created by ``create_graph``.

        Args:
            input: Rank-4 tensor feeding the layer.
            wide_conv_width: Filter width of the wide path; the narrow path
                uses half of it (integer division).
            strides: Strides used by the pooling and narrow/wide convolutions.
            layer_name: Variable scope wrapping the whole layer.
            num_1x1_conv_filters: Filter count of each 1x1 convolution; the
                narrow path gets half and the wide path a quarter of it.
        """

        def _conv_filters(shape):
            """Create the trainable filter variable for a convolution."""
            return tf.get_variable(name='filters',
                                   dtype=self.dtype,
                                   shape=shape,
                                   initializer=tf.contrib.layers.xavier_initializer())

        def _bias_add_scope(tensor, shape):
            """Creates a scope around a trainable bias and its addition to tensor"""
            with tf.variable_scope('add_bias'):
                bias = tf.get_variable(name='bias',
                                       dtype=self.dtype,
                                       shape=shape,
                                       initializer=tf.contrib.layers.xavier_initializer())
                return tf.nn.bias_add(tensor, bias)

        def _conv_scope(tensor, filter_shape, conv_strides, scope_name='convolution'):
            """Creates a scope around a convolution (conv, bias, relu, dropout)."""
            with tf.variable_scope(scope_name):
                conv = tf.nn.conv2d(input=tensor,
                                    filter=_conv_filters(filter_shape),
                                    strides=conv_strides,
                                    padding='SAME')
                conv = _bias_add_scope(conv, [filter_shape[-1]])
                conv = tf.nn.relu(conv)
                return tf.nn.dropout(conv, self.conv_keep_prob)

        def _pool_scope(pool_fn, scope_name, tensor, pool_strides, num_conv_filters):
            """Creates a scope around a pooling path followed by a 1x1 convolution."""
            with tf.variable_scope(scope_name):
                pooled = pool_fn(value=tensor, ksize=pool_strides, strides=pool_strides, padding="SAME")
                in_channels = pooled.get_shape().as_list()[3]
                return _conv_scope(pooled,
                                   [1, 1, in_channels, num_conv_filters],
                                   [1, 1, 1, 1],
                                   scope_name="1x1_conv")

        def _filter_cat_scope(filters):
            """Creates a scope around filter concatenation (layer output)"""
            with tf.variable_scope('filter_cat'):
                return tf.concat(filters, 3)

        ######

        with tf.variable_scope(layer_name):

            # Floor division: filter shapes must be integers, and "/" would
            # produce floats under Python 3.
            narrow_conv_width = wide_conv_width // 2

            num_narrow_conv_filters = num_1x1_conv_filters // 2
            num_wide_conv_filters = num_narrow_conv_filters // 2

            unit_strides = [1, 1, 1, 1]

            avg_output = _pool_scope(tf.nn.avg_pool, 'average', input, strides, num_1x1_conv_filters)
            max_output = _pool_scope(tf.nn.max_pool, 'max', input, strides, num_1x1_conv_filters)

            input_channels = input.get_shape().as_list()[3]
            initial_conv = _conv_scope(input,
                                       [1, 1, input_channels, num_1x1_conv_filters],
                                       unit_strides,
                                       '1x1_conv')
            initial_channels = initial_conv.get_shape().as_list()[3]

            narrow_convolution = _conv_scope(initial_conv,
                                             [1, narrow_conv_width, initial_channels, num_narrow_conv_filters],
                                             strides,
                                             scope_name='narrow')
            wide_convolution = _conv_scope(initial_conv,
                                           [1, wide_conv_width, initial_channels, num_wide_conv_filters],
                                           strides,
                                           scope_name='wide')

            catted_filters = _filter_cat_scope([avg_output, narrow_convolution, wide_convolution, max_output])

        return catted_filters

    def create_graph(self):
        """Reset the default graph and build the whole network in it.

        Creates the placeholders (``X``, ``targets``, the three keep
        probabilities, ``image_buf``), the quad-path layers, two fully
        connected layers, the prediction, both costs (``MSE`` and ``MISG``),
        the Adam training op (``optimizer``), the percent-within-threshold
        metric (``PWT``) and the merged summaries (``summary``).
        """
        # creates the network graph
        tf.reset_default_graph()

        # Note, tf.placeholder() are assigned by tf.Session()

        with tf.variable_scope('keep_probs'):
            # Dropout rate = 1 - keep_prob

            # probability of keeping sample_keep_prob
            # suggest 0.8
            self.sample_keep_prob = tf.placeholder(self.dtype, name='sample_keep_prob')

            # probability of keeping convolution output
            # suggest 0.9
            self.conv_keep_prob = tf.placeholder(self.dtype, name='conv_keep_prob')

            # probability of keeping fully connected layer output
            # suggest 0.95
            self.fcl_keep_prob = tf.placeholder(self.dtype, name='fcl_keep_prob')

        with tf.variable_scope('sample'):
            # holds the 1 x num_channels samples that are fed into the network
            self.X = tf.placeholder(self.dtype, shape=[None, 1, self.num_freq_channels, 1], name='X')
            self.X_dropout = tf.nn.dropout(self.X, self.sample_keep_prob)

        self.layers = []

        for i, wide_width in enumerate(self.wide_filter_widths):
            # previous layer is input for current layer
            layer_input = self.layers[-1] if self.layers else self.X_dropout
            strides = [1, 1, self.width_reduction_factors[i], 1]
            q_layer = self._quad_path_layer(layer_input, wide_width, strides, 'layer_{}'.format(i))
            self.layers.append(q_layer)

        with tf.variable_scope('fcl_1'):

            fcl_1 = tf.contrib.layers.flatten(self.layers[-1])
            fcl_1 = tf.contrib.layers.fully_connected(fcl_1, 1024)
            fcl_1 = tf.nn.dropout(fcl_1, self.fcl_keep_prob)

            self.layers.append(fcl_1)

        with tf.variable_scope('fcl_2'):

            # Feed the flattened previous layer forward (the original
            # computed the flatten and then discarded it).
            fcl_2 = tf.contrib.layers.flatten(self.layers[-1])
            fcl_2 = tf.contrib.layers.fully_connected(fcl_2, 32)
            fcl_2 = tf.nn.dropout(fcl_2, self.fcl_keep_prob)

            self.layers.append(fcl_2)

        with tf.variable_scope('prediction'):
            final_layer = self.layers[-1]
            # Plain Python int so it is a valid static shape entry.
            num_features = int(prod(final_layer.get_shape().as_list()[1:]))

            reshape_final_layer = tf.reshape(final_layer, [-1, num_features])
            prediction_weight = tf.get_variable(name='weight',
                                                shape=[num_features, 1],
                                                dtype=self.dtype,
                                                initializer=tf.contrib.layers.xavier_initializer())
            pred_times_weight = tf.matmul(reshape_final_layer, prediction_weight)
            # NOTE(review): this adds a constant bias of 1, not a trainable
            # variable. Left unchanged so existing checkpoints stay loadable.
            self.predictions = tf.nn.bias_add(pred_times_weight, [1])

        with tf.variable_scope('targets'):
            self.targets = tf.placeholder(dtype=self.dtype, shape=[None, 1], name='targets')

        with tf.variable_scope('costs'):

            error = tf.subtract(self.targets, self.predictions, name='error')
            squared_error = tf.square(error, name='squared_difference')

            with tf.variable_scope('mean_inverse_shifted_gaussian'):
                with tf.variable_scope('normal_distribution'):

                    # Constants carry self.dtype so that a non-float32
                    # network dtype does not cause a type mismatch.
                    sigma = tf.constant(self.threshold, dtype=self.dtype, name='sigma')
                    zero_mean = tf.constant(0.0, dtype=self.dtype)
                    normal_dist = tf.contrib.distributions.Normal(zero_mean, sigma, name='normal_dist')
                    gaussian_prob = normal_dist.prob(error, name='gaussian_prob')
                    shift = tf.constant(self.g_shift, dtype=self.dtype, name='gaussian_shift_value')
                    # Use the configured shift in the denominator as well
                    # (it was hard-coded to .01, the default g_shift).
                    shifted_gaussian = tf.add(gaussian_prob, shift, name='shifted_gaussian')
                self.MISG = tf.reduce_mean(tf.divide(shift, shifted_gaussian), name='mean_inverse_shifted_gaussian')

            with tf.variable_scope('mean_squared_error'):
                self.MSE = tf.reduce_mean(squared_error)

        with tf.variable_scope('train'):

            cost = self.MSE if self.cost_name == 'MSE' else self.MISG
            LR = tf.constant(self.learning_rate, name='learning_rate')

            self.optimizer = tf.train.AdamOptimizer(LR, epsilon=1e-08).minimize(cost)

        with tf.variable_scope('logging'):

            with tf.variable_scope('image'):
                self.image_buf = tf.placeholder(tf.string, shape=[])
                epoch_image = tf.expand_dims(tf.image.decode_png(self.image_buf, channels=4), 0)

            with tf.variable_scope('percent_within_threshold'):
                # Compare the magnitude of the error: without abs() every
                # negative error would count as within the threshold.
                within_threshold = tf.less_equal(tf.abs(self.targets - self.predictions), sigma)
                self.PWT = 100. * tf.reduce_mean(tf.cast(within_threshold, self.dtype))

            tf.summary.histogram(name='targets', values=self.targets)
            tf.summary.histogram(name='predictions', values=self.predictions)
            tf.summary.scalar(name='MSE', tensor=self.MSE)
            tf.summary.scalar(name='MISG', tensor=self.MISG)
            tf.summary.scalar(name='PWT', tensor=self.PWT)
            tf.summary.image('prediction_vs_actual', epoch_image)
            self.summary = tf.summary.merge_all()

        print('Graph Ready')

## Usage

### Creating a new network

Wide filters will have widths of...
`wide_filter_widths = [64,32,8]`

Layers will downsample by ...
`width_reduction_factors = [2,3,4]`

Kansas is really flat ...
`Kansas = Flat_CNN(name = 'Kansas-A',
                  wide_filter_widths = wide_filter_widths,
                  width_reduction_factors = width_reduction_factors)`

`Kansas.print_params()`

`{'cost_name': 'MSE',
 'dtype': tf.float32,
 'g_shift': 0.01,
 'learning_rate': 0.0001,
 'name': 'Kansas-A',
 'num_freq_channels': 1024,
 'threshold': 0.00625,
 'wide_filter_widths': [64, 32, 8],
 'width_reduction_factors': [2, 3, 4]}`

Before a training loop:
`Kansas.create_graph()`

### Recreate a previous network (create a new network with the same parameters as an old network)
 - Currently, the loading of previously trained weights is handled by the separate training function.
 - On reload, all settings can be changed except for:
  - num_freq_channels
  - dtype (pretty sure)
  - wide_filter_widths
  - width_reduction_factors

Pancakes are flat... like Kansas
`Pancake = Flat_CNN(name = 'Pancake-A')`

`Pancake.load_params('network_params/Kansas-A')`

`Pancake.print_params()`

Before training loop

`Pancake.create_graph()`