# Porting concise to keras

## TODO

- [ ] conv2d initialization with pwm
- [ ] smooth spline layer

### Concise architecture

Splines:
- `spline_score = X_spline %*% spline_weights`
- Transform:
  - `exp(spline_score)`
  - `spline_score + 1`

Linear features:
- `lm_feat = X_feat %*% feature_weights`

Model:
- conv2d, `padding = "valid", w = motif_base_weights`
- activation: exp or relu, bias = motif_bias
- elementwise_multiply: `hidden * spline_score`
- pooling: max, sum or mean (across the whole model)
- Optionally: multiply by non-linear scaling factor (model fitting)
- `pool_layer %*% motif_weights + X_feat %*% feature_weights + final_bias`
- loss: mse
- optimizer: Adam, optionally l-BFGS

Regularization:
- motif_base_weights, L1: motif_lamb
- motif_weights, L1: lambd
- spline_weights:
  - `diag(t(spline_weights) %*% S %*% spline_weights)`, L2_mean: spline_lamb
  - spline_weights, L2 / n_spline_tracks: spline_param_lamb

## Conv2d pwm, motif initialization

In [17]:
from keras import layers as kl
from keras import regularizers as kr
from keras.initializers import Initializer
## Arguments:
## - init_motifs=["TATTTAT", ..., "ACTAAT"]
## - init_motifs_scale=1
## - init_motif_bias=0
## - init_sd_motif=1e-2

## TODO - define a new layer or use the existing conv?
## TODO - which conv functions do dragonn and deepcpg use?

## write the initializer function

## Num filters, width?

## Regularizer
## Penalty strengths; they are passed to kr.L1L2(l1=l1_decay, l2=l2_decay)
## as the kernel regularizer of the Conv1D layer built further below.
l2_decay = 1
l1_decay = 1

from keras import backend as K


## TODO - implement
def _check_pwm_list(pwm_list):
    """Check the input validity
    """

    if invalid:
        raise ValueError("pwm_list invalid")
    raise NotImplementedError()

class PWMBiasInitializer(Initializer):
    """Bias initializer that is meant to be derived from a list of PWM's

    Work in progress: the returned biases are currently all zero, whatever
    `pwm_list` contains.

    # Arguments
        pwm_list: a list of PWM's or motifs. `None` (the default) stands
          for an empty list.
    """

    def __init__(self, pwm_list=None):
        ## Build a fresh list per instance - a `[]` default would be one
        ## single object shared by every instance created without arguments.
        if pwm_list is None:
            pwm_list = []
        _check_pwm_list(pwm_list)
        self.pwm_list = pwm_list

    def __call__(self, shape, dtype=None):
        """Return the initial bias tensor of the requested shape and dtype
        """
        ## TODO - think how to implement this
        ## TODO get the number of bases...
        ## TODO - derive the bias from the per-PWM `pwm.max_effect()`
        ## TODO - get the number of biases
        return K.constant(0, shape=shape, dtype=dtype)

    def get_config(self):
        """Return the constructor arguments as a dict
        """
        return {
            "pwm_list": self.pwm_list
        }

## TODO program also a PWM-based bias initializer
## TODO - should we just initialize the mean???
class PWMKernelInitializer(Initializer):
    """truncated normal distribution shifted by a PWM

    The initial kernel is `pwm_array + noise`, where `noise` is drawn from a
    zero-mean truncated normal distribution and `pwm_array` is built from
    `pwm_list` by `_to_pwm_array` (not implemented yet).

    # Arguments
        pwm_list: a list of PWM's or motifs. `None` (the default) stands
          for an empty list.
        stddev: a python scalar or a scalar tensor. Standard deviation of the
          random values to generate.
        seed: A Python integer. Used to seed the random generator.
    """

    def __init__(self, pwm_list=None, stddev=0.05, seed=None):
        ## Build a fresh list per instance - a `[]` default would be one
        ## single object shared by every instance created without arguments.
        if pwm_list is None:
            pwm_list = []
        _check_pwm_list(pwm_list)
        self.stddev = stddev
        self.seed = seed
        self.pwm_list = pwm_list
        ## TODO - define a PWM class ?! - define it by using pwm matrix or motif sequences...
        ## TODO - check the pwm_list & convert to array of the same shape
        ##        - consider the shape and pad, trim accordingly

    ## TODO - implement
    def _to_pwm_array(self, pwm_list, shape, dtype=None):
        """Convert the pwm_list to a single pwm array

        # Raises
            NotImplementedError: always, the conversion is not written yet.
        """
        raise NotImplementedError()

    def __call__(self, shape, dtype=None):
        """Return the PWM array perturbed by truncated-normal noise
        """
        pwm_array = self._to_pwm_array(self.pwm_list, shape, dtype)
        ## The PWM shift is applied exactly once, as an addend. The noise is
        ## kept zero-mean because `mean` is a scalar parameter of the
        ## distribution and cannot carry the per-position PWM values.
        noise = K.truncated_normal(shape,
                                   mean=0.0,
                                   stddev=self.stddev,
                                   dtype=dtype, seed=self.seed)
        return pwm_array + noise

    ## TODO - can PWM list be used? -> what are the requirements of get_config?
    def get_config(self):
        """Return the constructor arguments as a dict
        """
        return {
            'stddev': self.stddev,
            'seed': self.seed,
            'pwm_list': self.pwm_list
        }

## 1D convolution with 128 filters of width 11, relu activation and an L1+L2
## kernel penalty; kernel and bias start from the PWM-based initializers.
## NOTE(review): `pwm_list` is not defined anywhere in the visible source -
## it has to exist before this cell runs; confirm the intended value.
conv_l = kl.Conv1D(filters=128, kernel_size=11, 
                   kernel_regularizer=kr.L1L2(l1=l1_decay, l2=l2_decay), ## Regularization
                   padding="valid",
                   activation="relu", 
                   kernel_initializer=PWMKernelInitializer(pwm_list, stddev=0.1), ## TODO
                   bias_initializer=PWMBiasInitializer(pwm_list)
                   )

## Smooth spline layer

Arguments:
-  n_splines=None,
-  share_splines=False,  # should the positional bias be shared across motifs
-  spline_exp=False,    # use the exponential function
-  spline_lamb=1e-5,
-  spline_param_lamb=1e-5,

Computation:
- `spline_score = X_spline %*% spline_weights`
- Transform:
  - `exp(spline_score)  ## spline_exp == True`
  - `spline_score + 1   ## spline_exp == False`

Regularization:
- `diag(t(spline_weights) %*% S %*% spline_weights), L2_mean: spline_lamb`
- `spline_weights, L2 / n_spline_tracks: spline_param_lamb`

## Other todo's

- [ ] implement visualization techniques for GAM's

In [21]:
-

In [1]:
# Arguments:

#  pooling_layer="sum",
#  nonlinearity="relu",  # relu or exp
#  optimizer="adam",
#  batch_size=32,
#  n_epochs=3,
#  early_stop_patience=None,
#  n_iterations_checkpoint=20,
#  # network details
#  motif_length=9,
#  n_motifs=6,
#  step_size=0.01,
#  step_epoch=10,
#  step_decay=0.95,
#  # - multi-task learning
#  num_tasks=1,
#  # - splines
#  n_splines=None,
#  share_splines=False,  # should the positional bias be shared across motifs
#  spline_exp=False,    # use the exponential function
#  # regularization
#  lamb=1e-5,
#  motif_lamb=1e-5,
#  spline_lamb=1e-5,
#  spline_param_lamb=1e-5,
#  # initialization
#  init_motifs=None,  # motifs to initialize
#  init_motifs_scale=1,  # scale at which to initialize the weights
#  # right scale
#  nonlinearity_scale_factor=1,
#  init_motif_bias=0,
#  init_sd_motif=1e-2,
#  init_sd_w=1e-3,         # initial weight scale of feature w or motif w
#  # init_feat_w_lm=False,    # initialize features with a linear model
#  # output detail
#  print_every=100,

In [2]:
## define my own layer

from keras import backend as K
from keras.engine.topology import Layer
import numpy as np

## TODO - how to initialize the motif weights + biases?
## - init_motifs_scale
## - init_motif_bias

## TODO - setup init function

## define the model here
def DNA_conv_layer(seq_length, num_filters=(15, 15), conv_width=(15, 15), pool_width=35, L1=0, dropout=0.1):
    """
    Stack of 2D convolutions for sequence input, globally average-pooled.

    One conv -> relu -> dropout group is added per entry of `num_filters` /
    `conv_width` (both must have the same length). Every convolution is
    declared with input_shape=(4, seq_length, 1) and an L1 weight penalty of
    strength `L1`. `pool_width` is accepted but not used in the body.

    Returns the assembled Sequential model.
    """
    model = Sequential()
    assert len(num_filters) == len(conv_width)

    layer_specs = zip(num_filters, conv_width)
    for depth, (n_filt, width) in enumerate(layer_specs):
        # only the first convolution spans 4 rows; all later ones use 1
        height = 1 if depth != 0 else 4
        # TODO - are these parameters correct?
        conv = Convolution2D(
            nb_filter=n_filt, nb_row=height,
            nb_col=width, activation='linear',
            init='he_normal', input_shape=(4, seq_length, 1),
            dim_ordering='tf',
            W_regularizer=l1(L1))
        model.add(conv)
        model.add(Activation('relu'))
        model.add(Dropout(dropout))

    # for avg pooling - determine the maximum number of returned dimensions
    # merge together
    # model.add(AveragePooling2D(pool_size=(1, pool_width)))
    # model.add(Flatten())
    model.add(GlobalAveragePooling2D())
    return model




def deep_wide_model(n_features, seq_length, loss="mse", num_filters=(15, 15), conv_width=(15, 15),
                    lr=0.001,
                    pool_width=35, L1=0, L1_weights=0, dropout=0.1):
    """
    Compiled model joining a convolutional branch with a linear feature branch.

    The convolutional branch comes from DNA_conv_layer; the feature branch is
    a pass-through ("linear" activation) over `n_features` inputs. Both are
    concatenated and fed to a single-output Dense layer with an L1 penalty of
    strength `L1_weights`. The model is compiled with Adam(lr=lr) and `loss`.

    Returns the compiled Sequential model.
    """
    # convolutional branch
    seq_branch = DNA_conv_layer(seq_length, num_filters, conv_width, pool_width, L1, dropout)

    # linear model
    feat_branch = Sequential()
    feat_branch.add(Activation("linear", input_shape=(n_features, )))

    joined = Merge([seq_branch, feat_branch], mode='concat')

    head = Dense(output_dim=1, W_regularizer=l1(L1_weights))
    combined = Sequential()
    for layer in (joined, head):
        combined.add(layer)

    # model.add(Activation('linear'))
    combined.compile(optimizer=Adam(lr=lr), loss=loss)
    return combined

def concise_model():
    """Placeholder for the concise model builder: does nothing, returns None.
    """
    return None


class Concise(Layer):
    """Layer sketch: multiplies its input by a single trainable weight matrix

    # Arguments
        output_dim: number of output units, i.e. the second dimension of
          the weight matrix `W`.
        **kwargs: forwarded unchanged to the base `Layer` constructor.
    """

    def __name__(self):
        return "Concise"

    # TODO
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        ## super() has to name this very class; the earlier `MyLayer`
        ## reference raised a NameError as soon as the layer was created.
        super(Concise, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the weight matrix of shape (input_shape[1], output_dim)
        """
        # Create a trainable weight variable for this layer.
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[1], self.output_dim),
                                 initializer='uniform',
                                 trainable=True)
        super(Concise, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x, mask=None):
        """Return the dot product of the input `x` and the weights `W`
        """
        return K.dot(x, self.W)

    def get_output_shape_for(self, input_shape):
        """Return the output shape: (input_shape[0], output_dim)
        """
        return (input_shape[0], self.output_dim)

    def compute_output_shape(self, input_shape):
        """Same result as `get_output_shape_for`, under the Keras 2 hook name
        """
        return self.get_output_shape_for(input_shape)

    def get_config(self):
        """Return the base layer config extended with `output_dim`
        """
        config = {'output_dim': self.output_dim}
        base_config = super(Concise, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

Using Theano backend.
ERROR (theano.sandbox.cuda): nvcc compiler not found on $PATH. Check your nvcc installation and try again.
