# Imports

In [8]:
import pandas as pd
import numpy as np

The next cell adds the parent directory to `sys.path`, which is required in order to import the project package from this notebook (e.g. `phcnn.layers`).

In [2]:
import os
import sys
# Absolute path of the parent directory of the current working directory.
module_path = os.path.abspath(os.pardir)
# Register it on the import path once, so re-running the cell adds no duplicates.
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

In [9]:
from keras.layers import (Lambda, MaxPooling1D, Flatten,
                          Dropout, Dense, Input)
from keras.models import Model
from keras.backend import floatx

from phcnn.layers import PhyloConv1D, euclidean_distances
from keras.utils.np_utils import to_categorical

# Parameters

Parameters of the convolutional layer: `nb_neighbors` is the number of neighbors to be convolved together, and `nb_filters` is the number of convolutional filters.

In [4]:
# Number of convolutional filters.
nb_filters = 4
# Number of neighbors that are convolved together.
nb_neighbors = 4

# Import of data

We need to expand Xs to the shape (nb_samples, nb_features, filters), so we apply np.expand_dims on the last axis to signal that we have only one filter. Furthermore, we need y in categorical form, so we apply to_categorical.

In [5]:
# Training table as a plain 2-D array: one row per sample, one column per feature
# (the first column of the file is consumed as the index).
# `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0.
Xs = pd.read_csv('../datasets/ibd_dataset/HS_CDf/Sokol_16S_taxa_HS_CDf_commsamp_training.txt',
                 sep='\t', header=0, index_col=0).values
nb_features = Xs.shape[1]
# Append a trailing singleton axis: (nb_samples, nb_features) -> (nb_samples, nb_features, 1).
Xs = np.expand_dims(Xs, axis=-1)
# The builtin `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
y = np.loadtxt('../datasets/ibd_dataset/HS_CDf/Sokol_16S_taxa_HS_CDf_commsamp_training_lab.txt', dtype=int)
# Convert the integer class labels to categorical form, as required by the network output.
y = to_categorical(y)

Furthermore, we need to import the MDS coordinates for our features. Unfortunately, Keras has a limitation: it requires a batch size, and it requires the batch size to be the same for all inputs. So we need to add padding to simulate a sample dimension even for the coordinates. We chose to do it in the most straightforward way possible: we simply duplicate the coordinate matrix for every sample. We will drop this padding after the matrix is loaded into the network.

In [None]:
# Coordinate table as a plain 2-D array (the first column of the file is consumed as the index).
# `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0.
c = pd.read_csv('../datasets/coordinates/coordinates_cdf.txt',
                sep='\t', header=0, index_col=0).values
nb_coordinates = c.shape[0]
# Padding for Keras: stack one float64 copy of the coordinate matrix per sample in Xs,
# giving shape (nb_samples,) + c.shape. np.repeat replaces the explicit Python loop.
coord = np.repeat(c[np.newaxis, ...].astype(np.float64), Xs.shape[0], axis=0)
# Trailing singleton axis, like the one appended to Xs.
coord = np.expand_dims(coord, axis=-1)

# Network



In [6]:
# Symbolic inputs: the per-sample feature vector and the padded coordinate matrix.
data = Input(shape=(nb_features, 1), name="data", dtype=floatx())
coordinates = Input(shape=(nb_coordinates, nb_features, 1),
                    name="coordinates", dtype=floatx())

conv_layer = data
# We remove the padding that we added to work around keras limitations:
# every entry along the batch axis is the same matrix, so only the first is kept.
# (The lambda argument is deliberately not called `c`, to avoid shadowing the
# coordinate array loaded earlier in the notebook.)
conv_crd = Lambda(lambda padded: padded[0],
                  output_shape=lambda shape: shape[1:])(coordinates)

# NOTE(review): presumably the pairwise distances between features in coordinate
# space; confirm against phcnn.layers.euclidean_distances.
distances = euclidean_distances(conv_crd)
# The layer returns two tensors, rebound here to the same names as its two inputs.
conv_layer, conv_crd = PhyloConv1D(distances, nb_neighbors,
                                   nb_filters, activation='relu')([conv_layer, conv_crd])

# Named `pooled` rather than `max` so the builtin max() is not shadowed for the
# rest of the kernel session.
pooled = MaxPooling1D(pool_size=2, padding="valid")(conv_layer)
flatt = Flatten()(pooled)
drop = Dropout(0.25)(Dense(units=64, activation='relu')(flatt))
# Two-unit softmax head, trained with categorical cross-entropy below.
output = Dense(units=2, kernel_initializer="he_normal",
               activation="softmax", name='output')(drop)

model = Model(inputs=[data, coordinates], outputs=output)
model.compile(optimizer='Adam', loss='categorical_crossentropy')

In [7]:
# Inputs are listed in the same order as the model's inputs: data first, coordinates second.
training_inputs = [Xs, coord]
# Kept as the last expression so the returned History object is displayed.
model.fit(x=training_inputs, y=y)

Epoch 1/1


<keras.callbacks.History at 0x7f183b5c2ac8>