### Packages

In [26]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD #to have Nesterov (ADAMS available)
import time
import math

### Data (strings) and splittng

In [27]:
fname = "DATA/sequences12.csv"
sx, sy = np.loadtxt(fname, delimiter = ',',
                   usecols = (0,1), unpack=True, dtype=str)
N = len(sy)
Ls = len(sx[0])

perc_train = 0.8
N_train = int(N*perc_train)
N_test = N - N_train
print(f'\ndata: {N}\ntrain: {N_train}\ntest: {N_test}')


data: 10000
train: 8000
test: 2000


In [28]:
Q = ['A','C','G','T']
Nc = 4
onehc = {Q[i]:i for i in range(Nc)}
print(onehc)

{'A': 0, 'C': 1, 'G': 2, 'T': 3}


### Data conversion


In [29]:
y = sy.astype(int)

L = Ls * Nc
print(L)

x = np.zeros((N,L))

for n in range(N):
    for i in range(Ls):
        #for every one step I have of 4 in the step of ohenc
        x[n][i*4+onehc[sx[n][i]]] = 1

48


In [30]:
print(sx[0])
print(x[0])

TAGGCGTCGATG
[0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0.
 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0.]


### Split train/test-validation

In [31]:
(x_train, y_train ) = (x[:N_train], y[:N_train])
(x_test, y_test ) = (x[N_train:], y[N_train:])

#fraction of data equal to one to get a point on the balance
print(y_train.sum() / N_train)
print(y_test.sum() / N_test)

0.314125
0.3205


### Keras

In [32]:
np.random.seed(123)

model = Sequential()

model.add(Dense(L, input_shape=(L,),activation="relu"))
model.add(Dense(L/2, activation="relu"))
model.add(Dense(L/4, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(1, activation = 'sigmoid'))

print(model.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 48)                2352      
_________________________________________________________________
dense_14 (Dense)             (None, 24)                1176      
_________________________________________________________________
dense_15 (Dense)             (None, 12)                300       
_________________________________________________________________
dropout_1 (Dropout)          (None, 12)                0         
_________________________________________________________________
dense_16 (Dense)             (None, 1)                 13        
Total params: 3,841
Trainable params: 3,841
Non-trainable params: 0
_________________________________________________________________
None


In [33]:
model.compile (loss='binary_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])

In [34]:
fit = model.fit(x_train, y_train,
               epochs = 30, batch_size = 50,
               validation_data = (x_test,y_test),
               shuffle=True)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [15]:
model = keras.Sequential([
    keras.layers.Dense(units=512, activation='relu', input_shape=[8]), 
    keras.layers.Dense(units=512, activation='relu' ), 
    keras.layers.Dense(units=512, activation='relu' ), 
    keras.layers.Dense(units=1)
])
print(model.summary())

model.compile (loss='binary_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])

fit = model.fit(x_train, y_train,
               epochs = 30, batch_size = 50,
               validation_data = (x_test,y_test),
               shuffle=True)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 512)               4608      
_________________________________________________________________
dense_6 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_7 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 513       
Total params: 530,433
Trainable params: 530,433
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/30


ValueError: in user code:

    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:754 train_step
        y_pred = self(x, training=True)
    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_spec.py:259 assert_input_compatibility
        ' but received input with shape ' + display_shape(x.shape))

    ValueError: Input 0 of layer sequential_1 is incompatible with the layer: expected axis -1 of input shape to have value 8 but received input with shape (50, 48)
