In [None]:
import os
import re

full_path = os.path.realpath("/content/drive/MyDrive/data_bert")
os.chdir(os.path.dirname(full_path))

print(f"Change CWD to: {os.path.dirname(full_path)}")


Change CWD to: /content/drive/MyDrive


In [None]:
def extract_seq(file_path, dir_path='seq', seq_length=510):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    nseq = 0
    nsmp = 0
    data = re.split(r'(^>.*)', ''.join(open(file_path).readlines()), flags=re.M)
    for i in range(2, len(data), 2):
        fid = data[i-1][1:].split('|')[0]
        nseq = nseq + 1
        fasta = list(data[i].replace('\n', '').replace('\x1a', ''))
        seq = [' '.join(fasta[j:j + seq_length])
               for j in range(0, len(fasta) + 1, seq_length)]
        nsmp = nsmp + len(seq)
        ffas = open(f"{dir_path}/{fid}.seq", "w")
        ffas.write('\n'.join(seq))
    print(f"Number of sequences: {nseq}")
    print(f"Number of samples: {nsmp}")


In [None]:
extract_seq(full_path+"/non.cv.txt", 'cv_neg')
extract_seq(full_path+"/enhancer.cv.txt", 'cv_pos')

extract_seq(full_path+"/non.ind.txt", 'ind_neg')
extract_seq(full_path+"/enhancer.ind.txt", 'ind_pos')

Number of sequences: 1484
Number of samples: 1484
Number of sequences: 1484
Number of samples: 1484
Number of sequences: 200
Number of samples: 200
Number of sequences: 200
Number of samples: 200


**second step**


In [None]:
!pip install wget

Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=8c9b7945341426d419378afe4b7d6244085e5db7e213c1192b47089ab393796d
  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [None]:
url="https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip"

import wget
filename = wget.download(url)

In [None]:
!unzip "/content/drive/MyDrive/wwm_uncased_L-24_H-1024_A-16.zip"

Archive:  /content/drive/MyDrive/wwm_uncased_L-24_H-1024_A-16.zip
   creating: wwm_uncased_L-24_H-1024_A-16/
  inflating: wwm_uncased_L-24_H-1024_A-16/bert_model.ckpt.meta  
  inflating: wwm_uncased_L-24_H-1024_A-16/bert_model.ckpt.data-00000-of-00001  
  inflating: wwm_uncased_L-24_H-1024_A-16/vocab.txt  
  inflating: wwm_uncased_L-24_H-1024_A-16/bert_model.ckpt.index  
  inflating: wwm_uncased_L-24_H-1024_A-16/bert_config.json  


In [None]:
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#"""Extract pre-computed feature vectors from BERT."""

In [None]:
!pip install tensorflow



In [None]:
!sudo apt-get install python3

Reading package lists... Done
^C


In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
sys.path.append("drive/MyDrive/bert_repo")

import codecs
import collections
import json
import re
import tensorflow as tf
import modeling
import tokenization

In [2]:
class InputExample(object):

  def __init__(self, unique_id, text_a, text_b):
    self.unique_id = unique_id
    self.text_a = text_a
    self.text_b = text_b

class InputFeatures(object):
  """A single set of features of data."""

  def __init__(self, unique_id, tokens, input_ids, input_mask, input_type_ids):
    self.unique_id = unique_id
    self.tokens = tokens
    self.input_ids = input_ids
    self.input_mask = input_mask
    self.input_type_ids = input_type_ids



In [3]:
def input_fn_builder(features, seq_length):
  """Creates an `input_fn` closure to be passed to TPUEstimator."""

  all_unique_ids = []
  all_input_ids = []
  all_input_mask = []
  all_input_type_ids = []

  for feature in features:
    all_unique_ids.append(feature.unique_id)
    all_input_ids.append(feature.input_ids)
    all_input_mask.append(feature.input_mask)
    all_input_type_ids.append(feature.input_type_ids)

  def input_fn(params):
    """The actual input function."""
    batch_size = params["batch_size"]

    num_examples = len(features)

    # This is for demo purposes and does NOT scale to large data sets. We do
    # not use Dataset.from_generator() because that uses tf.py_func which is
    # not TPU compatible. The right way to load data is with TFRecordReader.
    d = tf.data.Dataset.from_tensor_slices({
        "unique_ids":
            tf.constant(all_unique_ids, shape=[num_examples], dtype=tf.int32),
        "input_ids":
            tf.constant(
                all_input_ids, shape=[num_examples, seq_length],
                dtype=tf.int32),
        "input_mask":
            tf.constant(
                all_input_mask,
                shape=[num_examples, seq_length],
                dtype=tf.int32),
        "input_type_ids":
            tf.constant(
                all_input_type_ids,
                shape=[num_examples, seq_length],
                dtype=tf.int32),
    })

    d = d.batch(batch_size=batch_size, drop_remainder=False)
    return d

  return input_fn

In [4]:


def model_fn_builder(bert_config, init_checkpoint, layer_indexes, use_tpu,
                     use_one_hot_embeddings):
  """Returns `model_fn` closure for TPUEstimator."""

  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    input_type_ids = features["input_type_ids"]

    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode != tf.estimator.ModeKeys.PREDICT:
      raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    tvars = tf.compat.v1.trainable_variables()
    scaffold_fn = None
    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
         tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)
        return tf.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.compat.v1.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.compat.v1.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    all_layers = model.get_all_encoder_layers()

    predictions = {
        "unique_id": unique_ids,
    }

    for (i, layer_index) in enumerate(layer_indexes):
      predictions["layer_output_%d" % i] = all_layers[layer_index]

    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
    return output_spec

  return model_fn

In [5]:
def convert_examples_to_features(examples, seq_length, tokenizer):
  """Loads a data file into a list of `InputBatch`s."""

  features = []
  for (ex_index, example) in enumerate(examples):
    tokens_a = tokenizer.tokenize(example.text_a)

    tokens_b = None
    if example.text_b:
      tokens_b = tokenizer.tokenize(example.text_b)

    if tokens_b:
      # Modifies `tokens_a` and `tokens_b` in place so that the total
      # length is less than the specified length.
      # Account for [CLS], [SEP], [SEP] with "- 3"
      _truncate_seq_pair(tokens_a, tokens_b, seq_length - 3)
    else:
      # Account for [CLS] and [SEP] with "- 2"
      if len(tokens_a) > seq_length - 2:
        tokens_a = tokens_a[0:(seq_length - 2)]

    # The convention in BERT is:
    # (a) For sequence pairs:
    #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
    #  type_ids: 0     0  0    0    0     0       0 0     1  1  1  1   1 1
    # (b) For single sequences:
    #  tokens:   [CLS] the dog is hairy . [SEP]
    #  type_ids: 0     0   0   0  0     0 0
    #
    # Where "type_ids" are used to indicate whether this is the first
    # sequence or the second sequence. The embedding vectors for `type=0` and
    # `type=1` were learned during pre-training and are added to the wordpiece
    # embedding vector (and position vector). This is not *strictly* necessary
    # since the [SEP] token unambiguously separates the sequences, but it makes
    # it easier for the model to learn the concept of sequences.
    #
    # For classification tasks, the first vector (corresponding to [CLS]) is
    # used as as the "sentence vector". Note that this only makes sense because
    # the entire model is fine-tuned.
    tokens = []
    input_type_ids = []
    tokens.append("[CLS]")
    input_type_ids.append(0)
    for token in tokens_a:
      tokens.append(token)
      input_type_ids.append(0)
    tokens.append("[SEP]")
    input_type_ids.append(0)

    if tokens_b:
      for token in tokens_b:
        tokens.append(token)
        input_type_ids.append(1)
      tokens.append("[SEP]")
      input_type_ids.append(1)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
    while len(input_ids) < seq_length:
      input_ids.append(0)
      input_mask.append(0)
      input_type_ids.append(0)

    assert len(input_ids) == seq_length
    assert len(input_mask) == seq_length
    assert len(input_type_ids) == seq_length

    if ex_index < 5:
      tf.compat.v1.logging.info("*** Example ***")
      tf.compat.v1.logging.info("unique_id: %s" % (example.unique_id))
      tf.compat.v1.logging.info("tokens: %s" % " ".join(
          [tokenization.printable_text(x) for x in tokens]))
      tf.compat.v1.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
      tf.compat.v1.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
      tf.compat.v1.logging.info(
          "input_type_ids: %s" % " ".join([str(x) for x in input_type_ids]))

    features.append(
        InputFeatures(
            unique_id=example.unique_id,
            tokens=tokens,
            input_ids=input_ids,
            input_mask=input_mask,
            input_type_ids=input_type_ids))
  return features


In [6]:
def _truncate_seq_pair(tokens_a, tokens_b, max_length):
  """Truncates a sequence pair in place to the maximum length."""

  # This is a simple heuristic which will always truncate the longer sequence
  # one token at a time. This makes more sense than truncating an equal percent
  # of tokens from each, since if one sequence is very short then each token
  # that's truncated likely contains more information than a longer sequence.
  while True:
    total_length = len(tokens_a) + len(tokens_b)
    if total_length <= max_length:
      break
    if len(tokens_a) > len(tokens_b):
      tokens_a.pop()
    else:
      tokens_b.pop()

In [7]:
def read_examples(input_file):
  """Read a list of `InputExample`s from an input file."""
  examples = []
  unique_id = 0
  with tf.io.gfile.GFile(input_file, "r") as reader:
    while True:
      line = tokenization.convert_to_unicode(reader.readline())
      if not line:
        break
      line = line.strip()
      text_a = None
      text_b = None
      m = re.match(r"^(.*) \|\|\| (.*)$", line)
      if m is None:
        text_a = line
      else:
        text_a = m.group(1)
        text_b = m.group(2)
      examples.append(
          InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
      unique_id += 1
  return examples

In [18]:
def main():
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
  layers="1,2,3,4"
  layer_indexes = [int(x) for x in layers.split(",")]
  bert_config_file="/content/drive/MyDrive/wwm_uncased_L-24_H-1024_A-16/bert_config.json"
  bert_config = modeling.BertConfig.from_json_file(bert_config_file)

  tokenizer = tokenization.FullTokenizer(
      vocab_file="/content/drive/MyDrive/vocabs.txt", do_lower_case=True)

  is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.compat.v1.estimator.tpu.RunConfig(
      master=None,
      tpu_config=tf.compat.v1.estimator.tpu.TPUConfig(
          num_shards=8,
          per_host_input_for_training=is_per_host))

  examples = read_examples("/content/drive/MyDrive/bert/cv_neg/CHR10_108355610_108355810.seq")

  features = convert_examples_to_features(
      examples=examples, seq_length=512, tokenizer=tokenizer)

  unique_id_to_feature = {}
  for feature in features:
    unique_id_to_feature[feature.unique_id] = feature
  init_checkpoint="/content/drive/MyDrive/wwm_uncased_L-24_H-1024_A-16/bert_model.ckpt.index"
  model_fn = model_fn_builder(
      bert_config=bert_config,
      init_checkpoint=init_checkpoint,
      layer_indexes=layer_indexes,
      use_tpu=False,
      use_one_hot_embeddings=False)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
      use_tpu=False,
      model_fn=model_fn,
      config=run_config,
      predict_batch_size=64)

  input_fn = input_fn_builder(
      features=features, seq_length=512)

  with codecs.getwriter("utf-8")(tf.io.gfile.GFile("/content/drive/MyDrive/bert/neg/output3.jsonl",
                                               "w")) as writer:
    for result in estimator.predict(input_fn, yield_single_examples=True):
      unique_id = int(result["unique_id"])
      feature = unique_id_to_feature[unique_id]
      output_json = collections.OrderedDict()
      output_json["linex_index"] = unique_id
      all_features = []
      for (i, token) in enumerate(feature.tokens):
        all_layers = []
        for (j, layer_index) in enumerate(layer_indexes):
          layer_output = result["layer_output_%d" % j]
          layers = collections.OrderedDict()
          layers["index"] = layer_index
          layers["values"] = [
              round(float(x), 6) for x in layer_output[i:(i + 1)].flat
          ]
          all_layers.append(layers)
        features = collections.OrderedDict()
        features["token"] = token
        features["layers"] = all_layers
        all_features.append(features)
      output_json["features"] = all_features
      writer.write(json.dumps(output_json) + "\n")



In [19]:
if __name__ == "__main__":
  main()

INFO:tensorflow:*** Example ***
INFO:tensorflow:unique_id: 0
INFO:tensorflow:tokens: [CLS] c t a c t c c a t g a a a c a a t c c t g t a t c a t t t t g a c a g g a t a t a a c t c a t a c c t g c t t t a a a t t t c c t a t a c a g c t t t a g t t g c t a a t a a t a a t a a c a g t a a a a a t a a a t a t t a a a g c t a t g t t t c a g a c a c t a t c c t a a g t g c a t t a c c t g a a t t a a t c t t c a c a c a a t c c c a c t g g c a g a g a t g c t a t t a c c a t c t c t g a g [SEP]
INFO:tensorflow:input_ids: 101 172 189 170 172 189 172 172 170 189 176 170 170 170 172 170 170 189 172 172 189 176 189 170 189 172 170 189 189 189 189 176 170 172 170 176 176 170 189 170 189 170 170 172 189 172 170 189 170 172 172 189 176 172 189 189 189 170 170 170 189 189 189 172 172 189 170 189 170 172 170 176 172 189 189 189 170 176 189 189 176 172 189 170 170 189 170 170 189 170 170 189 170 170 172 170 176 189 170 170 170 170 170 189 170 170 170 189 170 189 189 170 170 170 176 172 189 170 189 

In [None]:
tf.compat.v1.logging
tf.compat.v1.layers.dense
tf.compat.v1.get_variable
tf.compat.v1.variable_scope
tf.compat.v1.estimator
tf.compat.v1.train.init_from_checkpoint

**Third step**


In [30]:
import json
import pandas as pd
import numpy as np
import sys


input_file = '/content/drive/MyDrive/bert/pos/output.jsonl'
output_file = '/content/drive/MyDrive/bert/pos/pos/output.csv'

with open(input_file, 'r') as json_file:
    json_list = list(json_file)


In [31]:

fout = open(output_file,'w')
for json_str in json_list:
    tokens = json.loads(json_str)["features"]
    for token in tokens:
        if token['token'] in ['[CLS]','[SEP]']:
            continue
        else:
            last_layers = np.sum([
                token['layers'][0]['values'],
                token['layers'][1]['values'],
                token['layers'][2]['values'],
                token['layers'][3]['values'],
            ], axis=0)
            fout.write(f'{",".join(["{:f}".format(i) for i in last_layers])}\n')


**Fourth step**

In [32]:

data_dir = '/datatset'

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob
import os

In [34]:
df_data = pd.DataFrame()
df_labels = []

In [35]:
print('Reading data ...')
for fileName in glob.glob(os.path.join('/content/drive/MyDrive/bert/pos/pos', '*.{}'.format('csv'))):
    df = pd.read_csv(fileName, header=None)
    print('In processing: ', fileName)
    df_new = df.stack().to_frame().T
    df_data = df_data.append(df_new, ignore_index=True)
    df_labels.append(1)

print('Reading negative data ...')
for fileName in glob.glob(os.path.join('/content/drive/MyDrive/bert/neg/neg', '*.{}'.format('csv'))):
    df = pd.read_csv(fileName, header=None)
    print('In processing: ', fileName)
    df_new = df.stack().to_frame().T
    df_data = df_data.append(df_new, ignore_index=True)
    df_labels.append(0)

Reading data ...
In processing:  /content/drive/MyDrive/bert/pos/pos/output3.csv
In processing:  /content/drive/MyDrive/bert/pos/pos/output2.csv


  df_data = df_data.append(df_new, ignore_index=True)
  df_data = df_data.append(df_new, ignore_index=True)


In processing:  /content/drive/MyDrive/bert/pos/pos/output.csv


  df_data = df_data.append(df_new, ignore_index=True)


Reading negative data ...
In processing:  /content/drive/MyDrive/bert/neg/neg/output.csv


  df_data = df_data.append(df_new, ignore_index=True)
  df_data = df_data.append(df_new, ignore_index=True)


In processing:  /content/drive/MyDrive/bert/neg/neg/output2.csv
In processing:  /content/drive/MyDrive/bert/neg/neg/output3.csv


  df_data = df_data.append(df_new, ignore_index=True)


In [36]:
X_trn = df_data
y_trn = df_labels

In [41]:
X_trn

Unnamed: 0_level_0,0,0,0,0,0,0,0,0,0,0,...,199,199,199,199,199,199,199,199,199,199
Unnamed: 0_level_1,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,-2.904895,-2.450678,4.395978,3.26153,6.559656,-0.196499,-2.240279,1.842122,-3.832016,-1.495885,...,0.496422,-1.421979,-3.760705,-3.100987,2.898359,-2.520253,-3.840402,-2.503434,-4.220105,-4.442402
1,2.458968,-4.852277,-3.247235,-0.530206,-8.029361,2.705585,-2.437454,7.077261,3.702555,-4.560316,...,0.131755,-3.931052,-3.712069,1.899212,-0.010228,0.585479,3.607199,1.052473,-0.480935,-5.34185
2,3.493159,-7.867645,1.352472,5.236685,-3.472324,2.136778,1.429936,3.910075,-2.852195,0.868437,...,,,,,,,,,,
3,0.724152,6.496428,2.325362,2.951714,1.662427,1.46244,-0.029788,-5.472077,2.296859,-2.840021,...,1.378899,-5.797547,-2.156732,-0.594492,-3.273976,-2.16719,-5.157911,-8.260825,-1.958489,1.851325
4,2.798739,3.141852,-0.203297,1.520197,0.918013,1.276347,1.538334,-0.519954,0.582979,-6.243598,...,3.808644,-0.638723,0.986916,-0.642885,2.164882,-2.50592,-2.430401,-3.293691,-1.077519,-0.302015
5,3.860233,5.529972,-1.004072,-6.456079,2.277514,3.685056,-1.131817,2.988829,-2.27843,1.998196,...,1.659769,2.734407,-4.886227,2.310485,1.441351,-1.807809,0.680148,-4.058925,0.366256,1.773841


In [39]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_trn, y_trn, test_size=0.25, random_state=42)


In [46]:
X_train

Unnamed: 0_level_0,0,0,0,0,0,0,0,0,0,0,...,199,199,199,199,199,199,199,199,199,199
Unnamed: 0_level_1,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
5,3.860233,5.529972,-1.004072,-6.456079,2.277514,3.685056,-1.131817,2.988829,-2.27843,1.998196,...,1.659769,2.734407,-4.886227,2.310485,1.441351,-1.807809,0.680148,-4.058925,0.366256,1.773841
2,3.493159,-7.867645,1.352472,5.236685,-3.472324,2.136778,1.429936,3.910075,-2.852195,0.868437,...,,,,,,,,,,
4,2.798739,3.141852,-0.203297,1.520197,0.918013,1.276347,1.538334,-0.519954,0.582979,-6.243598,...,3.808644,-0.638723,0.986916,-0.642885,2.164882,-2.50592,-2.430401,-3.293691,-1.077519,-0.302015
3,0.724152,6.496428,2.325362,2.951714,1.662427,1.46244,-0.029788,-5.472077,2.296859,-2.840021,...,1.378899,-5.797547,-2.156732,-0.594492,-3.273976,-2.16719,-5.157911,-8.260825,-1.958489,1.851325


In [40]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import utils
from tensorflow.keras import optimizers
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix

In [43]:

num_features = 153600

nb_classes = 2
nb_kernels = 3
nb_pools = 2
nb_epochs = 15


In [44]:
def _2D_CNN_model():
    model = Sequential()

    model.add(Conv2D(32, 3, 3, input_shape=(768,200,1), activation='relu'))
    model.add(MaxPooling2D(2))

    model.add(Conv2D(64, 3, 3, activation='relu'))
    model.add(MaxPooling2D(2))

    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(nb_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [49]:
X_train.size

819200

In [50]:
X_train.shape

(4, 204800)

In [51]:
len(X_train)

4

In [54]:
len(np.asarray(X_train))

4

In [None]:
np.asarray(X_train).reshape(5000,768,200,1)

In [None]:
_2D_model = _2D_CNN_model()

# Plot model history
_2D_history = _2D_model.fit(np.asarray(X_train).reshape(len(np.asarray(X_train)),768,200,60), utils.to_categorical(y_train,nb_classes),
                    validation_data=(np.asarray(X_test).reshape(len(np.asarray(X_test)),768,200,1), utils.to_categorical(y_test,nb_classes)),
                    epochs=nb_epochs, batch_size=16, verbose=1)