In [2]:
from __future__ import division
import os
import csv
import math
import imageio
import scipy.misc
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from operator import itemgetter

from random import sample
import shutil



In [5]:
# Creating a cvs file from the bpseq file
def readfile(filename, rna_dir):
    fopen = open(rna_dir + filename, 'r')
    rnafile = fopen.readlines()
    with open("test.txt", 'w') as f:
        for i in range(len(rnafile)):
            s = rnafile[i]
            result = '\t'.join(s.split())
            result = result + "\n"
            f.write(result)
    data = pd.read_csv("test.txt", sep='\t', header=None)
    return data, filename


# Check for pseudknots if there is one presented then return true otherwise false
def check_pseudoknot(data, filename):
    rnadata1 = data.loc[:, 0]
    rnadata2 = data.loc[:, 2]
    flag = False
    for i in range(len(rnadata2)):
        for j in range(len(rnadata2)):
            if (rnadata1[i] < rnadata1[j] < rnadata2[i] < rnadata2[j]):
                flag = True
                break
    return flag

# Removing files with pseudoknots
def remove_pseudoknot(rna_dir):
    pathDir = os.listdir(rna_dir)
    for i in pathDir:
        data, filename = readfile(i, rna_dir)
        flag = check_pseudoknot(data, filename)
        if flag:
            os.remove(rna_dir + i)


# Create dot-bracket structure
def transform(data, filename):
    rnaseq = data.loc[:, 1]
    rnadata1 = data.loc[:, 0]
    rnadata2 = data.loc[:, 2]
    rnastructure = []
    for i in range(len(rnadata2)):
        if rnadata2[i] == 0:
            rnastructure.append(".")
        else:
            if rnadata1[i] > rnadata2[i]:
                rnastructure.append(")")
            else:
                rnastructure.append("(")
    return rnaseq, rnastructure, filename


# Extract sequence and structure data
def data_extract(rna_dir):
    pathDir = os.listdir(rna_dir)
    data_dict = {}
    for i in pathDir:
        data, filename = readfile(i, rna_dir)
        rnaseq, rnastructure, filename = transform(data, filename)
        data_dict[filename] = [rnaseq, rnastructure]
    return data_dict


def Gaussian(x):
    return math.exp(-0.5*(x*x))

# Gives value how likely a base pair happen
def paired(x, y):
    if x == 'A' and y == 'U':
        return 2
    elif x == 'G' and y == 'C':
        return 3
    elif x == "G" and y == 'U':
        return 0.8
    elif x == 'U' and y == 'A':
        return 2
    elif x == 'C' and y == 'G':
        return 3
    elif x == "U" and y == 'G':
        return 0.8
    else:
        return 0

# Representing the rna as matrix
def creatmat(data):
    mat = np.zeros([len(data), len(data)])
    for i in range(len(data)):
        for j in range(len(data)):
            coefficient = 0
            for add in range(30):
                if i - add >= 0 and j + add < len(data):
                    score = paired(data[i - add], data[j + add])
                    if score == 0:
                        break
                    else:
                        coefficient = coefficient + score * Gaussian(add)
                else:
                    break
            if coefficient > 0:
                for add in range(1, 30):
                    if i + add < len(data) and j - add >= 0:
                        score = paired(data[i + add], data[j - add])
                        if score == 0:
                            break
                        else:
                            coefficient = coefficient + score * Gaussian(add)
                    else:
                        break
            mat[[i], [j]] = coefficient
    return mat

# Helper founction for file name giving
def complete(i):
    if i < 10:
        str1 = '00' + str(i)
    elif i < 100:
        str1 = '0' + str(i)
    else:
        str1 = str(i)
    return str1

# Helper founction for file name giving
def change(x):
    if x == '(':
        return 0
    elif x == ')':
        return 1
    else:
        return 2

# Check whether neigbouring bases also makes a stem brige
def check(data):
    num_0 = num_1 = num_2 = 0
    for i in range(len(data)):
        if data[i][0] == '(':
            num_0 = num_0 + 1
        elif data[i][0] == ')':
            num_1 = num_1 + 1
        else:
            num_2 = num_2 + 1
        if num_1 == num_0:
            return True
        else:
            return False

# Create a picture form the matrix representation, and saves them by the sliding window size
def create_png(data, png_dir):
    for filename, d_list in data.items():
        if check(d_list[0]):
            im = np.zeros([len(d_list[0])+19, len(d_list[0]), 3])
            mat = creatmat(d_list[0])
            im[9:len(d_list[0])+9, 0:len(d_list[0]),
               0] = im[9:len(d_list[0])+9, 0:len(d_list[0]), 0] + mat
            for j in range(len(d_list[0])):
                pic = im[j:j+19]
                image = Image.fromarray(((pic)*255/3).astype(np.uint8), 'RGB')
                image = image.resize((169, 19))
                new_filename = png_dir + str(change(d_list[1][j])) + '.' + filename + '_' + complete(j) + '.png'
                image.save(new_filename)


# Creates intermediate results from the rna sequence which later can be used for the model as data
# rna_dir: rna sequence direcorty path to be preprocessed
# png_dir: aim path where preprocessed images can be saved
def pre_process_files(rna_dir, png_dir):
    remove_pseudoknot(rna_dir)
    data = data_extract(rna_dir)
    create_png(data, png_dir)



'''Uncomment these lines for preprocessing files
    only needd to be done once per dicionary. 
    (The two lines corresponds for training set and evaluation set)
    First input: rna sequnce location
    Second input: location where intermediate results can be saved '''
# pre_process_files('./drive/MyDrive/RNA/TEA/', './drive/MyDrive/RNA/TEA_png/')
# pre_process_files('./drive/MyDrive/RNA/a/', './drive/MyDrive/RNA/a_png/')



53082 44276
53082 26541
TestSetA_png/0.0.bpseq_001.png


In [None]:
from random import sample
import shutil

# Used for data normalization
# src_dir: source directory where preprocessed images can be found
# aim_dir: path where the normalized data can be written
def create_eq_folders(src_dir, aim_dir):
  pathDir = os.listdir(src_dir)
  list_1 = []
  list_2 = []
  for i in range(len(pathDir)):
    name = pathDir[i].split('.')[0]
    if name == '2':
        list_2.append(i)
    else:
        list_1.append(i)
        dest = shutil.copyfile(src_dir+pathDir[i], aim_dir+pathDir[i])
    
  list_2 = sample(list_2, int(len(list_1)/2))
  for i in list_2:
    dest = shutil.copyfile(src_dir+pathDir[i], aim_dir+pathDir[i])


'''Uncomment this lines for normalize files
    only needd to be done once per dicionary. '''
# create_eq_folders('./drive/MyDrive/RNA/a_png/', './drive/MyDrive/RNA/a_png_eq/')


In [6]:
def one_hot_matrix(label, depth=3):
    """
    Computes the one hot encoding for a single label

    Arguments:
        label --  (int) Categorical labels
        depth --  (int) Number of different classes that label can take

    Returns:
         one_hot -- tf.Tensor A single-column matrix with the one hot encoding.
    """
    one_hot = tf.reshape(tf.one_hot(label, depth, axis=0), (depth,))

    return one_hot


def get_files(png_dir):
    """
    Args:
        png_dir:file directtory
    Returns:
        list of images and labels
    """
    left_bracket = []
    label_left_bracket = []
    right_bracket = []
    label_right_bracket = []
    point = []
    label_point = []

    for file in os.listdir(png_dir):
        name = file.split('.')
        if name[0] == '0':
            left_bracket.append(png_dir + file)
            label_left_bracket.append(one_hot_matrix(0, depth=3))
        elif name[0] == '1':
            right_bracket.append(png_dir + file)
            label_right_bracket.append(one_hot_matrix(1, depth=3))
        else:
            point.append(png_dir + file)
            label_point.append(one_hot_matrix(2, depth=3))

    print("There are %d left bracket\nThere are %d right bracket\nThere are %d point" % (
        len(left_bracket), len(right_bracket), len(point)))
    print(len(label_left_bracket), len(label_right_bracket), len(label_point))

    image_list = np.hstack((left_bracket, right_bracket, point))
    label_list = np.vstack(
        (label_left_bracket, label_right_bracket, label_point))

    temp = list(zip(image_list, label_list))
    temp = np.array(temp)
    np.random.shuffle(temp)
    temp = temp.transpose()

    image_list = list(temp[0, :])
    label_list = list(temp[1, :])

    return image_list, label_list



def get_batch(image_paths, labels, batch_size, IMG_W, IMG_H, buffer_size, prefetch, shuffle):
    """
    Args:
        image_paths: list of image file paths
        labels: list of corresponding image labels
        batch_size: batch size
        IMG_W: width of images
        IMG_H: height of images
        buffer_size: buffer size for shuffling
        prefetch: number of batches prefetched
        shuffle: True or False, if false the batches are not shuffled 
    Returns:
        A tf.data.Dataset object containing batches of preprocessed images and labels
    """
    # Define generator function to yield batches of images and labels
    def load_and_preprocess_image(path):
        image = tf.io.read_file(path)
        image = tf.image.decode_png(image, channels=3)
        image = tf.cast(image, tf.float32)
        return image

    
    def generator():
        for _ in range(1):
            indices = [x for x in range(len(image_paths))]
            temp = np.array(indices)
            if shuffle:
              np.random.shuffle(temp)
            for i in range(0, len(image_paths), batch_size):
                batch_indices = temp[i:i+batch_size]
                if len(batch_indices) == batch_size:
                  if batch_size == 1:
                    batch_paths = itemgetter(*batch_indices)(image_paths)
                    batch_labels = itemgetter(*batch_indices)(labels)
                    batch_images = load_and_preprocess_image(batch_paths)
                    batch_images = tf.expand_dims(batch_images, axis=0)
                    batch_labels = tf.expand_dims(batch_labels, axis=0) 
                    yield batch_images, batch_labels
                  else:
                    batch_paths = list(itemgetter(*batch_indices)(image_paths))
                    batch_labels = list(itemgetter(*batch_indices)(labels))
                    batch_images = [load_and_preprocess_image(path) for path in batch_paths]
                    yield tf.stack(batch_images), batch_labels

    # Create dataset from generator function
    data = tf.data.Dataset.from_generator(
        generator,
        output_types=(tf.float32, tf.int32),
        output_shapes=(tf.TensorShape([batch_size, IMG_H, IMG_W, 3]), tf.TensorShape([batch_size, 3]))
    )

    # Shuffle and repeat dataset
    data = data.shuffle(buffer_size , reshuffle_each_iteration=True)
    data = data.repeat(1)
    

    return data



In [7]:
class InferenceModule(tf.Module):
  '''CNN Model
    Args:
    n_classes: number of distinct classes to classify to
    W: image width
    H: image height
    BATCH_SIZE: batch_size of input data'''
  def __init__(self, n_classes, W, H, BATCH_SIZE):
        self.flag = True
        self.n_classes = n_classes
        self.conv1_weights = tf.Variable(tf.random.truncated_normal(
            shape=[3, 3, 3, 16], stddev=0.1), name="conv1_weights")
        self.conv1_biases = tf.Variable(tf.constant(
            0.1, shape=[16]), name="conv1_biases")
        self.conv2_weights = tf.Variable(tf.random.truncated_normal(
            shape=[3, 3, 16, 16], stddev=0.1), name="conv2_weights")
        self.conv2_biases = tf.Variable(tf.constant(
            0.1, shape=[16]), name="conv2_biases")
        self.local3_biases = tf.Variable(tf.constant(
            0.1, shape=[32]), name="local3_biases")
        self.local3_weights = tf.Variable(tf.random.truncated_normal(
            shape=[H*W*16, 32], stddev=0.005), name="local3_weights")
        self.local4_weights = tf.Variable(tf.random.truncated_normal(
            shape=[32, 32], stddev=0.005), name="local4_weights")
        self.local4_biases = tf.Variable(tf.constant(
            0.1, shape=[32]), name="local4_biases")
        self.softmax_linear_weights = tf.Variable(tf.random.truncated_normal(
            shape=[32, self.n_classes], stddev=0.005), name="softmax_linear_weights")
        self.softmax_linear_biases = tf.Variable(tf.constant(
            0.1, shape=[self.n_classes]), name="softmax_linear_biases")

  def __call__(self, images, batch_size):
        """Build the model
        Args:
            image:image batch ,4D tensor, tf.float32,[batch_size,width,height,channels]
            batch_size: batch_size of input data
        Return:
            output tensor with the computed logits,float,[batch_size,n_classes]
        """
        # Conv 1
        conv1_conv = tf.nn.conv2d(images, self.conv1_weights, strides=[
                                  1, 1, 1, 1], padding="SAME")
        pre_activation = tf.nn.bias_add(conv1_conv, self.conv1_biases)
        conv1 = tf.nn.relu(pre_activation, name="conv1")

        # Pool 1
        pool1 = tf.nn.avg_pool2d(conv1, ksize=[1, 3, 3, 1], strides=[
                                 1, 1, 1, 1], padding="SAME", name="pooling1")
        norm1 = tf.nn.local_response_normalization(
            pool1, depth_radius=4, bias=1.0, alpha=0.001/9.0, beta=0.75, name="norm1")
        
        # Conv 2
        conv2_conv = tf.nn.conv2d(norm1, self.conv2_weights, strides=[
                                  1, 1, 1, 1], padding="SAME")
        pre_activation = tf.nn.bias_add(conv2_conv, self.conv2_biases)
        conv2 = tf.nn.relu(pre_activation, name="conv2")

        # Pool 2
        norm2 = tf.nn.local_response_normalization(
            conv2, depth_radius=4, bias=1.0, alpha=0.001/9.0, beta=0.75, name="norm2")
        pool2 = tf.nn.avg_pool2d(norm2, ksize=[1, 3, 3, 1], strides=[
                                 1, 1, 1, 1], padding="SAME", name="pooling2")

        # Local 3
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        local3 = tf.nn.relu(tf.matmul(reshape, self.local3_weights) +
                            self.local3_biases, name="local3")
        local3_dropout = tf.nn.dropout(local3, rate=0.3)

        # Local 4
        local4 = tf.nn.relu(tf.matmul(local3_dropout, self.local4_weights) +
                            self.local4_biases, name="local4")
        local4_dropout = tf.nn.dropout(local4, rate=0.3)
        

        # Softmax linear
        fully_connect = tf.add(tf.matmul(
            local4_dropout , self.softmax_linear_weights), self.softmax_linear_biases, name="softmax_linear")
        softmax_linear = tf.nn.softmax(fully_connect)
        
        return softmax_linear, fully_connect

# Function to calculate prediction accuracy
def evalution(logits, labels):
    labels = tf.argmax(labels, axis=-1)
    correct = tf.nn.in_top_k(labels, logits, 1)
    correct = tf.cast(correct, tf.float32)
    accuracy = tf.reduce_mean(correct)
    return accuracy



In [8]:
import os
import numpy as np
import tensorflow as tf

# parameters
N_CLASSES = 3
IMG_W = 169
IMG_H = 19
BATCH_SIZE = 64
CAPACITY = 1000
learning_rate = 0.01
num_epochs = 10

def run_training():

    logs_train_dir = './drive/MyDrive/RNA/log/'
    
    train, train_label = get_files('./drive/MyDrive/RNA/a_png_eq/')
    eval, eval_label = get_files('./drive/MyDrive/RNA/a_png_eq/')
    
    model = InferenceModule(N_CLASSES, IMG_W, IMG_H, BATCH_SIZE)
    
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)

    recording = [[[], []], [[], []]]
    for epoch in range(num_epochs):
      epoch_cost = 0.
      epoch_acc = 0.
      i = 0
      data = get_batch(train, train_label, BATCH_SIZE, IMG_W, IMG_H, CAPACITY, 2, True)
      # Train loop
      for batch, (images, labels) in enumerate(data):
          i += 1
          with tf.GradientTape() as tape:
            softmax, train_logits = model(images, BATCH_SIZE)
            train_loss =  tf.reduce_mean(tf.keras.losses.categorical_crossentropy(labels, softmax))

          grads = tape.gradient(train_loss, model.trainable_variables)
          grads_vars = zip(grads, model.trainable_variables)
          optimizer.apply_gradients(grads_vars)


          train_acc = evalution(train_logits, labels)
          epoch_cost += train_loss
          epoch_acc += train_acc

      test_cost = 0.
      test_acc = 0.
      ev_data = get_batch(eval, eval_label, BATCH_SIZE, IMG_W, IMG_H, CAPACITY, 2, True)
      # eval loop
      for batch_te, (images_te, labels_te) in enumerate(ev_data):
        test_softmax, test_logits = model(tf.convert_to_tensor(images_te), BATCH_SIZE)
        test_loss =  tf.reduce_mean(tf.keras.losses.categorical_crossentropy(labels_te, test_softmax))
        acc = evalution(test_logits, labels_te)
        test_cost += test_loss
        test_acc += acc

      # Calc losses and accuracies
      recording[0][0].append(epoch_cost/(batch + 1))
      recording[0][1].append(epoch_acc/(batch + 1))
      recording[1][0].append(test_cost/(batch_te + 1))
      recording[1][1].append(test_acc/(batch_te + 1))
      # print(recording)
      print("Epoch {:03d}: Train loss: {:.3f}, Train accuracy: {:.3%}".format(epoch,
                                                                  recording[0][0][-1],
                                                                  recording[0][1][-1]))
      print("Epoch {:03d}: Test loss: {:.3f}, Test accuracy: {:.3%}".format(epoch,
                                                                  recording[1][0][-1],
                                                                  recording[1][1][-1]))
      # save
      if max(recording[0][1]) == recording[0][1][-1]:
        save_path = checkpoint.save(logs_train_dir+'model')

    with open("loss.txt", 'w') as f:
      for i in range(len(recording[0])):
        f.write(str(recording[1][0][i]))
    with open("accuracy.txt", 'w') as f:
      for i in range(len(recording[0])):
        f.write(str(recording[1][1][i]))

run_training()

There are 167440 left bracket
There are 167440 right bracket
There are 167440 point
167440 167440 167440


  temp = np.array(temp)


There are 26541 left bracket
There are 26541 right bracket
There are 26541 point
26541 26541 26541
Epoch 000: Train loss: 1.067, Train accuracy: 38.240%
Epoch 000: Test loss: 1.027, Test accuracy: 43.006%
Epoch 001: Train loss: 1.024, Train accuracy: 44.323%
Epoch 001: Test loss: 1.009, Test accuracy: 46.015%
Epoch 002: Train loss: 1.007, Train accuracy: 46.432%
Epoch 002: Test loss: 1.000, Test accuracy: 46.955%
Epoch 003: Train loss: 0.994, Train accuracy: 47.804%
Epoch 003: Test loss: 0.996, Test accuracy: 47.542%
Epoch 004: Train loss: 0.979, Train accuracy: 49.118%
Epoch 004: Test loss: 0.999, Test accuracy: 47.420%
Epoch 005: Train loss: 0.963, Train accuracy: 50.397%
Epoch 005: Test loss: 0.999, Test accuracy: 47.852%
Epoch 006: Train loss: 0.945, Train accuracy: 51.735%
Epoch 006: Test loss: 1.001, Test accuracy: 47.642%
Epoch 007: Train loss: 0.925, Train accuracy: 53.139%
Epoch 007: Test loss: 1.015, Test accuracy: 48.038%
Epoch 008: Train loss: 0.907, Train accuracy: 54.386%

In [None]:
# Modified Nussionv algorithm for the end of the pipe line
def Nussinov(seq, pred):

  def delta(i, j):
    if str(seq[i]) + str(seq[j]) in ('AU','UA','GC','CG', 'GU', 'UG'):
      return pred[i][0][0] + pred[j][0][1]
    else:
      return pred[i][0][2] + pred[j][0][2]


  def build_m():
    l = len(seq)
    matrix = [[ 0 for j in range(l)] for i in range(l)]
    for n in range(1, l):
      i = 0
      for j in range(n, l):
        value1 = matrix[i+1][j-1] + delta(i, j)
        value2 = matrix[i+1][j] + pred[i][0][2]
        value3 = matrix[i][j-1] + pred[i][0][2]
        if i+1 >= j:
          value4 = 0
        else:
          value4 = max([matrix[i][k] + matrix[k+1][j] for k in range(i+1,j)])
        matrix[i][j] = max(value1, value2, value3, value4)
        i += 1
    
    return matrix


  def traceback(matrix, seq, i, j, pair):
    if i<j:
      if matrix[i][j] == matrix[i+1][j]:
        traceback(matrix, seq, i+1, j, pair)
      elif matrix[i][j] == matrix[i][j-1]:
        traceback(matrix, seq, i, j-1, pair)
      elif matrix[i][j] == matrix[i+1][j-1] + delta(i, j):
        pair.append([str(i)+'', str(seq[i])+'', str(j)+''])
        traceback(matrix, seq, i+1, j-1, pair)
      else:
        for k in range(i+1,j):
          if matrix[i][j] == matrix[i][k] + matrix[k+1][j]:
            traceback(matrix, seq, i, k, pair)
            traceback(matrix, seq, k+1, j, pair)
            break

  m = build_m()
  pairs = []
  traceback(m, seq, 0, len(seq)-1, pairs)
  return pairs



# modified picture creation method for test files
def create_png_test(data, png_dir):
    for filename, d_list in data.items():
        current = os.path.join(png_dir, filename) + '/'
        isExist = os.path.exists(current)
        if not isExist:
            os.mkdir(current)
        if check(d_list[0]):
            im = np.zeros([len(d_list[0])+19, len(d_list[0]), 3])
            mat = creatmat(d_list[0])
            im[9:len(d_list[0])+9, 0:len(d_list[0]),
               0] = im[9:len(d_list[0])+9, 0:len(d_list[0]), 0] + mat
            for j in range(len(d_list[0])):
                pic = im[j:j+19]
                image = Image.fromarray(((pic)*255/3).astype(np.uint8), 'RGB')
                image = image.resize((169, 19))
                new_filename = current  + str(change(d_list[1][j])) + '._' + complete(j) + '.png'
                image.save(new_filename)


def pre_process_testfiles(rna_dir, png_dir):
    remove_pseudoknot(rna_dir)
    data = data_extract(rna_dir)
    create_png_test(data, png_dir)

# different file gathering function for testing without changing up the order of bases
def get_files_test(png_dir):
    """
    Args:
        file_dir:file directtory
    Returns:
        list of images and labels
    """
    test_labels = []
    test_pic = []

    for file in os.listdir(png_dir):
        name = file.split('.')
        if name[0] == '0':
            test_labels.append(one_hot_matrix(0, depth=3))
            test_pic.append(png_dir + file)

        elif name[0] == '1':
            test_labels.append(one_hot_matrix(1, depth=3))
            test_pic.append(png_dir + file)
        else:
            test_labels.append(one_hot_matrix(2, depth=3))
            test_pic.append(png_dir + file)

    return test_pic, test_labels

# load checkpoint given in input as a path
def load_model_from_checkpoint(checkpoint_path):
    model = InferenceModule(N_CLASSES, IMG_W, IMG_H, BATCH_SIZE)
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
    checkpoint.restore(checkpoint_path).expect_partial()
    return model


In [None]:
test_dir_src = './drive/MyDrive/RNA/a/'
test_dir = './drive/MyDrive/RNA/test/'
model_loc = './drive/MyDrive/RNA/log/model-6'
'''enough to run once preprocessing the test files'''
# pre_process_testfiles(test_dir_src, test_dir)

t = True
N_CLASSES = 3
IMG_W = 169
IMG_H = 19
BATCH_SIZE = 1
CAPACITY = 1000

# calulcating accuracy for test rnas from the output of nussiov and the softmax units of the model
def calulate_accuracy_for_one_rna(pairs, data_colection):
  count = 0
  for j in range(len(data_colection[0])):
    index = [str(data_colection[0][j]), data_colection[1][j], str(data_colection[2][j])]
    if (index in pairs):
      pairs.remove(index)
      count += 1
    elif index[2] == '0':
      count += 1
  count -= len(pairs)
  return count/len(data_colection[0])

# calulating testing accuracy
if t:
  rnas = os.listdir('./drive/MyDrive/RNA/test/')
  loaded_model = load_model_from_checkpoint(model_loc)
  preds = []
  data_colection = []
  for rna in rnas:
      data, f = readfile(rna, test_dir_src)
      data_colection.append(data)

      preds.append([])
      test, test_label = get_files(test_dir + rna + '/')
      test_data = get_batch(test, test_label, BATCH_SIZE, IMG_W, IMG_H, CAPACITY, 2, False)
      a = 0
      for batch_te, (images_te, labels_te) in enumerate(test_data):
        test_softmax, test_logits = loaded_model(tf.convert_to_tensor(images_te), BATCH_SIZE)
        preds[-1].append((np.array(test_softmax)))

  pairs = []
  for i in range(len(preds)):
    seq = data_colection[i][1]
    pairs.append(Nussinov(seq, preds[i]))
  acc = 0
  for i in range(len(pairs)):
    acc += calulate_accuracy_for_one_rna(pairs[i], data_colection[i])
  acc /= len(pairs)
  print('Overall acuracy during test:', acc)