In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,Embedding,Flatten,Conv1D,MaxPooling1D,LSTM

from tensorflow.keras.models import Sequential,load_model,Model
from tensorflow.keras import Input

import keras_tuner
from keras_tuner import HyperModel
from keras_tuner.tuners import RandomSearch

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, confusion_matrix, matthews_corrcoef, roc_curve, auc 

In [2]:
def CF_Validation(k,train_data,train_targets):
    """Split data and targets into ``k`` contiguous cross-validation folds.

    Fold ``i`` validates on rows ``[i * fold_size, (i + 1) * fold_size)``,
    where ``fold_size = len(train_data) // k``, and trains on every other
    row.  When ``len(train_data)`` is not a multiple of ``k`` the trailing
    ``len(train_data) % k`` rows never serve as validation data; they are
    part of the training portion of every fold.

    Nothing is shuffled here: folds follow the order of the inputs.

    Args:
        k: Number of folds; must satisfy ``1 <= k <= len(train_data)``.
        train_data: Row-sliceable samples that ``np.concatenate`` accepts.
        train_targets: Targets aligned row-for-row with ``train_data``.

    Returns:
        A 4-tuple ``(train_data_folds, train_target_folds,
        validation_data_folds, validation_target_folds)``; each element is a
        list with one entry per fold.  Validation entries are slices of the
        inputs, training entries are arrays built by ``np.concatenate``.

    Raises:
        ValueError: If ``k`` is smaller than 1, if ``k`` exceeds the number
            of samples (every validation fold would be empty), or if
            ``train_data`` and ``train_targets`` differ in length.
    """
    n_samples = len(train_data)
    if len(train_targets) != n_samples:
        raise ValueError(
            "train_data and train_targets must have the same length, got "
            f"{n_samples} and {len(train_targets)}"
        )
    # Without this check k == 0 fails with a bare ZeroDivisionError and a
    # negative k silently returns four empty lists.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    fold_size = n_samples // k   # number of samples is divided by number of folds
    if fold_size == 0:
        raise ValueError(
            f"k={k} exceeds the number of samples ({n_samples}); "
            "every validation fold would be empty"
        )

    total_validation_data = []
    total_validation_targets = []
    total_train_data = []
    total_train_targets = []

    for i in range(k):
        start = i * fold_size
        stop = start + fold_size

        total_validation_data.append(train_data[start:stop])
        total_validation_targets.append(train_targets[start:stop])

        # Training portion is everything before and after the validation window.
        total_train_data.append(
            np.concatenate([train_data[:start], train_data[stop:]], axis=0)
        )
        total_train_targets.append(
            np.concatenate([train_targets[:start], train_targets[stop:]], axis=0)
        )

    return total_train_data,total_train_targets,total_validation_data,total_validation_targets

In [1]:
class Dataset_RNA_1():
    """Pair one sample collection with its labels and derive data splits.

    Intended call order is ``shuffle_dataset()`` followed by
    ``cross_validation()``: the second step consumes the training portion
    stored by the first.

    Attributes:
        X: Samples exactly as passed to the constructor.
        y: Labels exactly as passed to the constructor.
        nCV: Number of folds forwarded to ``CF_Validation``.
        shuffled_data: ``None`` until ``shuffle_dataset()`` runs, afterwards
            the tuple ``(X_test, y_test, train_data, train_targets)``.
        unshuffled_data: Initialised to ``None`` and never assigned by this
            class; kept so existing attribute access keeps working.
    """

    def __init__(self,X,y,nCV):
        """Store the samples ``X``, labels ``y`` and fold count ``nCV``."""
        self.X = X
        self.y = y
        self.unshuffled_data= None
        self.shuffled_data= None

        self.nCV = nCV

    def shuffle_dataset(self, test_fraction=0.2, random_state=None):
        """Shuffle samples and labels together and carve off a hold-out part.

        Args:
            test_fraction: Share of rows placed in the hold-out part, between
                0 and 1.  The row count is ``int(len(X) * test_fraction)``.
            random_state: ``None`` (default), an int seed, or a
                ``numpy.random.RandomState``.  Pass a seed to make the split
                reproducible.

        Returns:
            The tuple ``(X_test, y_test, train_data, train_targets)``, which
            is also stored in ``self.shuffled_data``.

        Raises:
            ValueError: If ``test_fraction`` lies outside ``[0, 1]``.
        """
        if not 0 <= test_fraction <= 1:
            raise ValueError(
                f"test_fraction must be between 0 and 1, got {test_fraction}"
            )

        # Imported locally so the module-level import cell stays untouched.
        from sklearn.utils import check_random_state

        # One generator drives both passes.  With random_state=None this is
        # NumPy's global RandomState, i.e. the same draws as the previous
        # unseeded double shuffle; with a seed the second pass continues the
        # stream instead of replaying the first permutation.
        rng = check_random_state(random_state)
        X_1, y_1 = shuffle(self.X, self.y, random_state=rng)
        X_s, y_s = shuffle(X_1, y_1, random_state=rng)

        l_test = int(len(X_s) * test_fraction)
        # Hold-out rows; the original note says this part is used for validation.
        X_test, y_test = X_s[:l_test], y_s[:l_test]
        train_data, train_targets = X_s[l_test:], y_s[l_test:]

        self.shuffled_data = X_test, y_test, train_data, train_targets
        return self.shuffled_data

    def cross_validation(self):
        """Build ``nCV`` folds from the training portion of the last shuffle.

        Returns:
            Whatever ``CF_Validation(nCV, train_data, train_targets)`` returns.

        Raises:
            RuntimeError: If ``shuffle_dataset()`` has not been called yet.
        """
        if self.shuffled_data is None:
            # Previously this surfaced as "cannot unpack non-iterable NoneType".
            raise RuntimeError(
                "shuffle_dataset() must be called before cross_validation()"
            )
        _, _, train_data, train_targets = self.shuffled_data
        return CF_Validation(self.nCV, train_data, train_targets)

In [2]:
def merge_dataset(X1,y1,X2,y2):
    """Join two sample arrays column-wise and keep the first label set.

    ``X1`` and ``X2`` are concatenated along axis 1, so they need the same
    number of rows.  ``y2`` is accepted but not used: the returned labels
    are ``y1`` unchanged.
    NOTE(review): this presumably relies on both inputs describing the same
    samples in the same order, so that ``y1`` equals ``y2`` - confirm with
    the callers.

    Returns:
        ``(X_merge, y_merge)`` - the widened array and ``y1``.
    """
    return np.concatenate((X1, X2), axis=1), y1

In [4]:
class Dataset_RNA_2():
    """Merge two sample sets column-wise and derive data splits from them.

    The constructor combines the inputs through ``merge_dataset``.  Intended
    call order afterwards is ``shuffle_dataset()`` followed by
    ``cross_validation()``.

    Attributes:
        X_merge: Result of merging ``X1`` and ``X2`` with ``merge_dataset``.
        y_merge: Labels returned by ``merge_dataset``.
        nCV: Number of folds forwarded to ``CF_Validation``.
        shuffled_data: ``None`` until ``shuffle_dataset()`` runs, afterwards
            the tuple ``(X_test, y_test, train_data, train_targets)``.
        unshuffled_data: Initialised to ``None`` and never assigned by this
            class; kept so existing attribute access keeps working.
    """

    def __init__(self,X1,y1,X2,y2,nCV):
        """Merge ``(X1, y1)`` with ``(X2, y2)`` and store the fold count."""
        self.X_merge, self.y_merge = merge_dataset(X1,y1,X2,y2)

        self.unshuffled_data= None
        self.shuffled_data= None

        self.nCV = nCV

    def shuffle_dataset(self, test_fraction=0.2, random_state=None):
        """Shuffle merged samples and labels together and carve off a hold-out part.

        Args:
            test_fraction: Share of rows placed in the hold-out part, between
                0 and 1.  The row count is ``int(len(X) * test_fraction)``.
            random_state: ``None`` (default), an int seed, or a
                ``numpy.random.RandomState``.  Pass a seed to make the split
                reproducible.

        Returns:
            The tuple ``(X_test, y_test, train_data, train_targets)``, which
            is also stored in ``self.shuffled_data``.

        Raises:
            ValueError: If ``test_fraction`` lies outside ``[0, 1]``.
        """
        if not 0 <= test_fraction <= 1:
            raise ValueError(
                f"test_fraction must be between 0 and 1, got {test_fraction}"
            )

        # Imported locally so the module-level import cell stays untouched.
        from sklearn.utils import check_random_state

        # One generator drives both passes.  With random_state=None this is
        # NumPy's global RandomState, i.e. the same draws as the previous
        # unseeded double shuffle; with a seed the second pass continues the
        # stream instead of replaying the first permutation.
        rng = check_random_state(random_state)
        X_1, y_1 = shuffle(self.X_merge, self.y_merge, random_state=rng)
        X_s, y_s = shuffle(X_1, y_1, random_state=rng)

        l_test = int(len(X_s) * test_fraction)
        # Hold-out rows; the original note says this part is used for validation.
        X_test, y_test = X_s[:l_test], y_s[:l_test]
        train_data, train_targets = X_s[l_test:], y_s[l_test:]

        self.shuffled_data = X_test, y_test, train_data, train_targets
        return self.shuffled_data

    def cross_validation(self):
        """Build ``nCV`` folds from the training portion of the last shuffle.

        Returns:
            Whatever ``CF_Validation(nCV, train_data, train_targets)`` returns.

        Raises:
            RuntimeError: If ``shuffle_dataset()`` has not been called yet.
        """
        if self.shuffled_data is None:
            # Previously this surfaced as "cannot unpack non-iterable NoneType".
            raise RuntimeError(
                "shuffle_dataset() must be called before cross_validation()"
            )
        _, _, train_data, train_targets = self.shuffled_data
        return CF_Validation(self.nCV, train_data, train_targets)
    

In [5]:
class Dataset_RNA_3():
    """Merge three sample sets column-wise and derive data splits from them.

    The constructor chains two ``merge_dataset`` calls: first ``X1`` with
    ``X2``, then that result with ``X3``.  Intended call order afterwards is
    ``shuffle_dataset()`` followed by ``cross_validation()``.

    Attributes:
        X_merge: Result of the chained ``merge_dataset`` calls.
        y_merge: Labels returned by the final ``merge_dataset`` call.
        nCV: Number of folds forwarded to ``CF_Validation``.
        shuffled_data: ``None`` until ``shuffle_dataset()`` runs, afterwards
            the tuple ``(X_test, y_test, train_data, train_targets)``.
        unshuffled_data: Initialised to ``None`` and never assigned by this
            class; kept so existing attribute access keeps working.
    """

    def __init__(self,X1,y1,X2,y2,X3,y3,nCV):
        """Merge the three ``(X, y)`` pairs and store the fold count."""
        X_temp ,y_temp = merge_dataset(X1,y1,X2,y2)
        self.X_merge, self.y_merge = merge_dataset(X_temp,y_temp,X3,y3)

        self.unshuffled_data= None
        self.shuffled_data= None

        self.nCV = nCV

    def shuffle_dataset(self, test_fraction=0.2, random_state=None):
        """Shuffle merged samples and labels together and carve off a hold-out part.

        Args:
            test_fraction: Share of rows placed in the hold-out part, between
                0 and 1.  The row count is ``int(len(X) * test_fraction)``.
            random_state: ``None`` (default), an int seed, or a
                ``numpy.random.RandomState``.  Pass a seed to make the split
                reproducible.

        Returns:
            The tuple ``(X_test, y_test, train_data, train_targets)``, which
            is also stored in ``self.shuffled_data``.

        Raises:
            ValueError: If ``test_fraction`` lies outside ``[0, 1]``.
        """
        if not 0 <= test_fraction <= 1:
            raise ValueError(
                f"test_fraction must be between 0 and 1, got {test_fraction}"
            )

        # Imported locally so the module-level import cell stays untouched.
        from sklearn.utils import check_random_state

        # One generator drives both passes.  With random_state=None this is
        # NumPy's global RandomState, i.e. the same draws as the previous
        # unseeded double shuffle; with a seed the second pass continues the
        # stream instead of replaying the first permutation.
        rng = check_random_state(random_state)
        X_1, y_1 = shuffle(self.X_merge, self.y_merge, random_state=rng)
        X_s, y_s = shuffle(X_1, y_1, random_state=rng)

        l_test = int(len(X_s) * test_fraction)
        # Hold-out rows; the original note says this part is used for validation.
        X_test, y_test = X_s[:l_test], y_s[:l_test]
        train_data, train_targets = X_s[l_test:], y_s[l_test:]

        self.shuffled_data = X_test, y_test, train_data, train_targets
        return self.shuffled_data

    def cross_validation(self):
        """Build ``nCV`` folds from the training portion of the last shuffle.

        Returns:
            Whatever ``CF_Validation(nCV, train_data, train_targets)`` returns.

        Raises:
            RuntimeError: If ``shuffle_dataset()`` has not been called yet.
        """
        if self.shuffled_data is None:
            # Previously this surfaced as "cannot unpack non-iterable NoneType".
            raise RuntimeError(
                "shuffle_dataset() must be called before cross_validation()"
            )
        _, _, train_data, train_targets = self.shuffled_data
        return CF_Validation(self.nCV, train_data, train_targets)