# Regular Genomics Project **RNA Localisation**


In [1]:
from plotting import plot_line_graph
from metrics import Pearson
from models import CNN
from utils import prepare_data, set_variables, extractY
from plotting import multiplot_pearson, bar_plot
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping

### **1.** Data Preparation

Firstly, we import several necessary packages and load in our data:

In [2]:
# prepare_data() (imported from utils) returns three objects, unpacked here as
# the training, validation, and test sets used by the cells below.
train_data, valid_data, test_data = prepare_data()

### **2.** Initializations

As baseline models, we use the [RNATracker](https://github.com/HarveyYan/RNATracker/blob/master/Models/cnn_bilstm_attention.py) model and a CNN model.


In [3]:
# Length of the longest entry in the training set's 'seq' column; used below
# as the first dimension of the model input.
max_seq_len = train_data['seq'].apply(len).max()


In [None]:
# Train, plot, and save each CNN baseline architecture in turn.
architectures = ["CNN_Baseline_4Conv_Struct", "CNN_Baseline_5Conv_Struct"]

# Collects one `history.history` dict per architecture, in the same order as
# `architectures`, so the runs can be compared after the loop.
history_list = []

for arch in architectures:

    model_name = arch
    # set_variables returns the output paths plus the data-loader, model, and
    # training parameter sets for the named architecture.
    model_architecture_path, model_output_path, params_dataLoader_train, params_dataLoader_valid, params_model, params_train = set_variables(model_name, max_seq_len)

    # 6 input channels when the "struct" flag is set, otherwise 4.
    # NOTE(review): presumably 4 one-hot nucleotide channels plus 2 structure
    # channels -- confirm against the data loader.
    input_size = (max_seq_len, 6) if params_dataLoader_train["struct"] else (max_seq_len, 4)

    model = CNN(input_size=input_size,
                params_model=params_model,
                metrics=[Pearson(return_dict=True)]
                )

    model.print_model()
    model.summary()

    # Early stopping watches the validation loss rather than the training
    # loss: the training loss keeps decreasing while the model overfits, so
    # monitoring it would not stop training when generalisation degrades.
    history = model.fit_and_evaluate(train_data=train_data, eval_data=valid_data,
                                     params_train_dataLoader=params_dataLoader_train,
                                     params_eval_dataLoader=params_dataLoader_valid,
                                     params_train=params_train,
                                     callback=[EarlyStopping(monitor='val_loss', patience=3)])

    # Optional per-output Pearson curves; uncomment to display them.
    # multiplot_pearson(history.history)
    # plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

    # Training vs. validation loss per epoch.
    plt_data = [history.history['loss'], history.history['val_loss']]
    plot_line_graph(plt_data, "Loss Graph", 'loss', 'epoch', ['train', 'val'])

    history_list.append(history.history)

    # Write the architecture diagram and the trained model to the paths
    # provided by set_variables.
    model.print_model(model_architecture_path)
    model.save_model(model_output_path)

In [None]:
# Load a previously saved model and evaluate it on the test set.
from keras.models import load_model
from dataloaders import GeneDataLoader

# The custom Pearson metric has to be passed in so the saved model can be
# deserialised.
# NOTE(review): this checkpoint name is not one of the architectures trained
# in the loop above -- confirm the file exists.
model = load_model(
    'model_outputs/CNN_Baseline_Classic_Struct_2023-07-12.h5',
    custom_objects={"Pearson": Pearson},
)

# Only the validation data-loader parameters are used below; the remaining
# names are rebound as well.
(
    model_architecture_path,
    model_output_path,
    params_dataLoader_train,
    params_dataLoader_valid,
    params_model,
    params_train,
) = set_variables("CNN_Baseline_Classic_Struct", max_seq_len)

# A separate loader is constructed for each pass over the test data.
eval_loader = GeneDataLoader(test_data, **params_dataLoader_valid)
model.evaluate(eval_loader)

predict_loader = GeneDataLoader(test_data, **params_dataLoader_valid)
prediction = model.predict(predict_loader)
test_data_Y = extractY(test_data)

# Compare the extracted test targets with the predictions.
bar_plot(test_data_Y, prediction, "pearson")

In [None]:
# Show the raw output of model.predict on the test loader for manual inspection.
print(prediction)