In [None]:
from PC6_encoding import get_PC6_features_labels
from model import train_pc6_model
from model_tools import learning_curve, evalution_metrics
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model

In [None]:
import os
# GPU runtime settings, applied to this process's environment in one call:
#   TF_FORCE_GPU_ALLOW_GROWTH='true' and CUDA_VISIBLE_DEVICES='1'.
# NOTE(review): tensorflow.keras is already imported in the first cell; confirm
# these variables are still picked up before the GPU is first initialised.
os.environ.update({
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
    'CUDA_VISIBLE_DEVICES': '1',
})

In [None]:
# Training set: PC6 features/labels built from the `afp_pos_...train` and
# `afp_neg_...train` FASTA files (first and second argument respectively).
# length=50 is forwarded to the encoder -- presumably the fixed sequence
# length; confirm in PC6_encoding.
train_data, train_labels = get_PC6_features_labels(
    '../data/afp_pos_seq_len50train_2710.fasta',
    '../data/afp_neg_seq_len50train_2710.fasta',
    length=50,
)

# Independent test set, encoded the same way from the `independant_test` files.
test_data, test_labels = get_PC6_features_labels(
    '../data/afp_pos_seq_len50independant_test_301.fasta',
    '../data/afp_neg_seq_len50independant_test_301.fasta',
    length=50,
)

In [None]:
# Split 10% off the training set (stratified on the labels, fixed seed).
# Note: test_data_ / test_labels_ are this 10% slice of the *training* data,
# not the independent test set held in test_data / test_labels.
train_data_, test_data_, train_labels_, test_labels_ = train_test_split(
    train_data,
    train_labels,
    test_size=0.1,
    random_state=1,
    stratify=train_labels,
)


## CNN model training

In [None]:
# Train on the 90% split, passing the 10% split as the second data/label pair;
# 'pc6' and '../PC6_model' are forwarded to train_pc6_model unchanged.
t_m = train_pc6_model(
    train_data_,
    train_labels_,
    test_data_,
    test_labels_,
    'pc6',
    path='../PC6_model',
)

In [None]:
# Hand the `history` attribute of the training result to learning_curve.
training_history = t_m.history
learning_curve(training_history)

In [None]:
# Reload the saved final weights and score the independent test set.
final_weights_path = '../PC6_model/pc6_final_weights.h5'
model = load_model(final_weights_path)
labels_score = model.predict(test_data)
# Last expression of the cell: evalution_metrics is called with its default
# options here (the cross-validation code below passes save=False instead).
evalution_metrics(test_labels, labels_score)

## 10-fold cross-validation

In [None]:
from sklearn.model_selection import KFold

def fold_cv(train_data, labels, model_name=None, output_dir = '.', n_splits=10, random_state=None):
    """Run shuffled K-fold cross-validation and tabulate the per-fold metrics.

    For every fold a model is trained with ``train_pc6_model`` on the training
    indices, the ``kfold<N>_best_weights.h5`` file that the training step is
    expected to leave in ``output_dir`` is reloaded, and the held-out indices
    are scored with ``evalution_metrics(..., save=False)``.

    Parameters
    ----------
    train_data, labels :
        Features and labels. Both are indexed with the integer index arrays
        produced by ``KFold.split``, so they must support that kind of
        indexing (e.g. numpy arrays).
    model_name : str, optional
        Prefix of the summary file ``<model_name>_cv.csv``. When omitted,
        ``'kfold'`` is used so the file is not called ``None_cv.csv``.
    output_dir : str
        Directory that receives the per-fold weights and the summary CSV;
        created if it does not exist.
    n_splits : int
        Number of folds (default 10).
    random_state : int, optional
        Seed for the fold shuffling. ``None`` (default) keeps the folds
        unseeded; pass an integer for reproducible splits.

    Returns
    -------
    pandas.DataFrame
        One row per fold (index 1..n_splits) plus a final ``'Mean'`` row.
    """
    metric_columns = ['accuracy', 'precision', 'sensitivity', 'specificity', 'f1', 'mcc']
    csv_prefix = 'kfold' if model_name is None else model_name

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # K-fold Cross Validation model evaluation
    fold_rows = []
    for fold_no, (train, val) in enumerate(kfold.split(train_data, labels), start=1):
        print('------------------------------------------------------------------------')
        print(f'Training for fold {fold_no} ...')

        fold_name = 'kfold%s' % fold_no
        train_pc6_model(train_data[train], labels[train], train_data[val], labels[val],
                        model_name=fold_name, path=output_dir)
        model = load_model(os.path.join(output_dir, '%s_best_weights.h5' % fold_name))

        labels_score = model.predict(train_data[val])
        metrics_dict = evalution_metrics(labels[val], labels_score, save=False)
        print(metrics_dict)
        # Values are taken positionally -- assumes evalution_metrics returns the
        # metrics in the same order as metric_columns; TODO confirm in model_tools.
        fold_rows.append(list(metrics_dict.values()))

    # Build the table once instead of growing it row by row.
    df = pd.DataFrame(fold_rows, columns=metric_columns, index=range(1, len(fold_rows) + 1))
    df.loc['Mean'] = df.mean()
    df.to_csv(os.path.join(output_dir, '%s_cv.csv' % csv_prefix))
    return df

In [None]:
# Run the cross-validation on the full training set. model_name is passed
# explicitly because fold_cv uses it as the prefix of the summary CSV
# ('<model_name>_cv.csv'); leaving it out yields a file named 'None_cv.csv'.
fold_cv(
    train_data,
    train_labels,
    model_name='pc6',
    output_dir='../PC6_model/10_fold',
)