In [None]:
import pandas as pd
from time import time
import datetime as dt
import libcirctis

# Sample tables (tab-separated) for fold_1 of the cross-validation split.
train_samples_file = 'datasets/cross_validation/fold_1/train/samples.tsv'
test_samples_file = 'datasets/cross_validation/fold_1/validation/samples.tsv'

# Experiment settings handed to the libcirctis helpers.
# NOTE(review): 'fold' is 0 while the paths above point at fold_1 — confirm
# which numbering is intended.
parameters = {
    'kernel': 'WD',
    'fold': 0,
    'up_sample_size': 100,
    'down_sample_size': 103,
}
# 'degree' is derived from the two sample sizes rather than set by hand.
parameters['degree'] = parameters['up_sample_size'] + parameters['down_sample_size']
parameters.update({'C1': 1.0, 'C2': 1.0})

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [None]:
# Load the train and validation sample tables; both are tab-separated files
# whose first row holds the column names.
df_samples_train, df_samples_test = (
    pd.read_csv(samples_path, sep='\t', header=0)
    for samples_path in (train_samples_file, test_samples_file)
)

In [None]:
# Training rows whose sample_label is 1 (the positive class).
df_samples_train_pos = df_samples_train.loc[df_samples_train['sample_label'].eq(1)]
df_samples_train_pos

In [None]:
# Training rows whose sample_label is -1 (the negative class).
df_samples_train_neg = df_samples_train.loc[df_samples_train['sample_label'].eq(-1)]
df_samples_train_neg

In [None]:
# Shuffle all negative rows with a fixed seed, then renumber the index from 0.
# NOTE(review): the frame is rebound to its own shuffled copy, so running this
# cell twice shuffles the already-shuffled rows again.
df_samples_train_neg = (
    df_samples_train_neg
    .sample(frac=1, random_state=721379)
    .reset_index(drop=True)
)
df_samples_train_neg

In [None]:
# Keep only as many negative rows as there are positive rows, so that the two
# classes have the same size.
df_samples_train_neg = df_samples_train_neg.head(len(df_samples_train_pos))
df_samples_train_neg

In [None]:
# Rebuild the training frame from the positive rows followed by the selected
# negative rows.
# pd.concat expects ONE iterable of frames as its first argument; passing the
# two frames as separate arguments raises a TypeError, hence the list.
# NOTE(review): row labels are kept as they are in each input frame, so the
# combined index may contain repeated labels — pass ignore_index=True here if
# downstream code needs a unique index.
df_samples_train = pd.concat([df_samples_train_pos, df_samples_train_neg])
df_samples_train

In [None]:
# Evaluation table; the result of this run is added to it at the end of the cell.
df_eval = libcirctis.create_df_evaluation()

start_t = time()

# Tab-separated run header: timestamp, both sample sizes, degree, C1 and fold.
print('\n' + '\t'.join(
    [dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
    + [str(parameters[key])
       for key in ('up_sample_size', 'down_sample_size', 'degree', 'C1', 'fold')]
))

X_train, y_train, X_test, y_test, samples_info = libcirctis.prepare_exp_data(
    parameters, df_samples_train, df_samples_test
)

svm = libcirctis.train_svm(parameters, X_train, y_train)

# The clock was started before prepare_exp_data, so the reported train time
# covers data preparation as well as training.
train_t = time()
train_time = train_t - start_t
print(f'Train time: {train_time:.3f} secs')

y_pred_labels, y_pred_scores = libcirctis.svm_predict(svm, X_test)

pred_time = time() - train_t
print(f'Prediction time: {pred_time:.3f} secs')

times = {'train_time': train_time, 'pred_time': pred_time}

# Metrics from the predicted labels first; that result is then passed on to
# the score-based metrics call.
metrics = libcirctis.calc_metrics_labels({}, y_test, y_pred_labels)
metrics = libcirctis.calc_metrics_scores(metrics, y_test, y_pred_scores)

libcirctis.add_result_in_df_evaluation(df_eval, parameters, samples_info, times, metrics)

In [None]:
# Show the predicted labels returned by libcirctis.svm_predict.
y_pred_labels

In [None]:
# Store the predicted labels, cast to int, in the 'pred_label' column.
# NOTE(review): this modifies df_samples_test in place and assumes
# y_pred_labels is ordered like the rows of df_samples_test — confirm.
df_samples_test['pred_label'] = y_pred_labels.astype(int)
df_samples_test

In [None]:
# Test rows whose TIS_type is 'ATG'.
df_samples_test.loc[df_samples_test['TIS_type'].eq('ATG')]

In [None]:
# False positives: test rows labelled -1 that were predicted as 1.
df_FP = df_samples_test.loc[
    df_samples_test['sample_label'].eq(-1) & df_samples_test['pred_label'].eq(1)
]
df_FP

In [None]:
# False positives whose TIS_type is 'ATG'.
df_FP.loc[df_FP['TIS_type'].eq('ATG')]

In [None]:
# Re-display the training frame before it is broken down by label and TIS_type.
df_samples_train

In [None]:
# Positive training rows (sample_label 1) whose TIS_type is 'ATG'.
df_train_atg = df_samples_train.loc[
    df_samples_train['sample_label'].eq(1) & df_samples_train['TIS_type'].eq('ATG')
]
df_train_atg

In [None]:
# Positive training rows (sample_label 1) whose TIS_type is anything but 'ATG'.
df_train_nc = df_samples_train.loc[
    df_samples_train['sample_label'].eq(1) & df_samples_train['TIS_type'].ne('ATG')
]
df_train_nc

In [None]:
# Negative training rows (sample_label -1) whose TIS_type is 'ATG'.
# NOTE(review): this rebinds df_train_atg, which an earlier cell bound to the
# positive 'ATG' rows; a distinct name would keep both subsets available.
df_train_atg = df_samples_train.loc[
    df_samples_train['sample_label'].eq(-1) & df_samples_train['TIS_type'].eq('ATG')
]
df_train_atg

In [None]:
# Negative training rows (sample_label -1) whose TIS_type is anything but 'ATG'.
# NOTE(review): this rebinds df_train_nc, which an earlier cell bound to the
# positive non-'ATG' rows; a distinct name would keep both subsets available.
df_train_nc = df_samples_train.loc[
    df_samples_train['sample_label'].eq(-1) & df_samples_train['TIS_type'].ne('ATG')
]
df_train_nc