In [1]:
import pandas as pd
from time import time
import datetime as dt
import libcirctis

# Tab-separated sample table that the cells below load and split.
test_samples_file = 'datasets/splits/test/samples.tsv'

# Experiment settings handed to the libcirctis helpers.
# Keys are inserted in the order: kernel, fold, up/down sample sizes,
# degree, C1, C2.
parameters = {
    'kernel': 'WD',
    'fold': 0,
    'up_sample_size': 100,
    'down_sample_size': 103,
}
# `degree` is derived: it always equals the sum of the two sample sizes.
parameters['degree'] = parameters['up_sample_size'] + parameters['down_sample_size']
parameters.update(C1=1.0, C2=1.0)

# Fraction of each class that is drawn into the held-out subset.
frac_test = 0.05

# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [2]:
# Load the sample table; the first row of the file holds the column names.
# read_table uses a tab as its default field separator.
df_samples = pd.read_table(test_samples_file, header=0)
df_samples.shape

(72183, 7)

In [3]:
# Keep only the rows whose `sample_label` is 1 (the "pos" subset).
df_samples_pos = df_samples[df_samples['sample_label'].eq(1)]
df_samples_pos.shape

(979, 7)

In [4]:
# Keep only the rows whose `sample_label` is -1 (the "neg" subset).
df_samples_neg = df_samples[df_samples['sample_label'].eq(-1)]
df_samples_neg.shape

(71204, 7)

In [5]:
# Draw a reproducible random `frac_test` share of the positive rows
# as the positive part of the evaluation subset.
df_samples_pos_test = df_samples_pos.sample(
    frac=frac_test,
    random_state=721379,
)
df_samples_pos_test.shape

(49, 7)

In [6]:
# Every positive row that was not drawn into the evaluation subset
# (matched by index label) stays available for training.
df_samples_pos_train = df_samples_pos.drop(index=df_samples_pos_test.index)
df_samples_pos_train.shape

(930, 7)

In [7]:
# Draw a reproducible random `frac_test` share of the negative rows,
# using the same seed as for the positive rows.
df_samples_neg_test = df_samples_neg.sample(
    frac=frac_test,
    random_state=721379,
)
df_samples_neg_test.shape

(3560, 7)

In [8]:
# Every negative row that was not drawn into the evaluation subset
# (matched by index label) stays available for training.
df_samples_neg_train = df_samples_neg.drop(index=df_samples_neg_test.index)
df_samples_neg_train.shape

(67644, 7)

In [9]:
# Balance the training classes: truncate the negative training rows to the
# number of positive training rows.
# NOTE(review): this keeps the *first* rows in their current order, not a
# random draw -- confirm that positional selection is intended.
df_samples_neg_train = df_samples_neg_train.iloc[:len(df_samples_pos_train)]
df_samples_neg_train.shape

(930, 7)

In [10]:
# Number of negative rows per positive row in the evaluation subset.
len(df_samples_neg_test) / len(df_samples_pos_test)

72.65306122448979

In [11]:
# Stack the positive rows on top of the (truncated) negative rows to form
# the training table; original index labels are kept.
df_samples_train = pd.concat(
    [df_samples_pos_train, df_samples_neg_train],
    axis=0,
)
df_samples_train.shape

(1860, 7)

In [12]:
# Stack the positive rows on top of the negative rows to form the
# evaluation table; original index labels are kept.
df_samples_test = pd.concat(
    [df_samples_pos_test, df_samples_neg_test],
    axis=0,
)
df_samples_test.shape

(3609, 7)

In [13]:
# One full train -> predict -> evaluate pass for the configured parameters.
df_eval = libcirctis.create_df_evaluation()

# NOTE: the clock starts before data preparation, so the reported
# "Train time" spans prepare_exp_data() as well as train_svm().
start_t = time()

# Run header: blank line, timestamp, then the tab-separated run parameters.
print('\n' + '\t'.join(
    [dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
    + [str(parameters[key])
       for key in ('up_sample_size', 'down_sample_size', 'degree', 'C1', 'fold')]
))

X_train, y_train, X_test, y_test, samples_info = libcirctis.prepare_exp_data(
    parameters, df_samples_train, df_samples_test
)
svm = libcirctis.train_svm(parameters, X_train, y_train)

train_t = time()
train_time = train_t - start_t
print(f'Train time: {train_time:.3f} secs')

y_pred_labels, y_pred_scores = libcirctis.svm_predict(svm, X_test)

# Measured from the end of training to the end of prediction.
pred_time = time() - train_t
print(f'Prediction time: {pred_time:.3f} secs')

times = {'train_time': train_time, 'pred_time': pred_time}

# Label-based metrics are computed first, then score-based metrics are
# added on top of whatever the first call returned.
metrics = libcirctis.calc_metrics_labels({}, y_test, y_pred_labels)
metrics = libcirctis.calc_metrics_scores(metrics, y_test, y_pred_scores)

# Last expression of the cell: its return value is what the notebook displays.
libcirctis.add_result_in_df_evaluation(df_eval, parameters, samples_info, times, metrics)


2023-05-28 22:32:02	100	103	203	1.0	0
Train time: 9.903 secs
Prediction time: 1.449 secs


Unnamed: 0,fold,up_size,down_size,degree,C1,C2,kernel,TP,FP,FN,TN,F1-score,AUPR,Precision,Recall,Specificity,Accuracy,AUROC,n_train,n_train_pos,n_train_neg,n_test,n_test_pos,n_test_neg,sample_size,train_time,pred_time
0,0,100,103,203,1.0,1.0,WD,34,46,15,3514,0.527132,0.67805,0.425,0.693878,0.987079,0.983098,0.97156,1860,930,930,3609,49,3560,203,9.903005,1.449139
