In [1]:
import datetime as dt
from time import perf_counter, time

import pandas as pd

import libcirctis

# Input tables for this run: the train and validation splits stored under fold_1.
train_samples_file = 'datasets/cross_validation/fold_1/train/samples.tsv'
test_samples_file = 'datasets/cross_validation/fold_1/validation/samples.tsv'

# Experiment settings passed to the libcirctis helpers later in the notebook.
# NOTE(review): 'fold' is 0 while the paths point at fold_1 - confirm the numbering is intentional.
parameters = {
    'kernel': 'WD',
    'fold': 0,
    'up_sample_size': 100,
    'down_sample_size': 103,
}
# 'degree' is defined as the sum of the two sample sizes (100 + 103 = 203).
parameters['degree'] = parameters['up_sample_size'] + parameters['down_sample_size']
parameters.update(C1=1.0, C2=1.0)

# Never truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [2]:
# Read the training samples table: tab-separated, column names taken from the first row.
df_samples_train_all = pd.read_csv(train_samples_file, delimiter='\t', header=0)
# Displayed as the cell output: (rows, columns).
df_samples_train_all.shape

(285165, 7)

In [3]:
# Read the validation samples table: tab-separated, column names taken from the first row.
df_samples_test_all = pd.read_csv(test_samples_file, delimiter='\t', header=0)
# Displayed as the cell output: (rows, columns).
df_samples_test_all.shape

(67154, 7)

In [4]:
# Keep only the test rows whose sample_label is 1 (the positive class).
df_samples_pos_test = df_samples_test_all[df_samples_test_all['sample_label'] == 1]
df_samples_pos_test.shape

(994, 7)

In [5]:
# Keep only the test rows whose sample_label is -1 (the negative class).
df_samples_neg_test = df_samples_test_all[df_samples_test_all['sample_label'] == -1]
df_samples_neg_test.shape

(66160, 7)

In [6]:
# Number of negative test samples per positive test sample at this point.
len(df_samples_neg_test) / len(df_samples_pos_test)

66.55935613682092

In [7]:
# Down-sample the negative test samples to 15 negatives per positive.
# The negative pool is rebuilt from df_samples_test_all on every execution, so
# re-running this cell always yields the same rows in the same order instead of
# re-shuffling an already truncated frame.
df_samples_neg_test = (
    df_samples_test_all
    .loc[df_samples_test_all['sample_label'] == -1]
    .sample(frac=1, random_state=721379)  # fixed seed -> reproducible shuffle
    .head(df_samples_pos_test.shape[0] * 15)
)
df_samples_neg_test.shape

(14910, 7)

In [8]:
# Number of negative test samples per positive test sample at this point.
len(df_samples_neg_test) / len(df_samples_pos_test)

15.0

In [9]:
# Keep only the training rows whose sample_label is 1 (the positive class).
df_samples_pos_train = df_samples_train_all[df_samples_train_all['sample_label'] == 1]
df_samples_pos_train.shape

(3824, 7)

In [10]:
# Keep only the training rows whose sample_label is -1 (the negative class).
df_samples_neg_train = df_samples_train_all[df_samples_train_all['sample_label'] == -1]
df_samples_neg_train.shape

(281341, 7)

In [11]:
# Balance the training set: keep as many shuffled negatives as there are positives.
# The negative pool is rebuilt from df_samples_train_all on every execution, so
# re-running this cell always selects the same rows in the same order instead of
# re-shuffling an already truncated frame.
df_samples_neg_train = (
    df_samples_train_all
    .loc[df_samples_train_all['sample_label'] == -1]
    .sample(frac=1, random_state=721379)  # fixed seed -> reproducible shuffle
    .head(df_samples_pos_train.shape[0])
)
df_samples_neg_train.shape

(3824, 7)

In [12]:
# Stack positives on top of negatives to form the training table (row labels are kept as-is).
df_samples_train = pd.concat(objs=[df_samples_pos_train, df_samples_neg_train], axis=0)
df_samples_train.shape

(7648, 7)

In [13]:
# Stack positives on top of negatives to form the test table (row labels are kept as-is).
df_samples_test = pd.concat(objs=[df_samples_pos_test, df_samples_neg_test], axis=0)
df_samples_test.shape

(15904, 7)

In [14]:
# Run one train / predict / evaluate experiment with the settings in `parameters`.
df_eval = libcirctis.create_df_evaluation()

# perf_counter() is a monotonic, high-resolution clock, so the measured durations
# cannot be distorted by system clock adjustments during the run.
start_t = perf_counter()

# Run header: timestamp, up/down sample sizes, degree, C1 and fold, tab-separated.
print('\n' + '\t'.join(
    str(field) for field in (
        dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        parameters['up_sample_size'],
        parameters['down_sample_size'],
        parameters['degree'],
        parameters['C1'],
        parameters['fold'],
    )
))

X_train, y_train, X_test, y_test, samples_info = libcirctis.prepare_exp_data(
    parameters, df_samples_train, df_samples_test
)

svm = libcirctis.train_svm(parameters, X_train, y_train)

# NOTE: the clock was started before prepare_exp_data, so train_time covers
# data preparation as well as SVM training.
train_t = perf_counter()
train_time = train_t - start_t
print(f'Train time: {train_time:.3f} secs')

y_pred_labels, y_pred_scores = libcirctis.svm_predict(svm, X_test)

pred_time = perf_counter() - train_t
print(f'Prediction time: {pred_time:.3f} secs')

# Durations in seconds, recorded alongside the metrics.
times = {'train_time': train_time, 'pred_time': pred_time}

# Metrics are accumulated in two passes: first from the predicted labels,
# then from the predicted scores.
metrics = {}
metrics = libcirctis.calc_metrics_labels(metrics, y_test, y_pred_labels)
metrics = libcirctis.calc_metrics_scores(metrics, y_test, y_pred_scores)

# Last expression of the cell: whatever the helper returns is displayed as the output.
# NOTE(review): the return value is not assigned - confirm the helper updates df_eval in place.
libcirctis.add_result_in_df_evaluation(df_eval, parameters, samples_info, times, metrics)


2023-05-28 23:46:40	100	103	203	1.0	0
Train time: 289.907 secs
Prediction time: 6.642 secs


Unnamed: 0,fold,up_size,down_size,degree,C1,C2,kernel,TP,FP,FN,TN,F1-score,AUPR,Precision,Recall,Specificity,Accuracy,AUROC,n_train,n_train_pos,n_train_neg,n_test,n_test_pos,n_test_neg,sample_size,train_time,pred_time
0,0,100,103,203,1.0,1.0,WD,881,144,113,14766,0.872709,0.93392,0.859512,0.886318,0.990342,0.983841,0.986446,7648,3824,3824,15904,994,14910,203,289.907032,6.642198
