In [1]:
import os
import pickle
from time import time

import pandas as pd

import libcirctis

# Path the trained model is pickled to at the end of the notebook.
output_model_file = 'circTIS_model/circTIS_model_v1.pkl'

In [2]:
# Settings passed to libcirctis for both data preparation and SVM training.
parameters = {
    'kernel': 'WD',  # presumably the weighted-degree string kernel -- TODO confirm in libcirctis
    # Presumably the sequence window taken upstream/downstream of each
    # candidate site -- TODO confirm meaning and units against libcirctis.
    'up_sample_size': 40,
    'down_sample_size': 287,
}
# 'degree' is set to the combined window size (up + down).
parameters['degree'] = parameters['up_sample_size'] + parameters['down_sample_size']
# C1 and C2 share a single value (presumably SVM cost terms -- TODO confirm).
parameters.update(C1=1.55, C2=1.55)

In [3]:
# Loading training data
# The fold-1 train and validation splits are read separately and then stacked
# into one table, so the model below is fit on both of them.
train_samples_file_1 = 'datasets/cross_validation/fold_1/train/samples.tsv'
train_samples_file_2 = 'datasets/cross_validation/fold_1/validation/samples.tsv'

df_samples_train_1, df_samples_train_2 = (
    pd.read_csv(samples_path, sep='\t', header=0)
    for samples_path in (train_samples_file_1, train_samples_file_2)
)

# NOTE: the original row labels of both files are kept, so the combined index
# is not guaranteed to be unique.
df_samples_train_all = pd.concat([df_samples_train_1, df_samples_train_2])
df_samples_train_all.shape

(352319, 7)

In [4]:
# Balancing training data
# Keep every positive sample and pair it with an equally sized, reproducibly
# shuffled subset of the negatives (fixed random_state).
is_positive = df_samples_train_all['sample_label'] == 1
is_negative = df_samples_train_all['sample_label'] == -1

df_samples_pos_train = df_samples_train_all.loc[is_positive]
df_samples_neg_train = (
    df_samples_train_all.loc[is_negative]
    .sample(frac=1, random_state=721379)   # full shuffle of the negatives
    .head(df_samples_pos_train.shape[0])   # keep as many as there are positives
)

# Positives first, then the selected negatives.
df_samples_train = pd.concat([df_samples_pos_train, df_samples_neg_train])

In [5]:
# Turn the balanced sample table into the (X_train, y_train) pair that
# libcirctis.train_svm consumes.
X_train, y_train = libcirctis.prepare_train_data(parameters, df_samples_train)

# Only the SVM fit is timed; the data preparation above is outside the window.
start_t = time()
svm = libcirctis.train_svm(parameters, X_train, y_train)
train_t = time()

# Wall-clock duration of the fit, in seconds.
train_time = train_t - start_t
print(f'Train time: {train_time:.3f} secs')

Train time: 94.022 secs


In [6]:
# Saving model
# Create the destination directory first: if it is missing, open() raises
# FileNotFoundError and the save fails after the long training run.
model_dir = os.path.dirname(output_model_file)
if model_dir:  # dirname is '' for a bare filename, and os.makedirs('') raises
    os.makedirs(model_dir, exist_ok=True)

# NOTE: pickle files can execute arbitrary code when loaded; only load this
# model file from a trusted location.
with open(output_model_file, "wb") as f:
    pickle.dump(svm, f)