In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
import time
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn import svm
from sklearn import neighbors
from sklearn.tree import DecisionTreeClassifier
from sklearn import ensemble
from sklearn import model_selection
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from imblearn.over_sampling import RandomOverSampler, SMOTE
from scipy.stats import uniform
import skops.io as sio

In [2]:
# Read the two PTB CSV files; they carry no header row, so columns get integer labels.
df_ptbdb_normal = pd.read_csv('../data/original/ptbdb_normal.csv', header=None)
df_ptbdb_abnormal = pd.read_csv('../data/original/ptbdb_abnormal.csv', header=None)

# Stack both frames under a fresh 0..n-1 index, then discard exact duplicate rows.
# drop_duplicates() returns a new frame whose surviving rows keep their index labels.
df_ptbdb = pd.concat(
    [df_ptbdb_normal, df_ptbdb_abnormal],
    axis=0,
    ignore_index=True,
).drop_duplicates()

# Sanity check: after the dedupe above this count is expected to be 0.
print('number of duplicates in combined dataframe: ', df_ptbdb.duplicated().sum())

# Column 187 is used as the target; every remaining column is model input.
df_ptbdb_target = df_ptbdb[187]
df_ptbdb_data = df_ptbdb.drop(columns=187)

number of duplicates in combined dataframe:  0


In [3]:
# Deserialize the persisted estimator from its skops file.
# NOTE(review): the file name suggests an SVM picked by a 5-split CV random
# search with 10 iterations -- confirm against the training notebook.
svm_model_path = 'best_easy_models/svm_best_model_cv_nsplits5_randomsearch_niter10.skops'
model = sio.load(svm_model_path)

In [4]:
# Time prediction plus crosstab rendering. perf_counter() is a monotonic,
# high-resolution clock intended for measuring intervals; time.time() is a
# wall clock that can jump if the system clock is adjusted.
start = time.perf_counter()

# Predict labels for the PTB feature columns with the loaded model.
y_pred_ptb = model.predict(df_ptbdb_data)

# Cross-tabulate true PTB labels (rows) against predicted labels (columns).
# NOTE(review): the recorded output shows predictions spanning 0.0-4.0 while
# the PTB target only takes 0.0/1.0 -- confirm whether predicted classes
# should be collapsed to a binary label before comparing.
print(pd.crosstab(df_ptbdb_target, y_pred_ptb, colnames=['Predictions']))

end = time.perf_counter()

print(f"time: {end - start:.2f} seconds")

Predictions   0.0  1.0  2.0  3.0  4.0
187                                  
0.0          3920  111   14    0    0
1.0          9809  199  222   31  239
time: 18.11 seconds


In [5]:
# Persist a plain-text summary of this experiment (overwrites any existing file).
# Explicit encoding keeps the report identical regardless of platform locale.
with open("clf_svm_trainMIT_testPTB.txt", "w", encoding="utf-8") as file:
    file.write("training data: MIT train dataset\n")
    file.write("test data: PTB dataset\n")
    file.write("clf = svm.SVC()\n")
    # Fixed decimal-comma typo: "0,5" made the recorded parameter dict malformed.
    # NOTE(review): value is transcribed by hand -- confirm gamma against the
    # loaded model's actual parameters.
    file.write("parameters = {'kernel': 'rbf', 'gamma': 0.5, 'C': 10}\n")
    file.write("\nData augmentation for training: no\n")
    file.write("\nConfusion Matrix:\n")
    # Recomputed here so this cell only depends on the target and predictions.
    file.write(str(pd.crosstab(df_ptbdb_target, y_pred_ptb, colnames=['Predictions'])))