In [2]:
# import essential libraries
import pandas as pd
import numpy as np
import pickle
import scipy
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression, LogisticRegressionCV, SGDClassifier, RidgeClassifier
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error, mean_absolute_error, f1_score
from sklearn.model_selection import train_test_split, cross_validate, GridSearchCV, RandomizedSearchCV
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn import svm, linear_model
from sklearn import tree, metrics
from sklearn.metrics import classification_report

from sklearn.preprocessing import LabelEncoder
import lightgbm
from bayes_opt import BayesianOptimization
from catboost import CatBoostClassifier, cv, Pool
import gzip

In [3]:
# read in csv
df = pd.read_csv('data/training_25_features.csv')
df

Unnamed: 0,I,II,III,aVF,aVR,aVL,V1,V2,V3,V4,...,weight,nurse,site,device,heart_axis,validated_by,second_opinion,validated_by_human,pacemaker,strat_fold
0,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
1,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.211,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
2,-0.005,0.131,0.136,-0.063,-0.070,0.133,-0.125,-0.082,-0.102,-0.190,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
3,-0.005,0.130,0.135,-0.063,-0.070,0.132,-0.122,-0.077,-0.094,-0.172,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
4,-0.005,0.128,0.133,-0.062,-0.069,0.130,-0.119,-0.071,-0.084,-0.157,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4319171,0.010,0.170,0.160,-0.090,-0.075,0.165,0.155,0.365,0.230,0.030,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4319172,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.368,0.245,0.029,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4319173,0.016,0.176,0.160,-0.096,-0.073,0.167,0.155,0.383,0.261,0.040,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4319174,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.406,0.282,0.059,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8


In [4]:
# convert all the columns to float64
# df = df.drop(['nurse','site','strat_fold'],axis='columns')

for i in range(23):
    df[df.columns[i]] = df[df.columns[i]].astype('float64')
    
# get info for columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4319176 entries, 0 to 4319175
Data columns (total 26 columns):
 #   Column              Dtype  
---  ------              -----  
 0   I                   float64
 1   II                  float64
 2   III                 float64
 3   aVF                 float64
 4   aVR                 float64
 5   aVL                 float64
 6   V1                  float64
 7   V2                  float64
 8   V3                  float64
 9   V4                  float64
 10  V5                  float64
 11  V6                  float64
 12  ritmi               float64
 13  age                 float64
 14  sex                 float64
 15  height              float64
 16  weight              float64
 17  nurse               float64
 18  site                float64
 19  device              float64
 20  heart_axis          float64
 21  validated_by        float64
 22  second_opinion      float64
 23  validated_by_human  int64  
 24  pacemaker           floa

In [5]:
# train-test split
X = df.drop(columns='ritmi')
y = df['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 1234)

# KNN

In [5]:
clfl2 = KNeighborsClassifier()
parameters = {'n_neighbors': [140], 'weights': ['distance'], 'metric': ['euclidean']} #0.975402556683289

fitmodel = GridSearchCV(clfl2, param_grid=parameters, cv=5, refit=True, scoring="accuracy", n_jobs=-1, verbose=2)
fitmodel.fit(X_train, y_train)

print(fitmodel.best_estimator_, fitmodel.best_params_, fitmodel.best_score_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
KNeighborsClassifier(metric='euclidean', n_neighbors=140, weights='distance') {'metric': 'euclidean', 'n_neighbors': 140, 'weights': 'distance'} 0.9754502556683289


In [7]:
import pickle as pkl
pkl.dump(fitmodel, open('ECG&patient_KNN.pkl', 'wb'))

In [8]:
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.99      0.98      0.98    335792
         1.0       0.96      0.97      0.97    267846
         2.0       0.98      0.98      0.98    476156

    accuracy                           0.98   1079794
   macro avg       0.98      0.98      0.98   1079794
weighted avg       0.98      0.98      0.98   1079794



# XGboost

In [9]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

In [11]:
estimator = XGBClassifier(
    objective= 'multi:softmax',
    num_class=3,
    nthread=4,
    seed=42
)
parameters = {
    'max_depth': range (2, 10, 1),
    'n_estimators': range(60, 220, 40),
    'learning_rate': [0.1, 0.01, 0.05]
}
grid_search = GridSearchCV(
    estimator=estimator,
    param_grid=parameters,
    scoring = 'roc_auc',
    n_jobs = -1,
    cv = 5,
    verbose=4
)

In [None]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 96 candidates, totalling 480 fits


In [None]:
print(grid_search.best_estimator_, grid_search.best_params_, grid_search.best_score_)

# Dask implementation

In [1]:
from dask_kubernetes import KubeCluster

  from dask_kubernetes import KubeCluster


In [29]:
client = Client(n_workers = 4, threads_per_worker = 1, memory_limit = '4GB')

In [3]:
from dask.distributed import Client,progress
c= Client(n_workers = 4, threads_per_worker = 1, memory_limit = '3GB')
c

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:56501/status,

0,1
Dashboard: http://127.0.0.1:56501/status,Workers: 4
Total threads: 4,Total memory: 11.18 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:56502,Workers: 4
Dashboard: http://127.0.0.1:56501/status,Total threads: 4
Started: Just now,Total memory: 11.18 GiB

0,1
Comm: tcp://127.0.0.1:56538,Total threads: 1
Dashboard: http://127.0.0.1:56541/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:56506,
Local directory: C:\Users\ashwi\AppData\Local\Temp\dask-worker-space\worker-nt8j9tpf,Local directory: C:\Users\ashwi\AppData\Local\Temp\dask-worker-space\worker-nt8j9tpf

0,1
Comm: tcp://127.0.0.1:56535,Total threads: 1
Dashboard: http://127.0.0.1:56539/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:56507,
Local directory: C:\Users\ashwi\AppData\Local\Temp\dask-worker-space\worker-2yko9x19,Local directory: C:\Users\ashwi\AppData\Local\Temp\dask-worker-space\worker-2yko9x19

0,1
Comm: tcp://127.0.0.1:56534,Total threads: 1
Dashboard: http://127.0.0.1:56536/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:56508,
Local directory: C:\Users\ashwi\AppData\Local\Temp\dask-worker-space\worker-ckch0m13,Local directory: C:\Users\ashwi\AppData\Local\Temp\dask-worker-space\worker-ckch0m13

0,1
Comm: tcp://127.0.0.1:56543,Total threads: 1
Dashboard: http://127.0.0.1:56544/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:56505,
Local directory: C:\Users\ashwi\AppData\Local\Temp\dask-worker-space\worker-3nyly7ld,Local directory: C:\Users\ashwi\AppData\Local\Temp\dask-worker-space\worker-3nyly7ld


In [4]:
import dask.dataframe as dd
data = dd.read_csv('data/training_25_features.csv')
data = data.persist()
progress(data)

VBox()

In [11]:
data

Unnamed: 0_level_0,I,II,III,aVF,aVR,aVL,V1,V2,V3,V4,V5,V6,ritmi,age,sex,height,weight,nurse,site,device,heart_axis,validated_by,second_opinion,validated_by_human,pacemaker,strat_fold
npartitions=9,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,int64,float64,int64,float64,float64,float64,float64,int64,float64,float64,int64,int64,float64,int64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [18]:
data.info()

<class 'dask.dataframe.core.DataFrame'>
Columns: 26 entries, I to strat_fold
dtypes: float64(24), int64(2)

In [5]:
# convert all the columns to float64
# df = df.drop(['nurse','site','strat_fold'],axis='columns')
df=data
for i in range(23):
    df[df.columns[i]] = df[df.columns[i]].astype('float64')
    
# get info for columns
df.info()

<class 'dask.dataframe.core.DataFrame'>
Columns: 26 entries, I to strat_fold
dtypes: float64(24), int64(2)

In [6]:
# train-test split
X = df.drop(columns='ritmi')
y = df['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 1234)

AttributeError: 'DataFrame' object has no attribute 'take'

In [9]:
import joblib
from sklearn.ensemble import RandomForestClassifier
import dask_ml

In [5]:
cluster = KubeCluster(n_workers=30)

ValueError: Worker pod specification not provided. See KubeCluster docstring for ways to specify workers

In [None]:
with joblib.parallel_background()

# SGD classifier

In [1]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
# read in csv
df = pd.read_csv('data/training_25_features.csv')
print('-->DATA LOADED')
# train-test split
for i in range(23):
    df[df.columns[i]] = df[df.columns[i]].astype('float64')
X = df.drop(columns='ritmi')
y = df['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 1234)
print('-->DATA SPLIT DONE')
print('-->MODEL TRAINING')
fitmodel = linear_model.SGDClassifier(n_jobs=10, max_iter = 1000, tol=1e-3,penalty = "elasticnet", verbose=True)
fitmodel.fit(X_train, y_train)
import pickle as pkl
print('-->SAVING MODEL')
pkl.dump(fitmodel, open('ECG&patient_SGD.pkl', 'wb'))
print('-->MODEL SAVED')
print('-->Testing model on test dataset')
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))

-->DATA LOADED


NameError: name 'train_test_split' is not defined

In [8]:
loaded_model = pickle.load(open("ECG&patient_SGD.pkl", 'rb'))
print('-->Testing model on test dataset')
y_pred = loaded_model.predict(X_test)
print(classification_report(y_test, y_pred))
print('ACCURACY:',accuracy_score(y_test, y_pred),'\nMEAN ABS ERROR:',mean_absolute_error(y_test, y_pred))

-->Testing model on test dataset
              precision    recall  f1-score   support

         0.0       0.38      0.50      0.43    335792
         1.0       0.43      0.49      0.45    267846
         2.0       0.62      0.43      0.51    476156

    accuracy                           0.47   1079794
   macro avg       0.48      0.47      0.47   1079794
weighted avg       0.50      0.47      0.47   1079794

ACCURACY: 0.4678114529252802 
MEAN ABS ERROR: 0.773654048827832


In [15]:
from sklearn.model_selection import ParameterGrid
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import roc_auc_score
import parfit.parfit as pf

parameters = {
    'alpha': [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3], # learning rate
    'loss': ['log'], # logistic regression,
    'penalty': ['l2'],
    'n_jobs': [-1]
}

fitmodel = GridSearchCV(SGDClassifier(), param_grid=parameters, cv=5, refit=True, scoring="accuracy", n_jobs=-1, verbose=True)
fitmodel.fit(X_train, y_train)

print(fitmodel.best_estimator_, fitmodel.best_params_, fitmodel.best_score_)

Fitting 5 folds for each of 8 candidates, totalling 40 fits




SGDClassifier(alpha=0.001, loss='log', n_jobs=-1) {'alpha': 0.001, 'loss': 'log', 'n_jobs': -1, 'penalty': 'l2'} 0.4686437156597691


In [17]:
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.38      0.36      0.37    335792
         1.0       0.50      0.25      0.34    267846
         2.0       0.51      0.67      0.58    476156

    accuracy                           0.47   1079794
   macro avg       0.46      0.43      0.43   1079794
weighted avg       0.47      0.47      0.45   1079794



In [9]:
dfecg = df[['I', 'II', 'III', 'aVF', 'aVR', 'aVL', 'V1', 'V2', 'V3', 'V4', 'V5','V6','ritmi']]
dfecg

Unnamed: 0,I,II,III,aVF,aVR,aVL,V1,V2,V3,V4,V5,V6,ritmi
0,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,-0.145,-0.080,2.0
1,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.211,-0.146,-0.080,2.0
2,-0.005,0.131,0.136,-0.063,-0.070,0.133,-0.125,-0.082,-0.102,-0.190,-0.129,-0.072,2.0
3,-0.005,0.130,0.135,-0.063,-0.070,0.132,-0.122,-0.077,-0.094,-0.172,-0.116,-0.067,2.0
4,-0.005,0.128,0.133,-0.062,-0.069,0.130,-0.119,-0.071,-0.084,-0.157,-0.102,-0.061,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4319171,0.010,0.170,0.160,-0.090,-0.075,0.165,0.155,0.365,0.230,0.030,-0.065,-0.060,0.0
4319172,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.368,0.245,0.029,-0.057,-0.056,0.0
4319173,0.016,0.176,0.160,-0.096,-0.073,0.167,0.155,0.383,0.261,0.040,-0.052,-0.055,0.0
4319174,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.406,0.282,0.059,-0.046,-0.053,0.0


In [10]:
X = dfecg.drop(columns='ritmi')
y = dfecg['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 1234)

In [11]:
parameters = {
    'alpha': [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3], # learning rate
    'loss': ['log'], # logistic regression,
    'penalty': ['l2'],
    'n_jobs': [-1]
}

fitmodel = SGDClassifier(n_jobs=10, max_iter = 1000, tol=1e-3,penalty = "elasticnet", verbose=True)
fitmodel.fit(X_train, y_train)

[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.


-- Epoch 1
-- Epoch 1
-- Epoch 1
Norm: 4.49, NNZs: 0, Bias: -0.994884, T: 3239382, Avg. loss: 0.634024
Total training time: 1.48 seconds.
-- Epoch 2
Norm: 4.49, NNZs: 1, Bias: -1.000086, T: 3239382, Avg. loss: 0.502812
Total training time: 1.66 seconds.
-- Epoch 2
Norm: 5.22, NNZs: 3, Bias: -0.998877, T: 3239382, Avg. loss: 0.895321
Total training time: 1.70 seconds.
-- Epoch 2
Norm: 3.23, NNZs: 1, Bias: -0.998626, T: 6478764, Avg. loss: 0.624146
Total training time: 2.80 seconds.
-- Epoch 3
Norm: 3.20, NNZs: 1, Bias: -1.001576, T: 6478764, Avg. loss: 0.494891
Total training time: 3.29 seconds.
-- Epoch 3
Norm: 3.75, NNZs: 2, Bias: -0.995519, T: 6478764, Avg. loss: 0.883552
Total training time: 3.30 seconds.
-- Epoch 3
Norm: 2.65, NNZs: 2, Bias: -0.998968, T: 9718146, Avg. loss: 0.623808
Total training time: 4.20 seconds.
-- Epoch 4
Norm: 2.62, NNZs: 2, Bias: -1.000019, T: 9718146, Avg. loss: 0.494644
Total training time: 4.92 seconds.
-- Epoch 4
Norm: 3.07, NNZs: 1, Bias: -0.997558, T

[Parallel(n_jobs=10)]: Done   3 out of   3 | elapsed:   11.2s finished


In [13]:
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))
print('ACCURACY:',accuracy_score(y_test, y_pred),'\nMEAN ABS ERROR:',mean_absolute_error(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.17      0.00      0.01    335792
         1.0       0.23      0.05      0.08    267846
         2.0       0.44      0.94      0.60    476156

    accuracy                           0.43   1079794
   macro avg       0.28      0.33      0.23   1079794
weighted avg       0.31      0.43      0.29   1079794

ACCURACY: 0.4259951435181155 
MEAN ABS ERROR: 0.8716477402171154


# Decision Tree

In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import classification_report
# read in csv
df = pd.read_csv('data/training_25_features.csv')
print('-->DATA LOADED')
# train-test split
for i in range(23):
    df[df.columns[i]] = df[df.columns[i]].astype('float64')
X = df.drop(columns='ritmi')
y = df['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 1234)
print('-->DATA SPLIT DONE')
print('-->MODEL TRAINING')
'''parameters = { 
    'criterion':['gini','entropy']
}'''
'''fitmodel = GridSearchCV(
    DecisionTreeClassifier(), 
    param_grid=parameters,
    refit=True, scoring="accuracy", 
    n_jobs=10, 
    verbose=10
)'''
fitmodel = DecisionTreeClassifier()
fitmodel.fit(X_train, y_train)
import pickle as pkl
print('-->SAVING MODEL')
pkl.dump(fitmodel, open('ECG&patient_DT.pkl', 'wb'))
print('-->MODEL SAVED')
print('-->Testing model on test dataset')
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))

-->DATA LOADED
-->DATA SPLIT DONE
-->MODEL TRAINING
-->SAVING MODEL
-->MODEL SAVED
-->Testing model on test dataset
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99    335792
         1.0       0.97      0.98      0.97    267846
         2.0       0.98      0.98      0.98    476156

    accuracy                           0.98   1079794
   macro avg       0.98      0.98      0.98   1079794
weighted avg       0.98      0.98      0.98   1079794



In [6]:
from sklearn.metrics import accuracy_score,mean_absolute_error
print('ACCURACY:',accuracy_score(y_test, y_pred),'\nMEAN ABS ERROR:',mean_absolute_error(y_test, y_pred))

ACCURACY: 0.9821039939099495 
MEAN ABS ERROR: 0.022980309207126546


In [7]:
dfecg = df[['I', 'II', 'III', 'aVF', 'aVR', 'aVL', 'V1', 'V2', 'V3', 'V4', 'V5','V6','ritmi']]
X = dfecg.drop(columns='ritmi')
y = dfecg['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 1234)

In [8]:
fitmodel = DecisionTreeClassifier()
fitmodel.fit(X_train, y_train)
import pickle as pkl
print('-->SAVING MODEL')
pkl.dump(fitmodel, open('ECG_DT.pkl', 'wb'))
print('-->MODEL SAVED')
print('-->Testing model on test dataset')
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))
print('ACCURACY:',accuracy_score(y_test, y_pred),'\nMEAN ABS ERROR:',mean_absolute_error(y_test, y_pred))

-->SAVING MODEL
-->MODEL SAVED
-->Testing model on test dataset
              precision    recall  f1-score   support

         0.0       0.79      0.79      0.79    335792
         1.0       0.76      0.76      0.76    267846
         2.0       0.82      0.81      0.82    476156

    accuracy                           0.79   1079794
   macro avg       0.79      0.79      0.79   1079794
weighted avg       0.79      0.79      0.79   1079794

ACCURACY: 0.7928558595435796 
MEAN ABS ERROR: 0.29506646638155054


# Random Forest

In [None]:
dfecg = df[['I', 'II', 'III', 'aVF', 'aVR', 'aVL', 'V1', 'V2', 'V3', 'V4', 'V5','V6','ritmi']]
X = dfecg.drop(columns='ritmi')
y = dfecg['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 1234)

In [9]:
from sklearn.ensemble import RandomForestClassifier
fitmodel = RandomForestClassifier()
fitmodel.fit(X_train, y_train)
import pickle as pkl
print('-->SAVING MODEL')
pkl.dump(fitmodel, open('ECG_RF.pkl', 'wb'))
print('-->MODEL SAVED')
print('-->Testing model on test dataset')
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))
print('ACCURACY:',accuracy_score(y_test, y_pred),'\nMEAN ABS ERROR:',mean_absolute_error(y_test, y_pred))

-->SAVING MODEL
-->MODEL SAVED
-->Testing model on test dataset
              precision    recall  f1-score   support

         0.0       0.92      0.86      0.89    335792
         1.0       0.90      0.85      0.87    267846
         2.0       0.87      0.93      0.90    476156

    accuracy                           0.89   1079794
   macro avg       0.90      0.88      0.89   1079794
weighted avg       0.89      0.89      0.89   1079794

ACCURACY: 0.8907365664191503 
MEAN ABS ERROR: 0.15806811299192253


In [10]:
X = df.drop(columns='ritmi')
y = df['ritmi']
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.25, random_state = 1234)

In [11]:
from sklearn.ensemble import RandomForestClassifier
fitmodel = RandomForestClassifier()
fitmodel.fit(X_train, y_train)
import pickle as pkl
print('-->SAVING MODEL')
pkl.dump(fitmodel, open('ECG&patient_RF.pkl', 'wb'))
print('-->MODEL SAVED')
print('-->Testing model on test dataset')
y_pred = fitmodel.predict(X_test)
print(classification_report(y_test, y_pred))
print('ACCURACY:',accuracy_score(y_test, y_pred),'\nMEAN ABS ERROR:',mean_absolute_error(y_test, y_pred))

-->SAVING MODEL
-->MODEL SAVED
-->Testing model on test dataset
              precision    recall  f1-score   support

         0.0       1.00      0.99      0.99    335792
         1.0       0.98      0.98      0.98    267846
         2.0       0.99      0.99      0.99    476156

    accuracy                           0.99   1079794
   macro avg       0.99      0.99      0.99   1079794
weighted avg       0.99      0.99      0.99   1079794

ACCURACY: 0.9871049477955981 
MEAN ABS ERROR: 0.015805792586363696


# KNN

In [1]:
import pickle as pkl
model = pkl.load(open("ECG&patient_KNN.pkl", 'rb'))

In [6]:
y_pred = model.predict(X_test)

In [7]:
from sklearn.metrics import accuracy_score,mean_absolute_error
print('ACCURACY:',accuracy_score(y_test, y_pred),'\nMEAN ABS ERROR:',mean_absolute_error(y_test, y_pred))

ACCURACY: 0.9763121484283114 
MEAN ABS ERROR: 0.03120780445158984
