In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [None]:
# Read the CSV file into a DataFrame and preview its first rows.
# NOTE(review): this is an absolute, environment-specific path; consider
# deriving it from a configurable data directory.
csv_path = "/content/Modified_ECG_file.csv"
df = pd.read_csv(csv_path)
df.head()

In [10]:
# Features: the first column only, selected with a list so X stays a 2-D DataFrame.
# Target: the second column, selected with a scalar so y is a 1-D Series.
feature_positions, target_position = [0], 1
X, y = df.iloc[:, feature_positions], df.iloc[:, target_position]

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Pipeline: mean imputation -> standardisation -> logistic regression.
# Chaining the steps means the imputer and scaler are fitted only on the data
# the pipeline itself is fitted on.
pipeline_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression(max_iter=3000)),
]
pipeline = Pipeline(pipeline_steps)

In [52]:
# Decision Tree (K-Fold Cross-Validation)
# Scores a decision tree with default hyper-parameters using shuffled 5-fold
# cross-validation over the full feature matrix X.
#
# This cell previously passed `y_classified` as the target, a name that no
# visible cell defines, so it failed with NameError on a fresh kernel. The
# target `y` created alongside X is used instead.
# NOTE(review): confirm that `y` is the intended target for this experiment.
model = DecisionTreeClassifier(random_state=42)  # seeded so re-runs build the same tree
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X, y, cv=kfold)
print("Cross-validation scores:", scores)
print("Average CV Score:", scores.mean())

Cross-validation scores: [0.73333333 0.7875     0.725      0.775      0.7375    ]
Average CV Score: 0.7516666666666666


In [41]:
# GridSearchCV
# Tunes parameter C of the pipeline step named 'clf' (the 'clf__C' key) using
# 5-fold cross-validated accuracy. The search is fitted on the training split
# only, so the held-out test split stays unseen during tuning.
# Requires GridSearchCV from sklearn.model_selection to be imported in the
# notebook's import cell.
c_space = np.logspace(-5, 8, 15)  # 15 log-spaced candidates from 1e-5 to 1e8
param_grid = {'clf__C': c_space}
logreg_cv = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
logreg_cv.fit(X_train, y_train)
print("Tuned Logistic Regression Parameters:", logreg_cv.best_params_)
print("Best CV Accuracy:", logreg_cv.best_score_)


Tuned Logistic Regression Parameters: {'clf__C': np.float64(0.05179474679231213)}
Best CV Accuracy: 0.7011904761904763


In [46]:
# Cross-Validation (Holdout Method)
# Report how many rows ended up in each side of the train/test split.
for label, subset in (
    ("Training set size:", X_train),
    ("Testing set size:", X_test),
):
    print(label, len(subset))

Training set size: 840
Testing set size: 360


In [56]:
# Evaluate the best pipeline found by the grid search on the held-out test
# split and print its accuracy.
best_pipeline = logreg_cv.best_estimator_
y_pred = best_pipeline.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

Accuracy: 0.7055555555555556
