In [None]:
# pandas is imported in this cell so that it runs on a fresh kernel: the
# notebook's main import block lives in a later cell, and without this line
# a Restart & Run All would stop here with a NameError on `pd`.
import pandas as pd

# The heartbeat CSVs are read with header=None, so columns are labelled by
# integer position rather than by name.
ptbdb_abnormal_df = pd.read_csv("/kaggle/input/heartbeat/ptbdb_abnormal.csv", header=None)
ptbdb_normal_df = pd.read_csv("/kaggle/input/heartbeat/ptbdb_normal.csv", header=None)

mitbih_train_df = pd.read_csv("/kaggle/input/heartbeat/mitbih_train.csv", header=None)
mitbih_test_df = pd.read_csv("/kaggle/input/heartbeat/mitbih_test.csv", header=None)

# To preview any of the frames, call .head() on it as the last line of a cell,
# e.g. mitbih_train_df.head()

In [None]:
# Per-column null counts for every dataset (MIT-BIH first, then PTBDB).
# Each heading is printed followed by the Series returned by isnull().sum().
null_report_targets = {
    "MIT-BIH Train - Null values:\n": mitbih_train_df,
    "MIT-BIH Test - Null values:\n": mitbih_test_df,
    "PTBDB Abnormal - Null values:\n": ptbdb_abnormal_df,
    "PTBDB Normal - Null values:\n": ptbdb_normal_df,
}
for heading, frame in null_report_targets.items():
    print(heading, frame.isnull().sum())

In [None]:
# Count fully duplicated rows in each dataset (MIT-BIH first, then PTBDB).
duplicate_report_targets = (
    ("MIT-BIH Train - Duplicates:", mitbih_train_df),
    ("MIT-BIH Test - Duplicates:", mitbih_test_df),
    ("PTBDB Abnormal - Duplicates:", ptbdb_abnormal_df),
    ("PTBDB Normal - Duplicates:", ptbdb_normal_df),
)
for heading, frame in duplicate_report_targets:
    duplicate_rows = frame.duplicated().sum()
    print(heading, duplicate_rows)

In [None]:
# Report (rows, columns) for each loaded dataset.
shape_report_targets = (
    ("MIT-BIH Train Shape:", mitbih_train_df),
    ("MIT-BIH Test Shape:", mitbih_test_df),
    ("PTBDB Abnormal Shape:", ptbdb_abnormal_df),
    ("PTBDB Normal Shape:", ptbdb_normal_df),
)
for heading, frame in shape_report_targets:
    print(heading, frame.shape)

In [None]:
def split_features_labels(frame):
    """Split a frame into ``(features, labels)`` NumPy arrays.

    Features are every column except the last; labels are the last column.
    """
    return frame.iloc[:, :-1].values, frame.iloc[:, -1].values


# MIT-BIH datasets
X_mitbih_train, y_mitbih_train = split_features_labels(mitbih_train_df)
X_mitbih_test, y_mitbih_test = split_features_labels(mitbih_test_df)

# PTBDB datasets
X_ptbdb_abnormal, y_ptbdb_abnormal = split_features_labels(ptbdb_abnormal_df)
X_ptbdb_normal, y_ptbdb_normal = split_features_labels(ptbdb_normal_df)

In [None]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load datasets: read the four header-less CSVs in a single pass; the
# unpacking order matches the order of the paths below.
ptb_abnormal, ptb_normal, mit_train, mit_test = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        "/kaggle/input/heartbeat/ptbdb_abnormal.csv",
        "/kaggle/input/heartbeat/ptbdb_normal.csv",
        "/kaggle/input/heartbeat/mitbih_train.csv",
        "/kaggle/input/heartbeat/mitbih_test.csv",
    )
)

# One-line sanity summary per dataset: total null cells and duplicated rows.
frames_by_label = {
    "MIT Train": mit_train,
    "MIT Test": mit_test,
    "PTB Abnormal": ptb_abnormal,
    "PTB Normal": ptb_normal,
}
for label, frame in frames_by_label.items():
    null_total = frame.isnull().sum().sum()
    duplicate_total = frame.duplicated().sum()
    print(f"{label} - Nulls: {null_total}, Duplicates: {duplicate_total}")

# Extract features (all columns but the last) and labels (the last column).
X_mit_train = mit_train.iloc[:, :-1].values
y_mit_train = mit_train.iloc[:, -1].values

X_mit_test = mit_test.iloc[:, :-1].values
y_mit_test = mit_test.iloc[:, -1].values

X_ptb_ab = ptb_abnormal.iloc[:, :-1].values
y_ptb_ab = ptb_abnormal.iloc[:, -1].values

X_ptb_n = ptb_normal.iloc[:, :-1].values
y_ptb_n = ptb_normal.iloc[:, -1].values

# Combine PTB datasets: abnormal rows first, then normal rows, with the
# label vector stacked in the same order so rows and labels stay aligned.
X_ptb = np.concatenate((X_ptb_ab, X_ptb_n), axis=0)
y_ptb = np.concatenate((y_ptb_ab, y_ptb_n), axis=0)

# Fit a 100-tree random forest on the MIT-BIH training split
# (fit() returns the estimator itself, so construction and fitting chain).
tree_mit = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_mit_train, y_mit_train
)

# Score the MIT-BIH model on the separate MIT-BIH test file.
y_mit_pred = tree_mit.predict(X_mit_test)
for heading, metric in (
    ("MIT-BIH Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(heading, metric(y_mit_test, y_mit_pred))

# Train Random Forest on PTBDB
# Hold out a stratified 20% of the combined PTBDB rows so the metrics below
# are measured on rows the forest was not fitted on. Scoring on the training
# rows themselves overstates how well the model generalises.
# NOTE(review): if the duplicate check reports duplicated PTB rows, identical
# rows can land on both sides of this split; consider dropping them first.
X_ptb_train, X_ptb_test, y_ptb_train, y_ptb_test = train_test_split(
    X_ptb,
    y_ptb,
    test_size=0.2,
    stratify=y_ptb,  # keep the class ratio the same in both parts
    random_state=42,  # same seed as the forests, for a reproducible split
)

tree_ptb = RandomForestClassifier(n_estimators=100, random_state=42)
tree_ptb.fit(X_ptb_train, y_ptb_train)

# Evaluate PTBDB model on the held-out rows only
y_ptb_pred = tree_ptb.predict(X_ptb_test)
print("PTBDB Accuracy:", accuracy_score(y_ptb_test, y_ptb_pred))
print("Confusion Matrix:\n", confusion_matrix(y_ptb_test, y_ptb_pred))
print("Classification Report:\n", classification_report(y_ptb_test, y_ptb_pred))

# Feature importance: draw one bar chart per fitted forest, with one bar per
# input column (bar height = that column's feature_importances_ entry).
for chart_title, feature_count, forest in (
    ("MIT-BIH Feature Importance", X_mit_train.shape[1], tree_mit),
    ("PTBDB Feature Importance", X_ptb.shape[1], tree_ptb),
):
    plt.figure(figsize=(10, 6))
    plt.bar(range(feature_count), forest.feature_importances_)
    plt.title(chart_title)
    plt.xlabel("Feature Index")
    plt.ylabel("Importance")
    plt.show()

# Persist both fitted forests to the current working directory with joblib.
mit_model_path, ptb_model_path = "rf_mit_model.pkl", "rf_ptb_model.pkl"
joblib.dump(tree_mit, mit_model_path)
joblib.dump(tree_ptb, ptb_model_path)
