# Required libraries

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import numpy as np
import warnings
import seaborn as sns
import matplotlib.pyplot as plt


# Display Settings and Ignoring Warnings

In [6]:
# Suppress every warning for the remainder of the session.
warnings.filterwarnings("ignore")


def format_three_decimals(x):
    """Render a number with exactly three digits after the decimal point."""
    return '%.3f' % x


# Display settings: show all columns, let pandas auto-detect the width,
# and print floats with three decimals.
display_options = {
    'display.max_columns': None,
    'display.width': None,
    'display.float_format': format_three_decimals,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Data Frame Overview

In [7]:
def check_df(dataframe, head=5):
    """Print a quick overview of ``dataframe``.

    The report contains, in order: shape, column dtypes, the first and last
    ``head`` rows, the per-column count of missing values, and the transposed
    ``describe()`` summary.

    Args:
        dataframe: The pandas DataFrame to inspect.
        head: Number of rows shown in both the head and the tail sections.

    Returns:
        None. Everything is written to standard output.
    """
    # Each section is (header text, zero-argument producer). Producers are
    # evaluated lazily so sections are computed and printed strictly in order.
    sections = (
        ("###### Shape ######", lambda: dataframe.shape),
        ("\n###### Types ######", lambda: dataframe.dtypes),
        ("\n###### Head ######", lambda: dataframe.head(head)),
        ("\n###### Tail ######", lambda: dataframe.tail(head)),
        ("\n###### NaN ######", lambda: dataframe.isnull().sum()),
        ("\n###### Quantiles ######", lambda: dataframe.describe().T),
    )
    for header, produce in sections:
        print(header)
        print(produce())

# Reading Data Set

In [8]:
# Load heart.csv from the current working directory into a DataFrame.
# NOTE(review): the rendered frame shows "Unnamed: 0.1" / "Unnamed: 0" columns whose
# values mirror the row number — presumably indices written by earlier to_csv calls;
# confirm, and keep them out of any feature matrix.
df = pd.read_csv("heart.csv")
# Bare last expression so the notebook renders the (truncated) frame.
df

Unnamed: 0.1,Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,0,62,0,0,160,203,0,0,140,0,0.100,1,0,3,1
1,1,43,1,2,122,193,0,1,149,1,1.000,0,0,3,1
2,2,42,1,1,140,318,0,1,161,0,1.400,1,0,2,1
3,3,40,1,1,120,160,0,0,103,1,0.000,1,0,2,0
4,4,43,1,2,120,204,0,1,168,1,3.400,2,1,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223645,223645,58,1,0,125,258,0,0,160,1,0.000,2,2,2,0
223646,223646,51,0,0,134,313,0,0,145,1,0.000,1,0,2,1
223647,223647,48,0,0,130,236,0,1,125,0,0.000,2,0,2,0
223648,223648,59,1,0,112,243,1,1,173,0,0.000,1,0,3,1


# Identify Categorical and Numerical Columns

In [9]:
def grab_col_names(dataframe, cat_th=10, car_th=20):
    """Group the columns of ``dataframe`` into categorical, cardinal and numeric sets.

    A column counts as categorical when it has object dtype, or when it is
    non-object but has fewer than ``cat_th`` distinct values. Object columns
    with more than ``car_th`` distinct values are reported separately as
    "categorical but cardinal" and removed from the categorical list.

    Args:
        dataframe: The pandas DataFrame whose columns are classified.
        cat_th: Distinct-value count below which a non-object column is
            treated as categorical.
        car_th: Distinct-value count above which an object column is treated
            as cardinal.

    Returns:
        A tuple ``(cat_cols, cat_but_car, num_cols)`` of column-name lists.
        A short summary of the counts is also printed.
    """
    # First pass: partition the columns by dtype, preserving column order.
    object_cols, non_object_cols = [], []
    for col in dataframe.columns:
        if dataframe[col].dtypes == "O":
            object_cols.append(col)
        else:
            non_object_cols.append(col)

    # Low-cardinality non-object columns behave like categories.
    num_but_cat = [col for col in non_object_cols if dataframe[col].nunique() < cat_th]
    # High-cardinality object columns are set aside.
    cat_but_car = [col for col in object_cols if dataframe[col].nunique() > car_th]

    cat_cols = [col for col in object_cols + num_but_cat if col not in cat_but_car]
    num_cols = [col for col in non_object_cols if col not in num_but_cat]

    n_rows, n_cols = dataframe.shape[0], dataframe.shape[1]
    print(f"Observations: {n_rows}")
    print(f"Variables: {n_cols}")
    print(f'cat_cols: {len(cat_cols)}')
    print(f'num_cols: {len(num_cols)}')
    print(f'cat_but_car: {len(cat_but_car)}')
    print(f'num_but_cat: {len(num_but_cat)}')

    return cat_cols, cat_but_car, num_cols

In [10]:
# Classify the columns of df using the default thresholds (cat_th=10, car_th=20).
cat_cols, cat_but_car, num_cols = grab_col_names(df)

Observations: 223650
Variables: 15
cat_cols: 9
num_cols: 6
cat_but_car: 0
num_but_cat: 9


# Initializing Label Encoder and Standard Scaler

In [11]:
# Preprocessing objects shared by the encoding/scaling cell further down.
le = LabelEncoder()  # maps each distinct value to an integer code
scaler = StandardScaler()  # standardises columns (zero mean, unit variance by default)

# Defining Categorical and Numeric Features

In [12]:
# Columns handled as categorical codes (integer-encoded in the next step).
categorical_features = [
    'sex', 'cp', 'fbs', 'restecg',
    'exang', 'slope', 'ca', 'thal',
]

# Columns handled as continuous values (standardised in the next step).
numeric_features = [
    'age', 'trestbps', 'chol', 'thalach', 'oldpeak',
]

# Encoding Categorical Features and Scaling Numeric Features

In [13]:
# Integer-encode the categorical columns one at a time; the shared encoder is
# refitted for every column, so afterwards it only remembers the last one.
for column in categorical_features:
    df[column] = le.fit_transform(df[column])

# Standardise the numeric columns in place.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the train/test
# split, so test rows contribute to the mean/std — consider fitting on training rows only.
df[numeric_features] = scaler.fit_transform(df[numeric_features])

# Separating Target Variable

In [14]:
# Columns named "Unnamed: ..." hold row numbers left over from earlier CSV round-trips
# (their values mirror the index in the loaded frame). Leaving them in X would let the
# models — the distance-based KNN ones in particular, since these columns are unscaled —
# learn from row position instead of the actual features, so they are dropped here.
index_artifacts = [col for col in df.columns if str(col).startswith('Unnamed')]

# Feature matrix: everything except the label and the row-number artefacts.
X = df.drop(columns=['target'] + index_artifacts)
# Label vector.
y = df['target']

# Splitting Data into Training and Testing Sets for Model Evaluation

In [15]:
# Hold out 20% of the rows for evaluation; random_state=42 fixes the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# List of Regression Models for Comparison

In [16]:
# (display name, estimator) pairs compared by the evaluation loop.
models = [
    ('LinearRegression', LinearRegression()),
    ('KNeighborsRegressor', KNeighborsRegressor()),
    # Seeded like the random forest so repeated runs build the same tree and
    # report the same metrics.
    ('DecisionTreeRegressor', DecisionTreeRegressor(random_state=42)),
    ('RandomForestRegressor', RandomForestRegressor(random_state=42)),
]

# Training and Evaluating Regression Models Using RMSE and MAE Metrics

In [None]:
for name, regressor in models:
    # Fit on the training split and predict the held-out rows in one chained call
    # (scikit-learn's fit returns the estimator itself).
    y_pred = regressor.fit(X_train, y_train).predict(X_test)

    # Error metrics on the test split: mean absolute error and root-mean-squared error.
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    print(f"{name} - RMSE: {round(rmse, 4)}, MAE: {round(mae, 4)}")

# Training and Evaluating Classification Models Using Performance Metrics

In [None]:
# (display name, estimator) pairs for the classification comparison.
classification_models = [
    ('KNeighborsClassifier', KNeighborsClassifier()),
    ('RandomForestClassifier', RandomForestClassifier(random_state=42)),
]

for name, classifier in classification_models:
    # Train on the training split, then label the held-out rows
    # (scikit-learn's fit returns the estimator itself, so the calls chain).
    y_pred = classifier.fit(X_train, y_train).predict(X_test)

    # Summary metrics on the test split.
    conf_matrix = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print(
        f"\n{name} - Accuracy: {round(accuracy, 4)}, "
        f"Precision: {round(precision, 4)}, "
        f"Recall: {round(recall, 4)}, "
        f"F1 Score: {round(f1, 4)}"
    )
    print(f"Confusion Matrix:\n{conf_matrix}")
    print(classification_report(y_test, y_pred))