In [1]:
import pandas as pd
import os

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [2]:
# Project root: the parent of the current working directory.
# os.path.dirname handles the platform's path separator, unlike splitting the
# path on a hard-coded "/" (which also yields "" when the cwd is the root).
work_directory = os.path.dirname(os.getcwd())
# Data locations, resolved relative to the project root.
DATASETS = os.path.join(work_directory, "data")
PROCESSED_PATH = os.path.join(DATASETS, "processed")

In [3]:
def read_directory(directory_name):
    """Return the entry names found in ``directory_name``, sorted ascending.

    Args:
        directory_name: Path of the directory to list.

    Returns:
        A new list with the names returned by ``os.listdir``, in sorted order.
    """
    entries = os.listdir(directory_name)
    entries.sort()
    return entries

In [4]:
def create_directory(workdir, directory_name):
    """Ensure that ``directory_name`` exists inside ``workdir``.

    Missing intermediate directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
        workdir: Path of the parent directory.
        directory_name: Name (or relative path) of the directory to create.

    Raises:
        FileExistsError: If the target path exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists() check, which could race with
    # another process creating the directory between the check and makedirs().
    os.makedirs(os.path.join(workdir, directory_name), exist_ok=True)

In [5]:
def save_graphs(workdir, directory_name, graphname, plt):
    """Save a figure to ``workdir/directory_name/graphname`` via ``plt.savefig``.

    The output directory is set up first through ``create_directory``.

    Args:
        workdir: Base directory under which the output directory lives.
        directory_name: Name of the output directory inside ``workdir``.
        graphname: File name of the saved figure.
        plt: Object exposing ``savefig(path)`` (presumably
            ``matplotlib.pyplot`` -- confirm against callers).
    """
    create_directory(workdir, directory_name)
    output_file = os.path.join(workdir, directory_name, graphname)
    plt.savefig(output_file)

In [6]:
def main():
    """Evaluate a logistic-regression classifier on the processed datasets.

    Loads the datasets found in ``PROCESSED_PATH``, concatenates them into a
    single frame, obtains 5-fold cross-validated predictions from a
    ``LogisticRegression`` model and prints accuracy, precision, recall and F1.

    Raises:
        FileNotFoundError: If ``PROCESSED_PATH`` contains no dataset to load.
    """
    # Skip hidden entries (e.g. ".DS_Store", ".ipynb_checkpoints"): they sort
    # ahead of the real files and would otherwise be handed to read_parquet.
    dataset_names = [
        name for name in read_directory(PROCESSED_PATH) if not name.startswith(".")
    ]
    if not dataset_names:
        raise FileNotFoundError(
            f"No processed datasets found in {PROCESSED_PATH!r}"
        )

    # Load every dataset instead of indexing positions 0-2, so the function
    # does not raise IndexError when fewer files exist and does not need
    # editing when the number of processed files changes. The per-file frames
    # are only referenced by the temporary list, so they are released as soon
    # as the concatenation is done.
    df = pd.concat(
        [
            pd.read_parquet(os.path.join(PROCESSED_PATH, name))
            for name in dataset_names
        ],
        ignore_index=True,  # give the combined frame a fresh 0..n-1 index
    )

    model = LogisticRegression()

    # Features are every column except the two target indicator columns
    # (presumably a one-hot encoded pair -- confirm); one of them is enough
    # as the label for binary classification.
    X = df.drop(columns=["Heart_Attack_Status_No", "Heart_Attack_Status_Yes"])
    y = df["Heart_Attack_Status_Yes"]

    # Cross-validated predictions: one prediction per row, collected over
    # the 5 folds.
    y_pred = cross_val_predict(model, X, y, cv=5)

    # Calculate metrics
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    # Print the scores
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)

In [7]:
# Entry point: run the evaluation when this code is executed directly
# (in a notebook kernel __name__ is "__main__", so this cell calls main()).
if __name__ == "__main__":
    main()

Precision: 0.5793770455631964
Recall: 0.2571866990972755
F1 Score: 0.3562384117939673
