In [None]:
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Tuple
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import LocalOutlierFactor
from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report


In [None]:
class DatasetName:
    """
    Loader and preprocessor for the heart-disease training CSV.

    Build instances with the ``load`` factory method, then call ``prepare``
    to obtain the train/test split consumed by the models.
    """

    # File read by load() when no explicit path is given.
    DEFAULT_PATH = 'data/heart_disease_train.csv'
    # Label column separated from the features in prepare().
    TARGET_COLUMN = 'HeartDiseaseorAttack'
    # Columns cast to int64 and min-max scaled in prepare().
    SCALED_FEATURES = ('BMI', 'MentHlth', 'PhysHlth')

    train_data: pd.DataFrame
    test_data: pd.DataFrame  # load() stores None here; the test split is derived in prepare()

    # prefer the load() factory method over calling the constructor directly
    def __init__(self, train_data: pd.DataFrame, test_data: pd.DataFrame):
        self.train_data = train_data
        self.test_data = test_data

    @classmethod
    def load(cls, path: str = DEFAULT_PATH):
        """
        Read the dataset CSV and wrap it in a new instance.

        :param path: location of the CSV file; defaults to ``DEFAULT_PATH``
        :return: an instance of ``cls`` whose ``train_data`` holds the loaded
                 frame and whose ``test_data`` is ``None``
        """
        df = pd.read_csv(path)
        # cls(...) instead of a hard-coded class name so subclasses get their own type
        return cls(df, None)

    def prepare(self) -> Tuple[pd.DataFrame, pd.Series, pd.DataFrame, pd.Series]:
        """
        Clean the loaded frame, split it and scale the numeric features.

        Rows with missing values are dropped, the columns listed in
        ``SCALED_FEATURES`` are cast to int64, and the data is split 70/30,
        stratified on ``TARGET_COLUMN``. Scaling happens after the split: each
        scaler is fitted on the training rows only and then applied to the
        test rows, so no test-set statistics reach the training data.

        ``self.train_data`` is not modified, so the method can be called
        repeatedly with the same result.

        :return: (train_x, train_y, test_x, test_y) as pandas objects
        """
        # dropna() returns a new frame, so its result has to be kept;
        # astype() likewise returns a fresh copy, leaving self.train_data intact.
        df = self.train_data.dropna().astype(
            {feature: 'int64' for feature in self.SCALED_FEATURES}
        )

        # Split columns into features (X) and target (Y)
        X = df.drop(columns=self.TARGET_COLUMN)
        Y = df[self.TARGET_COLUMN]

        train_x, test_x, train_y, test_y = train_test_split(
            X, Y, test_size=0.3, stratify=Y, random_state=42
        )

        # Own the split frames before writing into them (avoids chained-assignment warnings).
        train_x = train_x.copy()
        test_x = test_x.copy()

        for feature in self.SCALED_FEATURES:
            scaler = MinMaxScaler(feature_range=(0, 1))
            # Fit on training rows only; test values may fall slightly outside [0, 1].
            train_x[feature] = scaler.fit_transform(train_x[[feature]]).ravel()
            test_x[feature] = scaler.transform(test_x[[feature]]).ravel()

        return train_x, train_y, test_x, test_y

In [None]:
# Train a random forest on the prepared split, then report a confusion matrix
# heatmap and the per-class precision/recall on the held-out rows.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Seed for the forest so reported metrics are reproducible across runs.
RANDOM_STATE = 42
# Tick labels for both heatmap axes.
# NOTE(review): assumes the sorted target values are (no risk, risk) — confirm against the data.
CLASS_NAMES = ['no risk', 'risk']

(train_x, train_y, test_x, test_y) = DatasetName.load().prepare()

random_forest = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
random_forest.fit(train_x, train_y)

pred_y = random_forest.predict(test_x)

# Rows are true classes, columns are predicted classes.
test_con_mat = confusion_matrix(test_y, pred_y)

fig, ax = plt.subplots()
# annot=True to annotate cells, fmt='g' to disable scientific notation
sns.heatmap(
    test_con_mat,
    annot=True,
    fmt='g',
    xticklabels=CLASS_NAMES,
    yticklabels=CLASS_NAMES,
    ax=ax,
)

# labels and title
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')

print(classification_report(test_y, pred_y))
