In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from helper import *

# Baseline vs Optimized Model Accuracy

In [None]:
# Read the score tables written for the tuned and the baseline runs.
tunedScores = pd.read_csv('tunedScores.csv')
baselineScores = pd.read_csv('baselineScores.csv')

# Keep the seven odd-positioned columns (1, 3, ..., 13) of each table.
# NOTE(review): presumably these are the per-model test-score columns --
# confirm against the CSV layout.
testColumns = list(range(1, 14, 2))
tunedTest = tunedScores.iloc[:, testColumns]
baselineTest = baselineScores.iloc[:, testColumns]

In [None]:
# Short labels for the seven models, in the order they appear on the x-axis.
models = [
    'LR', 'NB', 'KNN', 'SVM',
    'DT', 'RF', 'ANN',
]

# First row of each selected score table, as a NumPy array.
baseline, tuned = (
    scores.iloc[0].to_numpy() for scores in (baselineTest, tunedTest)
)

In [None]:
# Grouped bar chart: one baseline/optimized pair of bars per model.
width = 0.3  # the width of the bars
x = np.arange(len(models))

# Bar centres: each pair sits half a bar-width either side of its tick.
baselinePos = x - width / 2
tunedPos = x + width / 2

fig, ax = plt.subplots(figsize=(10, 7))
baseBars = ax.bar(baselinePos, baseline, width,
                  label='Baseline Model', color='red')
tuneBars = ax.bar(tunedPos, tuned, width,
                  label='Optimized Model', color='black')

# Title, axis labels and one tick per model.
ax.set_title('Test Accuracy Score for Baseline vs Optimized Models', fontsize=15)
ax.set_xlabel('Machine Learning Model', fontsize=15)
ax.set_ylabel('Accuracy Score (%)', fontsize=15)
ax.set_xticks(x)
ax.set_xticklabels(models)

# The y-axis is restricted to the 75-90 range.
ax.set_ylim(75, 90)
ax.legend(fontsize=15)

fig.tight_layout()

# Write the finished figure to disk.
fig.savefig('plots/baselineOptimizedPlot.png')

# Data Distribution Plot

In [None]:
# Load the raw dataset and pull the column at position 11 out as a NumPy array.
# NOTE(review): presumably column 11 is the binary class label -- confirm
# against the heart.csv header.
data = pd.read_csv('heart.csv')
y = data.iloc[:, 11].to_numpy()

In [None]:
# Share of labels below 1 (class 0), as a whole-number percentage. The
# denominator is the number of labels actually loaded rather than a
# hard-coded row count, so the figure stays correct if heart.csv changes.
zeros = round(np.count_nonzero(y < 1) / len(y) * 100)
# Full-length bar drawn first; the class-0 bar is painted over it, so the red
# segment left visible runs from `zeros` to 100.
ones = 100
plt.figure(figsize=(10, 2))
plt.title('Distribution of Classes in Dataset', fontsize=15)
plt.xlabel('Percentage of Dataset', fontsize=12)
plt.barh(' ', ones, height=0.2, label='1', color='red')
plt.barh(' ', zeros, height=0.2, label='0', color='black')
# Zero-length, taller bar on the same row.
# NOTE(review): presumably here only to pad the y-range so the visible bars
# look thinner -- confirm.
plt.barh(' ', 0, height=0.4)
plt.ylabel('Kaggle Dataset', fontsize=12)
plt.legend()
plt.tight_layout()
plt.savefig('plots/classDist.png')

# Spearman's Correlation Heat Map

In [None]:
# Load the encoded dataset through the project helper.
# NOTE(review): load_dataset comes from `helper`; assumed to return
# (features, labels) -- confirm.
x, y = load_dataset('heart_encoded.csv')

# Place y after the columns of x, then name every column of the result.
data = np.column_stack((x, y))
columns = "Age  Sex  CPT_ASY  CPT_ATA  CPT_NAP  CPT_TA  RestingBP  Cholesterol  FastingBS  rECG_LVH  rECG_Normal  rECG_ST  MaxHR  ExerciseAngina  Oldpeak  stSlope_Down  stSlope_Flat  stSlope_Up HeartDisease".split()
dataframe = pd.DataFrame(data=data, columns=columns)

In [None]:
# Heat map of the pairwise Spearman rank correlations between all columns.
# DataFrame.corr() defaults to Pearson, so the method is requested explicitly
# to match this section's "Spearman's Correlation Heat Map" heading.
plt.figure(figsize=(18, 15))
sns.set_context(context="paper", font_scale=1.7)
plt.title("Correlation Matrix")
sns.heatmap(dataframe.corr(method='spearman'), annot=False, cmap='Blues')
plt.savefig("plots/corr_matrix.png")
# Close the figure once it has been written to disk.
plt.close()