These code snippets generate the dataframe images used in the README file. The first snippet generates the representative data (10 rows from the reduced model) from a CSV file that was reformatted in OpenOffice to make it more understandable. The second snippet generates an image of the coefficient dataframe that displays the coefficient estimates for the reduced model. In both cases, the dataframe_image module is used to export the dataframe as an image.

In [1]:
import pandas as pd
import dataframe_image as dfi

In [2]:
# Set random state for all operations
# (a single fixed seed, so any randomized step that is given this value
# produces the same result each time the notebook is re-run)
random_state = 525

In [3]:
# Export an image of representative data (10 rows)
# This cell only loads the model input data from CSV; the path is
# relative to the notebook's working directory.

heart_model_input_df = pd.read_csv('../Data/heart_model_input.csv')

In [4]:
# Build a data frame with representative values for presentation

# Remove the columns that are not needed for display.
# Note: we are using the reduced model for this.

drop_list = [
    'rest_bp',
    'chol',
    'blood_sugar',
    'restecg',
    'age_scaled',
    'rest_bp_scaled',
    'chol_scaled',
    'max_heart_rate_scaled',
    'oldpeak_scaled',
    'outlier',
]
# Modifies heart_model_input_df in place; raises KeyError if any listed
# column is missing (for example when this cell is run twice).
heart_model_input_df.drop(columns=drop_list, inplace=True)

In [10]:
# Create a df with a small random sample of the data (up to 10 rows)
# so the Boolean fields aren't all one value.
#
# Passing weights='heart_disease' to DataFrame.sample would use the column's
# own values as sampling weights, so any row whose value is 0 could never be
# drawn. Instead, weight every row by the inverse of its class frequency:
# each heart_disease class then has the same total probability of being
# picked, regardless of how many rows it has.

value_counts = heart_model_input_df['heart_disease'].value_counts()

# Per-row weight = 1 / (number of rows sharing this row's heart_disease value).
# DataFrame.sample normalizes the weights to sum to 1.
weights = 1 / heart_model_input_df['heart_disease'].map(value_counts)

# Never ask for more rows than exist (sampling is without replacement)
sample_size = min(10, len(heart_model_input_df))

heart_model_sample_df = heart_model_input_df.sample(
    n=sample_size,
    replace=False,
    weights=weights,
    random_state=random_state,
)

In [None]:
# Turn the indicator features into text with more understandable labels

# Columns to cast to pandas' 'string' dtype so that the replacement
# keys below ('0' and '1') can match the cell values
cast_list = ['sex', 'chest_pain', 'angina', 'ST_slope', 'flouroscopy', 'heart_defect', 'heart_disease']
cast_dict = dict.fromkeys(cast_list, 'string')

# Most indicator columns are simple No/Yes flags
recode_list = ['chest_pain', 'angina', 'ST_slope', 'heart_defect', 'heart_disease']
recode_dict = {column: {'0': 'No', '1': 'Yes'} for column in recode_list}

# Sex and flouroscopy use their own labels, so they get separate mappings.
# NOTE(review): the original UCI heart-disease data codes sex as
# 1 = male, 0 = female; confirm that this CSV really uses 0 = Male.
label_maps = {
    'sex': {'0': 'Male', '1': 'Female'},
    'flouroscopy': {'0': 'Zero', '1': 'Nonzero'},
    **recode_dict,
}

# astype returns a new frame, so heart_model_input_df is left untouched
heart_model_cast_df = heart_model_input_df.astype(cast_dict).replace(label_maps)

In [None]:
# Export the coefficient dataframe after making it easier to read.

# Read the full coefficient table and index it by the 'coef' column
# (path is relative to the notebook's working directory)

coef_df_all = pd.read_csv('../Data/coefficient_df.csv').set_index('coef')

In [None]:
# Generate the df containing only the reduced model, then export it as an image

# Work on a copy so coef_df_all stays intact
df = coef_df_all.copy()

# Drop unnecessary columns (clf_all, clf_trimmed) and the rows for
# features that are not kept in this table

df.drop(['clf_all', 'clf_trimmed'], axis='columns', inplace=True)
df.drop(['rest_bp', 'chol', 'blood_sugar', 'restecg'], axis='index', inplace=True)

# Rename column and index so they are easier to understand.
# rename_axis takes either (mapper, axis) or index=/columns=, so only
# index= is passed here.

df.rename_axis(index={'coef': 'feature'}, inplace=True)
df.rename(columns={'clf_reduced': 'value'}, inplace=True)

# Show floats with two decimal places. These are display options only;
# the values stored in df are not rounded.
# The fully qualified option name is required: the short form 'precision'
# matches more than one option on pandas >= 1.4 and raises OptionError.

pd.set_option('display.precision', 2)
pd.options.display.float_format = '{:20,.2f}'.format
dfi.export(df, '../Images/coefficient_figure.png')