# Heart Failure

In [None]:
# Import Dependencies 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress

In [None]:
# Load the three source CSV files into dataframes (read in the order listed)
fic_df, cause_df, heart_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Resources/FIC.Full CSV.csv",
        "Resources/cause_of_deaths.csv",
        "Resources/heart.csv",
    )
)

# Preview the first rows of the FIC dataset
fic_df.head()

In [None]:
# Inspect the raw column labels of fic_df exactly as they were read from the
# CSV (the rename step that follows keys on these exact strings, including
# any trailing whitespace).
fic_df.columns

In [None]:
# Map the raw CSV headers (abbreviated, dotted, or padded with trailing
# whitespace) to readable column names. Keys must match the raw headers
# character for character, so the padded keys are kept exactly as they are.
# NOTE(review): 'Hyperlip' and 'Hyperlipi' both map to 'Hyperlipidemia' --
# presumably only one of the two spellings exists in the file; confirm.
# NOTE(review): some expansions look doubtful (e.g. 'cp', 'ca', 'CO'); in the
# UCI heart-disease schema 'cp' is chest-pain type and 'ca' is the number of
# major vessels -- confirm against this dataset's data dictionary.
column_renames = {
    'Age.Group': 'Age Group',
    'Locality  ': 'Locality',
    'Marital status                       ': 'Marital Status',
    'Life.Style                                                                              ': 'Life Style',
    'Hyperlip': 'Hyperlipidemia',
    'Family.History': 'Family History',
    'HTN': 'Hypertension',
    'BP': 'Blood Pressure',
    'BGR': 'Blood Glucose Level',
    'B.urea': 'Blood Urea Nitrogen',
    'S.Cr': 'Serum Creatinine',
    'S.Sodium': 'Serum Sodium',
    'S.Potassium': 'Serum Potassium',
    'Hyperlipi': 'Hyperlipidemia',
    'S.Chloride': 'Serum Chloride',
    'CPK': 'Creatine Phosphokinase',
    'CK.MB': 'Creatine Kinase-MB',
    'ESR': 'Erythrocyte Sedimentation Rate',
    'WBC': 'White Blood Cell',
    'RBC': 'Red Blood Cell',
    'P.C.V': 'Packed Cell Volume',
    'M.C.V': 'Mean Corpuscular Volume',
    'M.C.H': 'Mean Corpuscular Hemoglobin',
    'M.C.H.C': 'Mean Corpuscular Hemoglobin Concentration',
    'PLATELET_COUNT': 'Platelet Count',
    'NEUTROPHIL': 'Neutrophil',
    'LYMPHO': 'Lymphocytes',
    'MONOCYTE': 'Monocyte',
    'EOSINO': 'Eosinophils',
    'CO': 'Cardiac Output',
    'cp': 'Constrictive Pericarditis',
    'trestbps': 'Resting Blood Pressure',
    'chol': 'Cholesterol',
    'fbs': 'Fasting Blood Sugar',
    'restecg': 'Rest ECG',
    'ca': 'Cardiac Arrest',
    'thal': 'Thalassemia',
    'num': 'Num',
    'Follow.Up': 'Follow Up',
}

# Rename on the existing dataframe object (labels not present are ignored)
fic_df.rename(columns=column_renames, inplace=True)



In [None]:
# Preview the first rows of fic_df after the column rename
fic_df.head()

In [None]:
# List the column labels again to verify that the rename was applied
fic_df.columns

In [None]:
# Columns describing each patient's medical history, plus the Mortality outcome
medical_history_columns = [
    'Depression',
    'Hyperlipidemia',
    'Diabetes',
    'Hypertension',
    'Allergies',
    'Thrombolysis',
    'Hemoglobin',
    'Hypersensitivity',
    'Mortality',
]

# Keep only those columns in a new dataframe
medical_history = fic_df[medical_history_columns]

# Preview the new dataframe
medical_history.head()

In [None]:
# For every medical condition, count the patients in each
# (condition value, Mortality value) combination. Each result is a Series
# indexed by that pair and sorted by count, largest first.
depression = medical_history.value_counts(subset=['Depression', 'Mortality'])
hyperlipidemia = medical_history.value_counts(subset=['Hyperlipidemia', 'Mortality'])
diabetes = medical_history.value_counts(subset=['Diabetes', 'Mortality'])
htn = medical_history.value_counts(subset=['Hypertension', 'Mortality'])
allergies = medical_history.value_counts(subset=['Allergies', 'Mortality'])
throm = medical_history.value_counts(subset=['Thrombolysis', 'Mortality'])
hypers = medical_history.value_counts(subset=['Hypersensitivity', 'Mortality'])

# Show the hypertension counts as a sample
htn

### Plotting results per medical condition

In [None]:
# Overlay one line per medical condition on the same axes.
# The order here must match the order of the legend labels below.
# NOTE(review): each series is indexed by its own (condition value, Mortality)
# pairs sorted by count, so the same x position may not mean the same
# category on every line -- confirm the chart reads as intended.
condition_counts = [depression, hyperlipidemia, diabetes, htn, allergies, throm, hypers]
legend_labels = ['Depression', 'Hyperlipidemia', 'Diabetes', 'Hypertension',
                 'Allergies', 'Thrombolysis', "Hypersensitivity"]

# Every series except the last is drawn with markers only
for counts in condition_counts[:-1]:
    counts.plot(marker="o")

# The last series (Hypersensitivity) also sets the title and axis labels
condition_counts[-1].plot(
    marker="o",
    title= "Mortality of Heart Failure With & Without Medical Conditions",
    xlabel= "Mortality",
    ylabel = "Number of Patients",
)
plt.legend(legend_labels)


### Comparing the total number of patients with the number of patients who have each medical condition, to see how Heart Failure correlates with certain Medical Conditions

In [None]:
# Total number of patients, taken as the number of non-null Depression entries
# NOTE(review): rows with a missing Depression value are not counted -- confirm
# this is the intended definition of the dataset total.
number_of_patients = medical_history['Depression'].count()
round(number_of_patients)

In [None]:
# Number of patients whose Depression entry is "YES"
has_depression = medical_history['Depression'] == "YES"
depression_total = has_depression.sum()
depression_total

In [None]:
# Number of patients whose Hyperlipidemia entry is "YES"
has_hyperlipidemia = medical_history['Hyperlipidemia'] == "YES"
hyperlipi_total = has_hyperlipidemia.sum()
hyperlipi_total

In [None]:
# Number of patients whose Diabetes flag is 1 (this column is coded 1 rather
# than "YES").
# The flag is compared and summed directly. The earlier version relabelled the
# filtered slice with an in-place replace on a column selection: a chained
# assignment that triggers pandas warnings, does nothing under Copy-on-Write,
# and never influenced the resulting count.
has_diabetes = medical_history['Diabetes'] == 1
diabetes_total = has_diabetes.sum()
diabetes_total

In [None]:
# Number of patients whose Hypertension entry is "YES"
has_hypertension = medical_history['Hypertension'] == "YES"
htn_total = has_hypertension.sum()
htn_total

In [None]:
# Number of patients whose Allergies entry is "YES"
has_allergies = medical_history['Allergies'] == "YES"
allergies_total = has_allergies.sum()
allergies_total

In [None]:
# Number of patients whose Thrombolysis flag is 1 (this column is coded 1
# rather than "YES").
# The flag is compared and summed directly. The earlier version relabelled the
# filtered slice with an in-place replace on a column selection: a chained
# assignment that triggers pandas warnings, does nothing under Copy-on-Write,
# and never influenced the resulting count.
had_thrombolysis = medical_history['Thrombolysis'] == 1
throm_total = had_thrombolysis.sum()
throm_total

In [None]:
# Number of patients whose Hypersensitivity entry is "YES"
has_hypersensitivity = medical_history['Hypersensitivity'] == "YES"
hypers_total = has_hypersensitivity.sum()
hypers_total

In [None]:
# Pair every condition label with its patient count so the two lists built
# below cannot drift out of order.
condition_totals = [
    ("Depression", depression_total),
    ("Hyperlipidemia", hyperlipi_total),
    ("Hypertension", htn_total),
    ("Diabetes", diabetes_total),
    ("Allergies", allergies_total),
    ("Thrombolysis", throm_total),
    ("Hypersensitivity", hypers_total),
]

# Bar labels, bar heights, and one x position per condition
medical_conditions = [label for label, _ in condition_totals]
patients_conditions = [total for _, total in condition_totals]
amount_conditions = np.arange(len(medical_conditions))



In [None]:
# Bar chart of the number of patients recorded with each medical condition
plt.bar(amount_conditions, patients_conditions, align='center', color='r', alpha=0.7)

# Put the condition names on the x axis, rotated so they do not overlap
tick_locations = list(amount_conditions)
plt.xticks(tick_locations, medical_conditions, rotation=45)

# Title and axis labels so the figure can be understood on its own
plt.title("Number of Patients per Medical Condition")
plt.xlabel("Medical Condition")
plt.ylabel("Number of Patients")
plt.show()

### Showing Correlation between Age and Cholesterol levels

In [None]:
# Linear regression of Cholesterol on Age, drawn over a scatter plot

# Keep only rows where both values are present: linregress does no NaN
# handling of its own, so a single missing value would make the fit NaN.
age_chol = fic_df[["Age", "Cholesterol"]].dropna()
x_values = age_chol["Age"]
y_values = age_chol["Cholesterol"]

# Fit the line and build its equation text for the annotation
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter of the raw points with the fitted line in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.xlabel("Age")
plt.ylabel("Cholesterol")
plt.title("Age vs. Cholesterol")

# Anchor the equation in axes-fraction coordinates (upper-left corner) so it
# is always visible. The previous data-coordinate anchor (5.8, 0.8) is clipped
# by matplotlib whenever that point lies outside the plotted range.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

# Correlation coefficient of the fit
print(rvalue)
plt.show()

In [None]:
# Mean cholesterol level for each age group

# Average the Cholesterol column within every Age Group
mean_chol_by_age = fic_df.groupby('Age Group')['Cholesterol'].mean()

# Line plot of the group means, one point per age group
plt.plot(mean_chol_by_age.index, mean_chol_by_age, color="blue")
plt.xlabel('Age Group')
plt.ylabel('Cholesterol Levels')
# The chart is grouped by age group, so the title says so (it previously read
# "by Heart Failure", which is not a variable on this plot).
plt.title('Mean Cholesterol Levels by Age Group')
plt.show()

### Using a boxplot to show potential outliers in Blood Glucose Levels within Age Groups

In [None]:
# Marker style for the outlier points: large red circles
outlier_style = dict(marker='o', markerfacecolor='r', markersize=10)

# One box of Blood Glucose Level per Age Group
fic_df.boxplot(
    column='Blood Glucose Level',
    by='Age Group',
    flierprops=outlier_style,
)

# Axis labels
plt.xlabel('Age Group')
plt.ylabel('Blood glucose levels')

### Showing the mean number of Follow Ups for each Age Group within the dataframe

In [None]:
# Mean of the Follow Up column per Age Group (a dataframe with one 'mean' column)
summary_statistics = (
    fic_df
    .groupby("Age Group")["Follow Up"]
    .agg(['mean'])
)

# One bar per age group, with the group mean as its height
age_groups = summary_statistics.index
mean_follow_ups = summary_statistics['mean']
plt.bar(age_groups, mean_follow_ups, label='Follow Ups')

# Title, axis labels and legend
plt.title('Summary Statistics of Follow Up Amount by Age Group')
plt.xlabel('Age Group')
plt.ylabel('Amount of Follow Ups')
plt.legend()
plt.show()