# Heart Failure

In [None]:
# Import Dependencies 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress

In [None]:
# Read both CSV sources from the local Resources folder:
# the cause-of-death table and the FIC patient table
cause_df = pd.read_csv("Resources/cause_of_deaths.csv")
fic_df = pd.read_csv("Resources/FIC.Full CSV.csv")

# Preview the first rows of the FIC table
fic_df.head()

In [None]:
# List the raw column names so the headers that need renaming can be identified
fic_df.columns

In [None]:
# Cleaning the data columns
# Several raw headers carry trailing whitespace (e.g. 'Locality  '), so every
# header is stripped first; the rename keys below are therefore the stripped
# raw names and do not depend on the exact amount of padding in the CSV.
COLUMN_RENAMES = {
    # Demographics / lifestyle
    'Age.Group': 'Age Group',
    'Marital status': 'Marital Status',
    'Life.Style': 'Life Style',
    'Family.History': 'Family History',
    # Conditions (both 'Hyperlip' and 'Hyperlipi' are mapped; whichever
    # spelling is present in the CSV gets renamed)
    'Hyperlip': 'Hyperlipidemia',
    'Hyperlipi': 'Hyperlipidemia',
    'HTN': 'Hypertension',
    # Blood chemistry
    'BP': 'Blood Pressure',
    'BGR': 'Blood Glucose Level',
    'B.urea': 'Blood Urea Nitrogen',
    'S.Cr': 'Serum Creatinine',
    'S.Sodium': 'Serum Sodium',
    'S.Potassium': 'Serum Potassium',
    'S.Chloride': 'Serum Chloride',
    'CPK': 'Creatine Phosphokinase',
    'CK.MB': 'Creatine Kinase-MB',
    'ESR': 'Erythrocyte Sedimentation Rate',
    # Blood count
    'WBC': 'White Blood Cell',
    'RBC': 'Red Blood Cell',
    'P.C.V': 'Packed Cell Volume',
    'M.C.V': 'Mean Corpuscular Volume',
    'M.C.H': 'Mean Corpuscular Hemoglobin',
    'M.C.H.C': 'Mean Corpuscular Hemoglobin Concentration',
    'PLATELET_COUNT': 'Platelet Count',
    'NEUTROPHIL': 'Neutrophil',
    'LYMPHO': 'Lymphocytes',
    'MONOCYTE': 'Monocyte',
    'EOSINO': 'Eosinophils',
    # Cardiac measurements
    'CO': 'Cardiac Output',
    'cp': 'Constrictive Pericarditis',
    'trestbps': 'Resting Blood Pressure',
    'chol': 'Cholesterol',
    'fbs': 'Fasting Blood Sugar',
    'restecg': 'Rest ECG',
    'ca': 'Cardiac Arrest',
    'thal': 'Thalassemia',
    'num': 'Num',
    'Follow.Up': 'Follow Up',
}

# Rebind instead of mutating in place so the cell can be re-run safely:
# a second run finds nothing left to strip or rename and is a no-op.
fic_df = fic_df.rename(columns=str.strip).rename(columns=COLUMN_RENAMES)



In [None]:
# Preview the dataframe after the column rename to confirm the new headers
fic_df.head()

In [None]:
# List every column name after the rename to verify the cleanup
fic_df.columns

In [None]:
# Columns kept for the medical-condition analysis, plus the Mortality outcome
history_columns = [
    'Depression', 'Hyperlipidemia', 'Diabetes', 'Hypertension', 'Allergies',
    'Thrombolysis', 'Hemoglobin', 'Hypersensitivity', 'Mortality',
]

# Narrow the patient table down to those columns
medical_history = fic_df[history_columns]

# Preview the new dataframe
medical_history.head()

In [None]:
def _mortality_counts(condition):
    """Count rows for every (condition value, Mortality value) combination."""
    return medical_history[[condition, 'Mortality']].value_counts()


# One count table per medical condition
depression = _mortality_counts('Depression')
hyperlipidemia = _mortality_counts('Hyperlipidemia')
diabetes = _mortality_counts('Diabetes')
htn = _mortality_counts('Hypertension')
allergies = _mortality_counts('Allergies')
throm = _mortality_counts('Thrombolysis')
hypers = _mortality_counts('Hypersensitivity')

# Display the hypertension counts as a sample
htn

### Plotting results per medical condition

In [None]:
# Draw each condition's count table as a marked line; without an explicit `ax`
# pandas reuses the current axes, so all lines land on the same chart.
# Order: Depression, Hyperlipidemia, Diabetes, Hypertension, Allergies, Thrombolysis
for condition_counts in (depression, hyperlipidemia, diabetes, htn, allergies, throm):
    condition_counts.plot(marker="o")

# Hypersensitivity is drawn last and also sets the chart title and axis labels
hypers.plot(
    marker="o",
    title="Mortality of Heart Failure With & Without Medical Conditions",
    xlabel="Mortality",
    ylabel="Number of Patients",
)

# Legend entries are listed in the same order the lines were drawn
plt.legend(['Depression', 'Hyperlipidemia', 'Diabetes', 'Hypertension', 'Allergies', 'Thrombolysis', "Hypersensitivity"])


### Comparing the total number of patients with the number of patients who have each medical condition, to look for a relationship between Heart Failure and those conditions

In [None]:
# Total patient count = number of non-null entries in the Depression column
number_of_patients = medical_history['Depression'].count()
round(number_of_patients)

In [None]:
# Number of patients whose Depression value is "YES"
depression_total = (medical_history['Depression'] == "YES").sum()
depression_total

In [None]:
# Number of patients whose Hyperlipidemia value is "YES"
hyperlipi_total = (medical_history['Hyperlipidemia'] == "YES").sum()
hyperlipi_total

In [None]:
# Number of patients whose Diabetes value is 1 (this column is compared against
# the number 1 rather than "YES").
# The matches are counted directly: relabelling 1 -> "YES" on a filtered slice
# mutates a copy (SettingWithCopyWarning) and the relabelled values are never read.
diabetes_total = (medical_history['Diabetes'] == 1).sum()
diabetes_total

In [None]:
# Number of patients whose Hypertension value is "YES"
htn_total = (medical_history['Hypertension'] == "YES").sum()
htn_total

In [None]:
# Number of patients whose Allergies value is "YES"
allergies_total = (medical_history['Allergies'] == "YES").sum()
allergies_total

In [None]:
# Number of patients whose Thrombolysis value is 1 (this column is compared
# against the number 1 rather than "YES").
# The matches are counted directly: relabelling 1 -> "YES" on a filtered slice
# mutates a copy (SettingWithCopyWarning) and the relabelled values are never read.
throm_total = (medical_history['Thrombolysis'] == 1).sum()
throm_total

In [None]:
# Number of patients whose Hypersensitivity value is "YES"
hypers_total = (medical_history['Hypersensitivity'] == "YES").sum()
hypers_total

In [None]:
# Keep each label next to its total so the two plotting lists cannot drift apart
condition_totals = [
    ("Depression", depression_total),
    ("Hyperlipidemia", hyperlipi_total),
    ("Hypertension", htn_total),
    ("Diabetes", diabetes_total),
    ("Allergies", allergies_total),
    ("Thrombolysis", throm_total),
    ("Hypersensitivity", hypers_total),
]

# Bar labels, bar heights and bar positions (0..n-1) for the chart
medical_conditions = [label for label, _total in condition_totals]
patients_conditions = [total for _label, total in condition_totals]
amount_conditions = np.arange(len(medical_conditions))



In [None]:
# One tick per bar, at the bar's own position
tick_locations = list(amount_conditions)

# Bar chart: one bar per medical condition, height = number of patients
plt.bar(amount_conditions, patients_conditions, color='r', alpha=0.7, align='center')
plt.xticks(tick_locations, medical_conditions, rotation=45)
plt.title("Medical Condition within Heart Failure")
plt.show()

### Showing Correlation between Age and Cholesterol levels

In [None]:
# Linear regression of Cholesterol on Age, drawn over the scatter plot
x_values = fic_df["Age"]
y_values = fic_df["Cholesterol"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

# Fitted cholesterol value for every observed age, and the equation as text
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel("Age")
plt.ylabel("Cholesterol")
plt.title("Age vs Cholesterol")

# Anchor the equation relative to the axes (top-left corner) rather than at a
# fixed data coordinate: an annotation whose data point lies outside the
# plotted range is not drawn at all.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

# Correlation coefficient of the fit
print(rvalue)
plt.show()

### Showing Mean Cholesterol Levels by Age Group

In [None]:
# Average Cholesterol within each Age Group
cholesterol_by_group = fic_df.groupby('Age Group')['Cholesterol']
mean_chol_by_age = cholesterol_by_group.mean()

# Line chart of the group means, one point per age group
plt.plot(mean_chol_by_age.index, mean_chol_by_age, color="blue")
plt.title('Mean Cholesterol Levels by Heart Failure')
plt.ylabel('Cholesterol Levels')
plt.xlabel('Age Group')
plt.show()

### Plotting Electrolyte Levels against Depression

In [None]:
# (column in fic_df, short name used in the axis label and title) for each panel
electrolyte_panels = [
    ('Serum Sodium', 'Sodium'),
    ('Serum Chloride', 'Chloride'),
    ('Serum Potassium', 'Potassium'),
]

# One row of three bar charts, one per electrolyte.
# NOTE: plt.bar receives the raw per-patient columns, so a bar is drawn for
# every row and bars sharing a Depression value are drawn on top of each other.
plt.figure(figsize=(10, 5))
for panel_number, (column, short_name) in enumerate(electrolyte_panels, start=1):
    plt.subplot(1, 3, panel_number)
    plt.bar(fic_df['Depression'], fic_df[column], alpha=0.5)
    plt.ylabel(f'{short_name} Level')
    plt.xlabel('Depression Status')
    plt.title(f'{short_name} vs Depression')

plt.tight_layout()
plt.show()

### Plotting Blood Pressure (BP) Levels against Depression

In [None]:
# Blood Pressure summary statistics for each Depression group
stat_names = ['mean', 'median', 'var', 'std', 'sem']
bp_by_depression = fic_df.groupby("Depression")["Blood Pressure"]
summary_statistics = bp_by_depression.agg(stat_names)

# Bar chart of the group means only
plt.bar(
    summary_statistics.index,
    summary_statistics['mean'],
    width=0.7,
    align='center',
    label='Mean',
)
plt.title('Summary Statistics of BP by Depression')
plt.xlabel('Depression')
plt.ylabel('Blood Pressure')
plt.xlim(-1, 2)  # leave empty space on both sides of the bars
plt.legend()
plt.show()

### Plotting Blood Glucose (BGR) Levels against Depression

In [None]:
# Blood Glucose Level summary statistics for each Depression group
stat_names = ['mean', 'median', 'var', 'std', 'sem']
glucose_by_depression = fic_df.groupby("Depression")["Blood Glucose Level"]
summary_statistics = glucose_by_depression.agg(stat_names)

# Bar chart of the group means only
plt.bar(summary_statistics.index, summary_statistics['mean'], width=0.7, label='Mean')
plt.title('Summary Statistics of Blood Glucose Levels by Depression')
plt.xlabel('Depression')
plt.ylabel('Blood Glucose Levels')
plt.xlim(-1, 2)  # leave empty space on both sides of the bars
plt.tight_layout()
plt.legend()
plt.show()

### Using a boxplot to show potential outliers using Blood Glucose Levels within Age Groups

In [None]:
# Outlier ("flier") markers are drawn as large red circles so they stand out
outlier_style = dict(marker='o', markerfacecolor='r', markersize=10)

# One box per Age Group showing the spread of Blood Glucose Level
fic_df.boxplot(column='Blood Glucose Level', by='Age Group', flierprops=outlier_style)

# Axis labels
plt.ylabel('Blood glucose levels')
plt.xlabel('Age Group')

### Family History of Heart Failure

In [None]:
# Variables for plotting data
family_history = fic_df['F.History'].value_counts()

# value_counts() orders the categories by frequency, so each label is derived
# from the category it actually describes instead of assuming "Yes" comes first.
# NOTE(review): assumes 1 / "YES" marks a positive family history - confirm
# against the dataset's data dictionary. Unrecognised values are shown as-is.
_history_names = {
    '1': 'Yes', '1.0': 'Yes', 'YES': 'Yes',
    '0': 'No', '0.0': 'No', 'NO': 'No',
}
labels = [
    'Family History[{}]'.format(_history_names.get(str(value).strip().upper(), value))
    for value in family_history.index
]

# Pull out only the first (most frequent) slice; sized to the number of
# categories so the pie call receives one offset per slice.
explode = tuple(.1 if position == 0 else 0 for position in range(len(family_history)))

In [None]:
# Styling shared by every slice: label text, percentage format, drop shadow,
# slice offsets and the angle at which the first slice starts
pie_options = dict(
    labels=labels,
    explode=explode,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)

# Pie chart of how often each F.History value occurs
plt.figure(figsize=(10,5))
plt.pie(fic_df['F.History'].value_counts(), **pie_options)
plt.title("Family History within Heart Failure")
plt.show()

In [None]:
# Count how many rows fall under each Gender value
fic_df["Gender"].value_counts()

In [None]:
# Cross-tabulate F.History against Gender: one row per F.History value,
# one column per Gender value, cells hold the number of matching rows
pd.crosstab(fic_df["F.History"],fic_df["Gender"])

In [None]:
# Counts of each Family History value, split by Gender
history_by_gender = pd.crosstab(fic_df["Family History"], fic_df["Gender"])

# Grouped bar chart: one group per Family History value, one bar per Gender
history_by_gender.plot(kind="bar", width=1.2)
plt.title("Family History of Heart Failure by Gender")
plt.xlim(-1, 2)

### Mortality Rates in the US and Other Countries

In [None]:
# Keep only the country, the year and the cardiovascular-disease column
mortality_columns = ['Country/Territory', 'Year', 'Cardiovascular Diseases']
death_history = cause_df[mortality_columns]

# Preview the new dataframe
death_history.head()

In [None]:
# Count every (Country/Territory, Year, Cardiovascular Diseases) combination.
# The result is indexed by those three columns and covers ALL countries.
country_year_counts = death_history[
    ['Country/Territory', 'Year', 'Cardiovascular Diseases']
].value_counts()

# NOTE: all four names below hold the same all-country table; none of them is
# filtered to a single country here. A single country can be pulled out of the
# table by its first index level, e.g. country_year_counts.loc['Brazil'].
US = country_year_counts
Brazil = country_year_counts.copy()
Canada = country_year_counts.copy()
Afghanistan = country_year_counts.copy()

US

In [None]:
# Turn the count table into a dataframe and drop its last column, leaving a
# frame whose index (country, year, disease value) can be sliced by country
df = pd.DataFrame(US).iloc[:, :-1]

# Preview with the index levels turned back into regular columns
df.reset_index().head()

In [None]:
# Select the Afghanistan rows and turn the remaining index levels into columns
afghan_df = df.loc['Afghanistan'].reset_index()
afghan_df.head()

In [None]:
# Select the Canada rows and turn the remaining index levels into columns
canada_df = df.loc['Canada'].reset_index()
canada_df.head()

In [None]:
# Select the Brazil rows and turn the remaining index levels into columns
brazil_df = df.loc['Brazil'].reset_index()
brazil_df.head()

In [None]:
# Select the United States rows and turn the remaining index levels into columns
cardioyear_df = df.loc['United States'].reset_index()
cardioyear_df.head()

In [None]:
# x (Year) and y (Cardiovascular Diseases) columns of the United States frame
us_year, us_diseases = (
    cardioyear_df['Year'],
    cardioyear_df['Cardiovascular Diseases'],
)

In [None]:
# Scatter plot of the United States cardiovascular-disease values per year
plt.scatter(us_year, us_diseases)
# Title spelling corrected ("Cardiovascular")
plt.title('United States vs Cardiovascular Disease')
plt.ylabel("Cardiovascular Disease Count")
plt.xlabel("Year")

In [None]:
# Cardiovascular-disease column of each comparison country
disease_column = 'Cardiovascular Diseases'
afghan_disease = afghan_df[disease_column]
canada_disease = canada_df[disease_column]
brazil_disease = brazil_df[disease_column]

In [None]:
# Plot US vs other countries.
# Each series carries its own label, so the legend always names the country
# that was actually plotted regardless of the drawing order.
# NOTE(review): every country is plotted against the US year column, which
# assumes all four frames list the same years in the same row order - confirm.
plt.scatter(us_year, us_diseases, label="US")
plt.scatter(us_year, afghan_disease, label="Afghanistan")
plt.scatter(us_year, canada_disease, label="Canada")
plt.scatter(us_year, brazil_disease, label="Brazil")

# Give our chart some labels and a title
plt.title("US vs Other Countries")
plt.legend()
plt.xlabel("Year")
plt.ylabel("Cardiovascular Disease Count")
plt.show()

### Mortality by Location and Gender

In [None]:
# Columns used for the location / gender / age-group breakdown, plus Mortality
demographic_columns = ['Locality', 'Gender', 'Age Group', 'Mortality']
rural = fic_df[demographic_columns]

# Preview the new dataframe
rural.head()

In [None]:
def _counts_with_mortality(column):
    """Count rows for every (column value, Mortality value) combination."""
    return rural[[column, 'Mortality']].value_counts()


# One count table per demographic column
Gender = _counts_with_mortality('Gender')
Age_Grp = _counts_with_mortality('Age Group')
Locality = _counts_with_mortality('Locality')

# Display the gender counts as a sample
Gender

In [None]:
# Number of patients whose Locality is "RURAL"
Rural_total = (rural['Locality'] == "RURAL").sum()
Rural_total

In [None]:
# Number of patients whose Locality is "URBAN"
Urban_total = (rural['Locality'] == "URBAN").sum()
Urban_total

In [None]:
# Number of patients whose Gender is "Male"
Male_total = (rural['Gender'] == "Male").sum()
Male_total

In [None]:
# Number of patients whose Gender is "Female"
Female_total = (rural['Gender'] == "Female").sum()
Female_total

In [None]:
# Number of patients in the "31-40" Age Group
thirty = (rural['Age Group'] == "31-40").sum()
thirty

In [None]:
# Number of patients in the "41-50" Age Group
fourty = (rural['Age Group'] == "41-50").sum()
fourty

In [None]:
# Number of patients in the "51-60" Age Group
fifty = (rural['Age Group'] == "51-60").sum()
fifty

In [None]:
# Number of patients in the "61-70" Age Group
sixty = (rural['Age Group'] == "61-70").sum()
sixty

In [None]:
# Bar heights: patient counts per locality, gender and age group
totals = [
    Urban_total, Rural_total,
    Male_total, Female_total,
    thirty, fourty, fifty, sixty,
]

# Bar labels, in the same order as `totals`.
# NOTE: this rebinds `rural` from the dataframe built earlier to a plain list
# of labels, so the cells that filter `rural` must be run before this one.
rural = [
    "Urban", "Rural",
    "Male", "Female",
    "Thirty", "Forty", "Fifty", "Sixty",
]

# Bar positions 0..n-1
amount = np.arange(len(totals))

In [None]:
# One tick per bar, at the bar's own position
ticks = list(amount)

# Bar chart: one bar per category label, height = number of patients in it
plt.bar(amount, totals, color='r', alpha=0.6, align='center')
plt.xticks(ticks, rural, rotation=45)
plt.title("Mortality based on Region by Gender")
plt.show()